summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorChirantan Ekbote <chirantan@chromium.org>2019-07-17 10:50:30 +0900
committerCommit Bot <commit-bot@chromium.org>2019-11-19 17:59:47 +0000
commitbd4723b218fab426f575e70df5c0e437bd40669f (patch)
tree664bb62dafef8831331520541c0eff467ef1b555 /src
parent9093c002fefeb403bffd1b29deca2e7d680b2f2f (diff)
downloadcrosvm-bd4723b218fab426f575e70df5c0e437bd40669f.tar
crosvm-bd4723b218fab426f575e70df5c0e437bd40669f.tar.gz
crosvm-bd4723b218fab426f575e70df5c0e437bd40669f.tar.bz2
crosvm-bd4723b218fab426f575e70df5c0e437bd40669f.tar.lz
crosvm-bd4723b218fab426f575e70df5c0e437bd40669f.tar.xz
crosvm-bd4723b218fab426f575e70df5c0e437bd40669f.tar.zst
crosvm-bd4723b218fab426f575e70df5c0e437bd40669f.zip
Add fs device to --shared-dir
Expand the `--shared-dir` option to allow callers to select between 9p
and virtio-fs for sharing directories.

BUG=b:136128319
TEST=start a VM with a virtio-fs based shared directory

Change-Id: Ie8afc1965b693805dd6000f0157786317aab060d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1705656
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Chirantan Ekbote <chirantan@chromium.org>
Diffstat (limited to 'src')
-rw-r--r--src/crosvm.rs51
-rw-r--r--src/linux.rs97
-rw-r--r--src/main.rs91
3 files changed, 228 insertions, 11 deletions
diff --git a/src/crosvm.rs b/src/crosvm.rs
index c8b67bb..9e015b9 100644
--- a/src/crosvm.rs
+++ b/src/crosvm.rs
@@ -14,8 +14,11 @@ use std::collections::BTreeMap;
 use std::net;
 use std::os::unix::io::RawFd;
 use std::path::PathBuf;
+use std::str::FromStr;
 
+use devices::virtio::fs::passthrough;
 use devices::SerialParameters;
+use libc::{getegid, geteuid};
 
 static SECCOMP_POLICY_DIR: &str = "/usr/share/policy/crosvm";
 
@@ -69,6 +72,52 @@ impl TouchDeviceOption {
     }
 }
 
+pub enum SharedDirKind {
+    FS,
+    P9,
+}
+
+impl FromStr for SharedDirKind {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        use SharedDirKind::*;
+        match s {
+            "fs" | "FS" => Ok(FS),
+            "9p" | "9P" | "p9" | "P9" => Ok(P9),
+            _ => Err("invalid file system type"),
+        }
+    }
+}
+
+impl Default for SharedDirKind {
+    fn default() -> SharedDirKind {
+        SharedDirKind::P9
+    }
+}
+
+pub struct SharedDir {
+    pub src: PathBuf,
+    pub tag: String,
+    pub kind: SharedDirKind,
+    pub uid_map: String,
+    pub gid_map: String,
+    pub cfg: passthrough::Config,
+}
+
+impl Default for SharedDir {
+    fn default() -> SharedDir {
+        SharedDir {
+            src: Default::default(),
+            tag: Default::default(),
+            kind: Default::default(),
+            uid_map: format!("0 {} 1", unsafe { geteuid() }),
+            gid_map: format!("0 {} 1", unsafe { getegid() }),
+            cfg: Default::default(),
+        }
+    }
+}
+
 /// Aggregate of all configurable options for a running VM.
 pub struct Config {
     pub vcpu_count: Option<u32>,
@@ -93,7 +142,7 @@ pub struct Config {
     pub wayland_socket_path: Option<PathBuf>,
     pub wayland_dmabuf: bool,
     pub x_display: Option<String>,
-    pub shared_dirs: Vec<(PathBuf, String)>,
+    pub shared_dirs: Vec<SharedDir>,
     pub sandbox: bool,
     pub seccomp_policy_dir: PathBuf,
     pub seccomp_log_failures: bool,
diff --git a/src/linux.rs b/src/linux.rs
index 25aba0f..fc4c63e 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -14,6 +14,7 @@ use std::mem;
 use std::net::Ipv4Addr;
 #[cfg(feature = "gpu")]
 use std::num::NonZeroU8;
+use std::num::ParseIntError;
 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
 use std::os::unix::net::UnixStream;
 use std::path::{Path, PathBuf};
@@ -58,7 +59,7 @@ use vm_control::{
     VmRunMode,
 };
 
-use crate::{Config, DiskOption, Executable, TouchDeviceOption};
+use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
 
 use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents, VmImage};
 
@@ -101,6 +102,8 @@ pub enum Error {
     Disk(io::Error),
     DiskImageLock(sys_util::Error),
     DropCapabilities(sys_util::Error),
+    FsDeviceNew(virtio::fs::Error),
+    GetMaxOpenFiles(io::Error),
     InputDeviceNew(virtio::InputError),
     InputEventsOpen(std::io::Error),
     InvalidFdPath,
@@ -114,6 +117,7 @@ pub enum Error {
     OpenKernel(PathBuf, io::Error),
     OpenVinput(PathBuf, io::Error),
     P9DeviceNew(virtio::P9Error),
+    ParseMaxOpenFiles(ParseIntError),
     PivotRootDoesntExist(&'static str),
     PmemDeviceImageTooBig,
     PmemDeviceNew(sys_util::Error),
@@ -135,6 +139,7 @@ pub enum Error {
     ResetTimerFd(sys_util::Error),
     RngDeviceNew(virtio::RngError),
     SettingGidMap(io_jail::Error),
+    SettingMaxOpenFiles(io_jail::Error),
     SettingUidMap(io_jail::Error),
     SignalFd(sys_util::SignalFdError),
     SpawnVcpu(io::Error),
@@ -183,6 +188,8 @@ impl Display for Error {
             Disk(e) => write!(f, "failed to load disk image: {}", e),
             DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
             DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
+            FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
+            GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
             InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
             InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
             InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
@@ -201,6 +208,7 @@ impl Display for Error {
             OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
             OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
             P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
+            ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
             PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
             PmemDeviceImageTooBig => {
                 write!(f, "failed to create pmem device: pmem device image too big")
@@ -232,6 +240,7 @@ impl Display for Error {
             ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e),
             RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
             SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
+            SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
             SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
             SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
             SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
@@ -278,6 +287,15 @@ impl AsRawFd for TaggedControlSocket {
     }
 }
 
+fn get_max_open_files() -> Result<libc::rlim_t> {
+    let mut buf = String::with_capacity(32);
+    File::open("/proc/sys/fs/file-max")
+        .and_then(|mut f| f.read_to_string(&mut buf))
+        .map_err(Error::GetMaxOpenFiles)?;
+
+    Ok(buf.trim().parse().map_err(Error::ParseMaxOpenFiles)?)
+}
+
 fn create_base_minijail(
     root: &Path,
     log_failures: bool,
@@ -721,6 +739,65 @@ fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> Devic
     })
 }
 
+fn create_fs_device(
+    cfg: &Config,
+    uid_map: &str,
+    gid_map: &str,
+    src: &Path,
+    tag: &str,
+    fs_cfg: virtio::fs::passthrough::Config,
+) -> DeviceResult {
+    let mut j = Minijail::new().map_err(Error::DeviceJail)?;
+
+    if cfg.sandbox {
+        j.namespace_pids();
+        j.namespace_user();
+        j.namespace_user_disable_setgroups();
+        j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
+        j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
+
+        // Run in an empty network namespace.
+        j.namespace_net();
+
+        j.no_new_privs();
+
+        // TODO(chirantan): Enable seccomp
+        // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
+        // the entire device process if a worker thread commits a seccomp violation.
+        // let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device.policy");
+        // j.set_seccomp_filter_tsync();
+        // if cfg.seccomp_log_failures {
+        //     j.log_seccomp_filter_failures();
+        // }
+        // j.parse_seccomp_filters(&seccomp_policy)
+        //     .map_err(Error::DeviceJail)?;
+        // j.use_seccomp_filter();
+
+        // Don't do init setup.
+        j.run_as_init();
+    }
+
+    // Create a new mount namespace with the source directory as the root. We need this even when
+    // sandboxing is disabled as the server relies on the host kernel to prevent path traversals
+    // from leaking out of the shared directory.
+    j.namespace_vfs();
+    j.enter_pivot_root(src).map_err(Error::DevicePivotRoot)?;
+
+    // The file server opens a lot of fds and needs a really high open file limit.
+    let max_open_files = get_max_open_files()?;
+    j.set_rlimit(libc::RLIMIT_NOFILE, max_open_files, max_open_files)
+        .map_err(Error::SettingMaxOpenFiles)?;
+
+    // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
+    // when num_queues > 1.
+    let dev = virtio::fs::Fs::new(tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
+
+    Ok(VirtioDeviceStub {
+        dev: Box::new(dev),
+        jail: Some(j),
+    })
+}
+
 fn create_9p_device(cfg: &Config, chronos: Ids, src: &Path, tag: &str) -> DeviceResult {
     let (jail, root) = match simple_jail(&cfg, "9p_device.policy")? {
         Some(mut jail) => {
@@ -926,9 +1003,21 @@ fn create_virtio_devices(
     }
 
     let chronos = get_chronos_ids();
-
-    for (src, tag) in &cfg.shared_dirs {
-        devs.push(create_9p_device(cfg, chronos, src, tag)?);
+    for shared_dir in &cfg.shared_dirs {
+        let SharedDir {
+            src,
+            tag,
+            kind,
+            uid_map,
+            gid_map,
+            cfg: fs_cfg,
+        } = shared_dir;
+
+        let dev = match kind {
+            SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
+            SharedDirKind::P9 => create_9p_device(cfg, chronos, src, tag)?,
+        };
+        devs.push(dev);
     }
 
     Ok(devs)
diff --git a/src/main.rs b/src/main.rs
index d6d82a5..6ddef59 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -17,7 +17,7 @@ use std::time::Duration;
 
 use crosvm::{
     argument::{self, print_help, set_arguments, Argument},
-    linux, BindMount, Config, DiskOption, Executable, GidMap, TouchDeviceOption,
+    linux, BindMount, Config, DiskOption, Executable, GidMap, SharedDir, TouchDeviceOption,
 };
 use devices::{SerialParameters, SerialType};
 use msg_socket::{MsgReceiver, MsgSender, MsgSocket};
@@ -495,9 +495,20 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::
             );
         }
         "shared-dir" => {
-            // Formatted as <src:tag>.
+            // This is formatted as multiple fields, each separated by ":". The first 2 fields are
+            // fixed (src:tag).  The rest may appear in any order:
+            //
+            // * type=TYPE - must be one of "p9" or "fs" (default: p9)
+            // * uidmap=UIDMAP - a uid map in the format "inner outer count[,inner outer count]"
+            //   (default: "0 <current euid> 1")
+            // * gidmap=GIDMAP - a gid map in the same format as uidmap
+            //   (default: "0 <current egid> 1")
+            // * timeout=TIMEOUT - a timeout value in seconds, which indicates how long attributes
+            //   and directory contents should be considered valid (default: 5)
+            // * cache=CACHE - one of "never", "always", or "auto" (default: auto)
+            // * writeback=BOOL - indicates whether writeback caching should be enabled (default: false)
             let param = value.unwrap();
-            let mut components = param.splitn(2, ':');
+            let mut components = param.split(':');
             let src =
                 PathBuf::from(
                     components
@@ -522,7 +533,66 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::
                 });
             }
 
-            cfg.shared_dirs.push((src, tag));
+            let mut shared_dir = SharedDir {
+                src,
+                tag,
+                ..Default::default()
+            };
+            for opt in components {
+                let mut o = opt.splitn(2, '=');
+                let kind = o.next().ok_or_else(|| argument::Error::InvalidValue {
+                    value: opt.to_owned(),
+                    expected: "`shared-dir` options must not be empty",
+                })?;
+                let value = o.next().ok_or_else(|| argument::Error::InvalidValue {
+                    value: opt.to_owned(),
+                    expected: "`shared-dir` options must be of the form `kind=value`",
+                })?;
+
+                match kind {
+                    "type" => {
+                        shared_dir.kind =
+                            value.parse().map_err(|_| argument::Error::InvalidValue {
+                                value: value.to_owned(),
+                                expected: "`type` must be one of `fs` or `9p`",
+                            })?
+                    }
+                    "uidmap" => shared_dir.uid_map = value.into(),
+                    "gidmap" => shared_dir.gid_map = value.into(),
+                    "timeout" => {
+                        let seconds = value.parse().map_err(|_| argument::Error::InvalidValue {
+                            value: value.to_owned(),
+                            expected: "`timeout` must be an integer",
+                        })?;
+
+                        let dur = Duration::from_secs(seconds);
+                        shared_dir.cfg.entry_timeout = dur.clone();
+                        shared_dir.cfg.attr_timeout = dur;
+                    }
+                    "cache" => {
+                        let policy = value.parse().map_err(|_| argument::Error::InvalidValue {
+                            value: value.to_owned(),
+                            expected: "`cache` must be one of `never`, `always`, or `auto`",
+                        })?;
+                        shared_dir.cfg.cache_policy = policy;
+                    }
+                    "writeback" => {
+                        let writeback =
+                            value.parse().map_err(|_| argument::Error::InvalidValue {
+                                value: value.to_owned(),
+                                expected: "`writeback` must be a boolean",
+                            })?;
+                        shared_dir.cfg.writeback = writeback;
+                    }
+                    _ => {
+                        return Err(argument::Error::InvalidValue {
+                            value: kind.to_owned(),
+                            expected: "unrecognized option for `shared-dir`",
+                        })
+                    }
+                }
+            }
+            cfg.shared_dirs.push(shared_dir);
         }
         "seccomp-policy-dir" => {
             // `value` is Some because we are in this match so it's safe to unwrap.
@@ -813,8 +883,17 @@ fn run_vm(args: std::env::Args) -> std::result::Result<(), ()> {
                                 "Path to put the control socket. If PATH is a directory, a name will be generated."),
           Argument::flag("disable-sandbox", "Run all devices in one, non-sandboxed process."),
           Argument::value("cid", "CID", "Context ID for virtual sockets."),
-          Argument::value("shared-dir", "PATH:TAG",
-                          "Directory to be shared with a VM as a source:tag pair. Can be given more than once."),
+          Argument::value("shared-dir", "PATH:TAG[:type=TYPE:writeback=BOOL:timeout=SECONDS:uidmap=UIDMAP:gidmap=GIDMAP:cache=CACHE]",
+                          "Colon-separated options for configuring a directory to be shared with the VM.
+The first field is the directory to be shared and the second field is the tag that the VM can use to identify the device.
+The remaining fields are key=value pairs that may appear in any order.  Valid keys are:
+type=(p9, fs) - Indicates whether the directory should be shared via virtio-9p or virtio-fs (default: p9).
+uidmap=UIDMAP - The uid map to use for the device's jail in the format \"inner outer count[,inner outer count]\" (default: 0 <current euid> 1).
+gidmap=GIDMAP - The gid map to use for the device's jail in the format \"inner outer count[,inner outer count]\" (default: 0 <current egid> 1).
+cache=(never, auto, always) - Indicates whether the VM can cache the contents of the shared directory (default: auto).  When set to \"auto\" and the type is \"fs\", the VM will use close-to-open consistency for file contents.
+timeout=SECONDS - How long the VM should consider file attributes and directory entries to be valid (default: 5).  If the VM has exclusive access to the directory, then this should be a large value.  If the directory can be modified by other processes, then this should be 0.
+writeback=BOOL - Indicates whether the VM can use writeback caching (default: false).  This is only safe to do when the VM has exclusive access to the files in a directory.  Additionally, the server should have read permission for all files as the VM may issue read requests even for files that are opened write-only.
+"),
           Argument::value("seccomp-policy-dir", "PATH", "Path to seccomp .policy files."),
           Argument::flag("seccomp-log-failures", "Instead of seccomp filter failures being fatal, they will be logged instead."),
           #[cfg(feature = "plugin")]