From bd4723b218fab426f575e70df5c0e437bd40669f Mon Sep 17 00:00:00 2001 From: Chirantan Ekbote Date: Wed, 17 Jul 2019 10:50:30 +0900 Subject: Add fs device to --shared-dir Expand the `--shared-dir` option to allow callers to select between 9p and virtio-fs for sharing directories. BUG=b:136128319 TEST=start a VM with a virtio-fs based shared directory Change-Id: Ie8afc1965b693805dd6000f0157786317aab060d Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1705656 Reviewed-by: Daniel Verkamp Tested-by: kokoro Commit-Queue: Chirantan Ekbote --- src/crosvm.rs | 51 ++++++++++++++++++++++++++++++- src/linux.rs | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/main.rs | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 228 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/crosvm.rs b/src/crosvm.rs index c8b67bb..9e015b9 100644 --- a/src/crosvm.rs +++ b/src/crosvm.rs @@ -14,8 +14,11 @@ use std::collections::BTreeMap; use std::net; use std::os::unix::io::RawFd; use std::path::PathBuf; +use std::str::FromStr; +use devices::virtio::fs::passthrough; use devices::SerialParameters; +use libc::{getegid, geteuid}; static SECCOMP_POLICY_DIR: &str = "/usr/share/policy/crosvm"; @@ -69,6 +72,52 @@ impl TouchDeviceOption { } } +pub enum SharedDirKind { + FS, + P9, +} + +impl FromStr for SharedDirKind { + type Err = &'static str; + + fn from_str(s: &str) -> Result { + use SharedDirKind::*; + match s { + "fs" | "FS" => Ok(FS), + "9p" | "9P" | "p9" | "P9" => Ok(P9), + _ => Err("invalid file system type"), + } + } +} + +impl Default for SharedDirKind { + fn default() -> SharedDirKind { + SharedDirKind::P9 + } +} + +pub struct SharedDir { + pub src: PathBuf, + pub tag: String, + pub kind: SharedDirKind, + pub uid_map: String, + pub gid_map: String, + pub cfg: passthrough::Config, +} + +impl Default for SharedDir { + fn default() -> SharedDir { + SharedDir { + src: Default::default(), + tag: Default::default(), + kind: Default::default(), + uid_map: format!("0 {} 1", unsafe { geteuid() }), + gid_map: format!("0 {} 1", unsafe { getegid() }), + cfg: Default::default(), + } + } +} + /// Aggregate of all configurable options for a running VM. pub struct Config { pub vcpu_count: Option, @@ -93,7 +142,7 @@ pub struct Config { pub wayland_socket_path: Option, pub wayland_dmabuf: bool, pub x_display: Option, - pub shared_dirs: Vec<(PathBuf, String)>, + pub shared_dirs: Vec, pub sandbox: bool, pub seccomp_policy_dir: PathBuf, pub seccomp_log_failures: bool, diff --git a/src/linux.rs b/src/linux.rs index 25aba0f..fc4c63e 100644 --- a/src/linux.rs +++ b/src/linux.rs @@ -14,6 +14,7 @@ use std::mem; use std::net::Ipv4Addr; #[cfg(feature = "gpu")] use std::num::NonZeroU8; +use std::num::ParseIntError; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::net::UnixStream; use std::path::{Path, PathBuf}; @@ -58,7 +59,7 @@ use vm_control::{ VmRunMode, }; -use crate::{Config, DiskOption, Executable, TouchDeviceOption}; +use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption}; use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents, VmImage}; @@ -101,6 +102,8 @@ pub enum Error { Disk(io::Error), DiskImageLock(sys_util::Error), DropCapabilities(sys_util::Error), + FsDeviceNew(virtio::fs::Error), + GetMaxOpenFiles(io::Error), InputDeviceNew(virtio::InputError), InputEventsOpen(std::io::Error), InvalidFdPath, @@ -114,6 +117,7 @@ pub enum Error { OpenKernel(PathBuf, io::Error), OpenVinput(PathBuf, io::Error), P9DeviceNew(virtio::P9Error), + ParseMaxOpenFiles(ParseIntError), PivotRootDoesntExist(&'static str), PmemDeviceImageTooBig, PmemDeviceNew(sys_util::Error), @@ -135,6 +139,7 @@ pub enum Error { ResetTimerFd(sys_util::Error), RngDeviceNew(virtio::RngError), SettingGidMap(io_jail::Error), + SettingMaxOpenFiles(io_jail::Error), SettingUidMap(io_jail::Error), SignalFd(sys_util::SignalFdError), SpawnVcpu(io::Error), @@ -183,6 +188,8 @@ impl Display for Error { Disk(e) => write!(f, "failed to load disk image: {}", e), DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e), DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e), + FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e), + GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e), InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e), InputEventsOpen(e) => write!(f, "failed to open event device: {}", e), InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"), @@ -201,6 +208,7 @@ impl Display for Error { OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e), OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e), P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e), + ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e), PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p), PmemDeviceImageTooBig => { write!(f, "failed to create pmem device: pmem device image too big") @@ -232,6 +240,7 @@ impl Display for Error { ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e), RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e), SettingGidMap(e) => write!(f, "error setting GID map: {}", e), + SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e), SettingUidMap(e) => write!(f, "error setting UID map: {}", e), SignalFd(e) => write!(f, "failed to read signal fd: {}", e), SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e), @@ -278,6 +287,15 @@ impl AsRawFd for TaggedControlSocket { } } +fn get_max_open_files() -> Result { + let mut buf = String::with_capacity(32); + File::open("/proc/sys/fs/file-max") + .and_then(|mut f| f.read_to_string(&mut buf)) + .map_err(Error::GetMaxOpenFiles)?; + + Ok(buf.trim().parse().map_err(Error::ParseMaxOpenFiles)?) +} + fn create_base_minijail( root: &Path, log_failures: bool, @@ -721,6 +739,65 @@ fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> Devic }) } +fn create_fs_device( + cfg: &Config, + uid_map: &str, + gid_map: &str, + src: &Path, + tag: &str, + fs_cfg: virtio::fs::passthrough::Config, +) -> DeviceResult { + let mut j = Minijail::new().map_err(Error::DeviceJail)?; + + if cfg.sandbox { + j.namespace_pids(); + j.namespace_user(); + j.namespace_user_disable_setgroups(); + j.uidmap(uid_map).map_err(Error::SettingUidMap)?; + j.gidmap(gid_map).map_err(Error::SettingGidMap)?; + + // Run in an empty network namespace. + j.namespace_net(); + + j.no_new_privs(); + + // TODO(chirantan): Enable seccomp + // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill + // the entire device process if a worker thread commits a seccomp violation. + // let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device.policy"); + // j.set_seccomp_filter_tsync(); + // if cfg.seccomp_log_failures { + // j.log_seccomp_filter_failures(); + // } + // j.parse_seccomp_filters(&seccomp_policy) + // .map_err(Error::DeviceJail)?; + // j.use_seccomp_filter(); + + // Don't do init setup. + j.run_as_init(); + } + + // Create a new mount namespace with the source directory as the root. We need this even when + // sandboxing is disabled as the server relies on the host kernel to prevent path traversals + // from leaking out of the shared directory. + j.namespace_vfs(); + j.enter_pivot_root(src).map_err(Error::DevicePivotRoot)?; + + // The file server opens a lot of fds and needs a really high open file limit. + let max_open_files = get_max_open_files()?; + j.set_rlimit(libc::RLIMIT_NOFILE, max_open_files, max_open_files) + .map_err(Error::SettingMaxOpenFiles)?; + + // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic + // when num_queues > 1. + let dev = virtio::fs::Fs::new(tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?; + + Ok(VirtioDeviceStub { + dev: Box::new(dev), + jail: Some(j), + }) +} + fn create_9p_device(cfg: &Config, chronos: Ids, src: &Path, tag: &str) -> DeviceResult { let (jail, root) = match simple_jail(&cfg, "9p_device.policy")? { Some(mut jail) => { @@ -926,9 +1003,21 @@ fn create_virtio_devices( } let chronos = get_chronos_ids(); - - for (src, tag) in &cfg.shared_dirs { - devs.push(create_9p_device(cfg, chronos, src, tag)?); + for shared_dir in &cfg.shared_dirs { + let SharedDir { + src, + tag, + kind, + uid_map, + gid_map, + cfg: fs_cfg, + } = shared_dir; + + let dev = match kind { + SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?, + SharedDirKind::P9 => create_9p_device(cfg, chronos, src, tag)?, + }; + devs.push(dev); } Ok(devs) diff --git a/src/main.rs b/src/main.rs index d6d82a5..6ddef59 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,7 +17,7 @@ use std::time::Duration; use crosvm::{ argument::{self, print_help, set_arguments, Argument}, - linux, BindMount, Config, DiskOption, Executable, GidMap, TouchDeviceOption, + linux, BindMount, Config, DiskOption, Executable, GidMap, SharedDir, TouchDeviceOption, }; use devices::{SerialParameters, SerialType}; use msg_socket::{MsgReceiver, MsgSender, MsgSocket}; @@ -495,9 +495,20 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: ); } "shared-dir" => { - // Formatted as . + // This is formatted as multiple fields, each separated by ":". The first 2 fields are + // fixed (src:tag). The rest may appear in any order: + // + // * type=TYPE - must be one of "p9" or "fs" (default: p9) + // * uidmap=UIDMAP - a uid map in the format "inner outer count[,inner outer count]" + // (default: "0 1") + // * gidmap=GIDMAP - a gid map in the same format as uidmap + // (default: "0 1") + // * timeout=TIMEOUT - a timeout value in seconds, which indicates how long attributes + // and directory contents should be considered valid (default: 5) + // * cache=CACHE - one of "never", "always", or "auto" (default: auto) + // * writeback=BOOL - indicates whether writeback caching should be enabled (default: false) let param = value.unwrap(); - let mut components = param.splitn(2, ':'); + let mut components = param.split(':'); let src = PathBuf::from( components @@ -522,7 +533,66 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: }); } - cfg.shared_dirs.push((src, tag)); + let mut shared_dir = SharedDir { + src, + tag, + ..Default::default() + }; + for opt in components { + let mut o = opt.splitn(2, '='); + let kind = o.next().ok_or_else(|| argument::Error::InvalidValue { + value: opt.to_owned(), + expected: "`shared-dir` options must not be empty", + })?; + let value = o.next().ok_or_else(|| argument::Error::InvalidValue { + value: opt.to_owned(), + expected: "`shared-dir` options must be of the form `kind=value`", + })?; + + match kind { + "type" => { + shared_dir.kind = + value.parse().map_err(|_| argument::Error::InvalidValue { + value: value.to_owned(), + expected: "`type` must be one of `fs` or `9p`", + })? + } + "uidmap" => shared_dir.uid_map = value.into(), + "gidmap" => shared_dir.gid_map = value.into(), + "timeout" => { + let seconds = value.parse().map_err(|_| argument::Error::InvalidValue { + value: value.to_owned(), + expected: "`timeout` must be an integer", + })?; + + let dur = Duration::from_secs(seconds); + shared_dir.cfg.entry_timeout = dur.clone(); + shared_dir.cfg.attr_timeout = dur; + } + "cache" => { + let policy = value.parse().map_err(|_| argument::Error::InvalidValue { + value: value.to_owned(), + expected: "`cache` must be one of `never`, `always`, or `auto`", + })?; + shared_dir.cfg.cache_policy = policy; + } + "writeback" => { + let writeback = + value.parse().map_err(|_| argument::Error::InvalidValue { + value: value.to_owned(), + expected: "`writeback` must be a boolean", + })?; + shared_dir.cfg.writeback = writeback; + } + _ => { + return Err(argument::Error::InvalidValue { + value: kind.to_owned(), + expected: "unrecognized option for `shared-dir`", + }) + } + } + } + cfg.shared_dirs.push(shared_dir); } "seccomp-policy-dir" => { // `value` is Some because we are in this match so it's safe to unwrap. @@ -813,8 +883,17 @@ fn run_vm(args: std::env::Args) -> std::result::Result<(), ()> { "Path to put the control socket. If PATH is a directory, a name will be generated."), Argument::flag("disable-sandbox", "Run all devices in one, non-sandboxed process."), Argument::value("cid", "CID", "Context ID for virtual sockets."), - Argument::value("shared-dir", "PATH:TAG", - "Directory to be shared with a VM as a source:tag pair. Can be given more than once."), + Argument::value("shared-dir", "PATH:TAG[:type=TYPE:writeback=BOOL:timeout=SECONDS:uidmap=UIDMAP:gidmap=GIDMAP:cache=CACHE]", + "Colon-separated options for configuring a directory to be shared with the VM. +The first field is the directory to be shared and the second field is the tag that the VM can use to identify the device. +The remaining fields are key=value pairs that may appear in any order. Valid keys are: +type=(p9, fs) - Indicates whether the directory should be shared via virtio-9p or virtio-fs (default: p9). +uidmap=UIDMAP - The uid map to use for the device's jail in the format \"inner outer count[,inner outer count]\" (default: 0 1). +gidmap=GIDMAP - The gid map to use for the device's jail in the format \"inner outer count[,inner outer count]\" (default: 0 1). +cache=(never, auto, always) - Indicates whether the VM can cache the contents of the shared directory (default: auto). When set to \"auto\" and the type is \"fs\", the VM will use close-to-open consistency for file contents. +timeout=SECONDS - How long the VM should consider file attributes and directory entries to be valid (default: 5). If the VM has exclusive access to the directory, then this should be a large value. If the directory can be modified by other processes, then this should be 0. +writeback=BOOL - Indicates whether the VM can use writeback caching (default: false). This is only safe to do when the VM has exclusive access to the files in a directory. Additionally, the server should have read permission for all files as the VM may issue read requests even for files that are opened write-only. +"), Argument::value("seccomp-policy-dir", "PATH", "Path to seccomp .policy files."), Argument::flag("seccomp-log-failures", "Instead of seccomp filter failures being fatal, they will be logged instead."), #[cfg(feature = "plugin")] -- cgit 1.4.1