From 8864cb0f3a9184e2420bbad64c43fcddf161e427 Mon Sep 17 00:00:00 2001
From: Zach Reizner
Date: Tue, 16 Jan 2018 17:59:03 -0800
Subject: crosvm: add support for plugin process

The plugin process is good for running a VM that depends substantially on
devices that aren't implemented inside of crosvm.

TEST=cargo build --features plugin; ./build_test
BUG=chromium:800626

Change-Id: I7b4f656563742cd0bedc837205dd1240d497941d
Reviewed-on: https://chromium-review.googlesource.com/869357
Commit-Ready: ChromeOS CL Exonerator Bot
Tested-by: Zach Reizner
Reviewed-by: Dylan Reid
---
 Cargo.lock               |   3 +
 Cargo.toml               |   7 +-
 plugin_proto/Cargo.toml  |   2 +-
 src/main.rs              |  62 +++++-
 src/plugin/mod.rs        | 473 ++++++++++++++++++++++++++++++
 src/plugin/process.rs    | 523 +++++++++++++++++++++++++++++++++++
 src/plugin/vcpu.rs       | 466 +++++++++++++++++++++++++++++
 tests/plugin_adder.c     | 222 ++++++++++++++++
 tests/plugin_dirty_log.c | 180 ++++++++++++++
 tests/plugin_ioevent.c   | 197 ++++++++++++++
 tests/plugin_irqfd.c     | 218 ++++++++++++++++
 tests/plugins.rs         | 115 +++++++++
 12 files changed, 2457 insertions(+), 11 deletions(-)
 create mode 100644 src/plugin/mod.rs
 create mode 100644 src/plugin/process.rs
 create mode 100644 src/plugin/vcpu.rs
 create mode 100644 tests/plugin_adder.c
 create mode 100644 tests/plugin_dirty_log.c
 create mode 100644 tests/plugin_ioevent.c
 create mode 100644 tests/plugin_irqfd.c
 create mode 100644 tests/plugins.rs

diff --git a/Cargo.lock b/Cargo.lock
index d5b0799..056855e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -25,11 +25,14 @@ dependencies = [
  "kernel_cmdline 0.1.0",
  "kernel_loader 0.1.0",
  "kvm 0.1.0",
+ "kvm_sys 0.1.0",
  "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
  "net_util 0.1.0",
  "plugin_proto 0.5.0",
+ "protobuf 1.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "qcow 0.1.0",
  "qcow_utils 0.1.0",
+ "rand 0.3.20 (registry+https://github.com/rust-lang/crates.io-index)",
  "sys_util 0.1.0",
  "vhost 0.1.0",
  "vm_control 0.1.0",
diff --git a/Cargo.toml b/Cargo.toml
index d54ef32..3f63e6e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,12 +10,13 @@ panic = 'abort'
 [workspace]
 
 [features]
-plugin = ["plugin_proto", "crosvm_plugin"]
+plugin = ["plugin_proto", "crosvm_plugin", "protobuf"]
 
 [dependencies]
 devices = { path = "devices" }
 io_jail = { path = "io_jail" }
 kvm = { path = "kvm" }
+kvm_sys = { path = "kvm_sys" }
 sys_util = { path = "sys_util" }
 kernel_cmdline = { path = "kernel_cmdline" }
 kernel_loader = { path = "kernel_loader" }
@@ -28,7 +29,11 @@ data_model = { path = "data_model" }
 qcow = { path = "qcow" }
 plugin_proto = { path = "plugin_proto", optional = true }
 crosvm_plugin = { path = "crosvm_plugin", optional = true }
+protobuf = { version = "=1.4.3", optional = true }
 qcow_utils = { path = "qcow_utils" }
 
 [target.'cfg(target_arch = "x86_64")'.dependencies]
 x86_64 = { path = "x86_64" }
+
+[dev-dependencies]
+rand = "=0.3.20"
diff --git a/plugin_proto/Cargo.toml b/plugin_proto/Cargo.toml
index 6fe8198..67b8441 100644
--- a/plugin_proto/Cargo.toml
+++ b/plugin_proto/Cargo.toml
@@ -5,7 +5,7 @@ authors = ["The Chromium OS Authors"]
 build = "build.rs"
 
 [dependencies]
-protobuf = "=1.4.3"
+protobuf = "*"
 
 [build-dependencies]
 gcc = "=0.3.54"
diff --git a/src/main.rs b/src/main.rs
index 6e5885a..27fedfd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,7 @@ extern crate devices;
 extern crate libc;
 extern crate io_jail;
 extern crate kvm;
+extern crate kvm_sys;
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
"x86_64"))] extern crate x86_64; extern crate kernel_loader; @@ -20,10 +21,16 @@ extern crate sys_util; extern crate vhost; extern crate vm_control; extern crate data_model; +#[cfg(feature = "plugin")] +extern crate plugin_proto; +#[cfg(feature = "plugin")] +extern crate protobuf; pub mod argument; pub mod device_manager; pub mod linux; +#[cfg(feature = "plugin")] +pub mod plugin; use std::net; use std::os::unix::net::UnixDatagram; @@ -65,6 +72,7 @@ pub struct Config { multiprocess: bool, seccomp_policy_dir: PathBuf, cid: Option, + plugin: Option, } impl Default for Config { @@ -84,6 +92,7 @@ impl Default for Config { multiprocess: true, seccomp_policy_dir: PathBuf::from(SECCOMP_POLICY_DIR), cid: None, + plugin: None, } } } @@ -119,7 +128,10 @@ fn wait_all_children() -> bool { fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::Result<()> { match name { "" => { - if !cfg.kernel_path.as_os_str().is_empty() { + if cfg.plugin.is_some() { + return Err(argument::Error::TooManyArguments("`plugin` can not be used with kernel" + .to_owned())); + } else if !cfg.kernel_path.as_os_str().is_empty() { return Err(argument::Error::TooManyArguments("expected exactly one kernel path" .to_owned())); } else { @@ -289,6 +301,14 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: // `value` is Some because we are in this match so it's safe to unwrap. cfg.seccomp_policy_dir = PathBuf::from(value.unwrap()); }, + "plugin" => { + if !cfg.kernel_path.as_os_str().is_empty() { + return Err(argument::Error::TooManyArguments("`plugin` can not be used with kernel".to_owned())); + } else if cfg.plugin.is_some() { + return Err(argument::Error::TooManyArguments("`plugin` already given".to_owned())); + } + cfg.plugin = Some(PathBuf::from(value.unwrap().to_owned())); + } "help" => return Err(argument::Error::PrintHelp), _ => unreachable!(), } @@ -296,7 +316,7 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: } -fn run_vm(args: std::env::Args) { +fn run_vm(args: std::env::Args) -> i32 { let arguments = &[Argument::positional("KERNEL", "bzImage of kernel to run"), Argument::short_value('p', @@ -333,11 +353,13 @@ fn run_vm(args: std::env::Args) { Argument::flag("disable-sandbox", "Run all devices in one, non-sandboxed process."), Argument::value("cid", "CID", "Context ID for virtual sockets"), Argument::value("seccomp-policy-dir", "PATH", "Path to seccomp .policy files."), + #[cfg(feature = "plugin")] + Argument::value("plugin", "PATH", "Path to plugin process to run under crosvm."), Argument::short_flag('h', "help", "Print help message.")]; let mut cfg = Config::default(); let match_res = set_arguments(args, &arguments[..], |name, value| set_argument(&mut cfg, name, value)).and_then(|_| { - if cfg.kernel_path.as_os_str().is_empty() { + if cfg.kernel_path.as_os_str().is_empty() && cfg.plugin.is_none() { return Err(argument::Error::ExpectedArgument("`KERNEL`".to_owned())); } if cfg.host_ip.is_some() || cfg.netmask.is_some() || cfg.mac_address.is_some() { @@ -355,15 +377,32 @@ fn run_vm(args: std::env::Args) { }); match match_res { - Ok(_) => { + #[cfg(feature = "plugin")] + Ok(()) if cfg.plugin.is_some() => { + match plugin::run_config(cfg) { + Ok(_) => info!("crosvm and plugin have exited normally"), + Err(e) => { + error!("{}", e); + return 1; + } + } + } + Ok(()) => { match linux::run_config(cfg) { Ok(_) => info!("crosvm has exited normally"), - Err(e) => error!("{}", e), + Err(e) => { + error!("{}", e); + return 1; + } } } 
         Err(argument::Error::PrintHelp) => print_help("crosvm run", "KERNEL", &arguments[..]),
-        Err(e) => println!("{}", e),
+        Err(e) => {
+            println!("{}", e);
+            return 1;
+        }
     }
+    0
 }
 
 fn stop_vms(args: std::env::Args) {
@@ -430,27 +469,31 @@ fn print_usage() {
 fn main() {
     if let Err(e) = syslog::init() {
         println!("failed to initialize syslog: {:?}", e);
-        return;
+        std::process::exit(1);
     }
 
     let mut args = std::env::args();
     if args.next().is_none() {
         error!("expected executable name");
-        return;
+        std::process::exit(1);
     }
 
+    // Past this point, usage of exit is in danger of leaking zombie processes.
+
+    let mut exit_code = 0;
     match args.next().as_ref().map(|a| a.as_ref()) {
         None => print_usage(),
         Some("stop") => {
             stop_vms(args);
         }
         Some("run") => {
-            run_vm(args);
+            exit_code = run_vm(args);
         }
         Some("balloon") => {
             balloon_vms(args);
         }
         Some(c) => {
+            exit_code = 1;
             println!("invalid subcommand: {:?}", c);
             print_usage();
         }
@@ -470,4 +513,5 @@ fn main() {
 
     // WARNING: Any code added after this point is not guaranteed to run
     // since we may forcibly kill this process (and its children) above.
+    std::process::exit(exit_code);
 }
diff --git a/src/plugin/mod.rs b/src/plugin/mod.rs
new file mode 100644
index 0000000..4d4805d
--- /dev/null
+++ b/src/plugin/mod.rs
@@ -0,0 +1,473 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+mod process;
+mod vcpu;
+
+use std::fmt;
+use std::fs::File;
+use std::io;
+use std::os::unix::io::{IntoRawFd, FromRawFd};
+use std::os::unix::net::UnixDatagram;
+use std::result;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, Barrier};
+use std::thread;
+use std::time::{Duration, Instant};
+
+use libc::{socketpair, ioctl, c_ulong, AF_UNIX, SOCK_SEQPACKET, FIOCLEX, EAGAIN, EINTR, EINVAL,
+           ENOENT, EPERM, EDEADLK, ENOTTY, EEXIST, EBADF, EOVERFLOW, SIGCHLD};
+
+use protobuf::ProtobufError;
+
+use kvm::{Kvm, Vm, Vcpu, VcpuExit, IoeventAddress, NoDatamatch};
+use sys_util::{EventFd, MmapError, Killable, SignalFd, SignalFdError, Poller, Pollable,
+               GuestMemory, Result as SysResult, Error as SysError, register_signal_handler};
+
+use Config;
+
+use self::process::*;
+use self::vcpu::*;
+
+const MAX_DATAGRAM_SIZE: usize = 4096;
+const MAX_VCPU_DATAGRAM_SIZE: usize = 0x40000;
+
+/// An error that occurs during the lifetime of a plugin process.
+pub enum Error {
+    CloneEventFd(SysError),
+    CloneVcpuSocket(io::Error),
+    CreateEventFd(SysError),
+    CreateIrqChip(SysError),
+    CreateKvm(SysError),
+    CreateMainSocket(SysError),
+    CreateSignalFd(SignalFdError),
+    CreateSocketPair(io::Error),
+    CreateVcpu(SysError),
+    CreateVcpuSocket(SysError),
+    CreateVm(SysError),
+    DecodeRequest(ProtobufError),
+    EncodeResponse(ProtobufError),
+    PluginFailed(i32),
+    PluginKill(SysError),
+    PluginKilled(i32),
+    PluginSocketHup,
+    PluginSocketPoll(SysError),
+    PluginSocketRecv(SysError),
+    PluginSocketSend(SysError),
+    PluginSpawn(io::Error),
+    PluginTimeout,
+    PluginWait(SysError),
+    Poll(SysError),
+    SigChild {
+        pid: u32,
+        signo: u32,
+        status: i32,
+        code: i32,
+    },
+    SignalFd(SignalFdError),
+    SpawnVcpu(io::Error),
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::CloneEventFd(ref e) => write!(f, "failed to clone eventfd: {:?}", e),
+            Error::CloneVcpuSocket(ref e) => write!(f, "failed to clone vcpu socket: {:?}", e),
+            Error::CreateEventFd(ref e) => write!(f, "failed to create eventfd: {:?}", e),
+            Error::CreateIrqChip(ref e) => write!(f, "failed to create kvm irqchip: {:?}", e),
+            Error::CreateKvm(ref e) => write!(f, "error creating Kvm: {:?}", e),
+            Error::CreateMainSocket(ref e) => {
+                write!(f, "error creating main request socket: {:?}", e)
+            }
+            Error::CreateSignalFd(ref e) => write!(f, "failed to create signalfd: {:?}", e),
+            Error::CreateSocketPair(ref e) => write!(f, "failed to create socket pair: {}", e),
+            Error::CreateVcpu(ref e) => write!(f, "error creating vcpu: {:?}", e),
+            Error::CreateVcpuSocket(ref e) => {
+                write!(f, "error creating vcpu request socket: {:?}", e)
+            }
+            Error::CreateVm(ref e) => write!(f, "error creating vm: {:?}", e),
+            Error::DecodeRequest(ref e) => write!(f, "failed to decode plugin request: {}", e),
+            Error::EncodeResponse(ref e) => write!(f, "failed to encode plugin response: {}", e),
+            Error::PluginFailed(ref e) => write!(f, "plugin exited with error: {}", e),
+            Error::PluginKill(ref e) => write!(f, "error sending kill signal to plugin: {:?}", e),
+            Error::PluginKilled(ref e) => write!(f, "plugin exited with signal {}", e),
+            Error::PluginSocketHup => write!(f, "plugin request socket has been hung up"),
+            Error::PluginSocketPoll(ref e) => {
+                write!(f, "failed to poll plugin request sockets: {:?}", e)
+            }
+            Error::PluginSocketRecv(ref e) => {
+                write!(f, "failed to recv from plugin request socket: {:?}", e)
+            }
+            Error::PluginSocketSend(ref e) => {
+                write!(f, "failed to send to plugin request socket: {:?}", e)
+            }
+            Error::PluginSpawn(ref e) => write!(f, "failed to spawn plugin: {}", e),
+            Error::PluginTimeout => write!(f, "plugin did not exit within timeout"),
+            Error::PluginWait(ref e) => write!(f, "error waiting for plugin to exit: {:?}", e),
+            Error::Poll(ref e) => write!(f, "failed to poll all FDs: {:?}", e),
+            Error::SigChild {
+                pid,
+                signo,
+                status,
+                code,
+            } => {
+                write!(f,
+                       "process {} died with signal {}, status {}, and code {}",
+                       pid,
+                       signo,
+                       status,
+                       code)
+            }
+            Error::SignalFd(ref e) => write!(f, "failed to read signal fd: {:?}", e),
+            Error::SpawnVcpu(ref e) => write!(f, "error spawning vcpu thread: {}", e),
+        }
+    }
+}
+
+type Result<T> = result::Result<T, Error>;
+
+fn downcast_file<F: IntoRawFd>(f: F) -> File {
+    unsafe { File::from_raw_fd(f.into_raw_fd()) }
+}
+
+fn new_seqpacket_pair() -> SysResult<(UnixDatagram, UnixDatagram)> {
+    let mut fds = [0, 0];
+    unsafe {
+        let ret = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds.as_mut_ptr());
+        if ret == 0 {
+            ioctl(fds[0], FIOCLEX);
+            Ok((UnixDatagram::from_raw_fd(fds[0]), UnixDatagram::from_raw_fd(fds[1])))
+        } else {
+            Err(SysError::last())
+        }
+    }
+}
+
+fn proto_to_sys_err(e: ProtobufError) -> SysError {
+    match e {
+        ProtobufError::IoError(e) => SysError::new(-e.raw_os_error().unwrap_or(EINVAL)),
+        _ => SysError::new(-EINVAL),
+    }
+}
+
+fn io_to_sys_err(e: io::Error) -> SysError {
+    SysError::new(-e.raw_os_error().unwrap_or(EINVAL))
+}
+
+fn mmap_to_sys_err(e: MmapError) -> SysError {
+    match e {
+        MmapError::SystemCallFailed(e) => e,
+        _ => SysError::new(-EINVAL),
+    }
+}
+
+/// Each `PluginObject` represents one object that was instantiated by the guest using the `Create`
+/// request.
+///
+/// Each such object has an ID associated with it that exists in an ID space shared by every variant
+/// of `PluginObject`. This allows all the objects to be indexed in a single map, and allows for a
+/// common destroy method.
+///
+/// In addition to the destroy method, each object may have methods specific to its variant type.
+/// These variant methods must be done by matching the variant to the expected type for that method.
+/// For example, getting the dirty log from a `Memory` object starting with an ID:
+///
+/// ```
+/// match objects.get(&request_id) {
+///     Some(&PluginObject::Memory { slot, length }) => vm.get_dirty_log(slot, &mut dirty_log[..]),
+///     _ => return Err(SysError::new(-ENOENT)),
+/// }
+/// ```
+enum PluginObject {
+    IoEvent {
+        evt: EventFd,
+        addr: IoeventAddress,
+        length: u32,
+        datamatch: u64,
+    },
+    Memory { slot: u32, length: usize },
+    IrqEvent { irq_id: u32, evt: EventFd },
+}
+
+impl PluginObject {
+    fn destroy(self, vm: &mut Vm) -> SysResult<()> {
+        match self {
+            PluginObject::IoEvent {
+                evt,
+                addr,
+                length,
+                datamatch,
+            } => {
+                match length {
+                    0 => vm.unregister_ioevent(&evt, addr, NoDatamatch),
+                    1 => vm.unregister_ioevent(&evt, addr, datamatch as u8),
+                    2 => vm.unregister_ioevent(&evt, addr, datamatch as u16),
+                    4 => vm.unregister_ioevent(&evt, addr, datamatch as u32),
+                    8 => vm.unregister_ioevent(&evt, addr, datamatch as u64),
+                    _ => Err(SysError::new(-EINVAL)),
+                }
+            }
+            PluginObject::Memory { slot, .. } => vm.remove_device_memory(slot).and(Ok(())),
+            PluginObject::IrqEvent { irq_id, evt } => vm.unregister_irqfd(&evt, irq_id),
+        }
+    }
+}
+
+/// Run a VM with a plugin process specified by `cfg`.
+///
+/// Not every field of `cfg` will be used. In particular, most fields that pertain to a specific
+/// device are ignored because the plugin is responsible for emulating hardware.
+pub fn run_config(cfg: Config) -> Result<()> {
+    info!("crosvm starting plugin process");
+
+    // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
+    // before any jailed devices have been spawned, so that we can catch any of them that fail very
+    // quickly.
+    let sigchld_fd = SignalFd::new(SIGCHLD).map_err(Error::CreateSignalFd)?;
+
+    let vcpu_count = cfg.vcpu_count.unwrap_or(1);
+    let mem = GuestMemory::new(&[]).unwrap();
+    let kvm = Kvm::new().map_err(Error::CreateKvm)?;
+    let mut vm = Vm::new(&kvm, mem).map_err(Error::CreateVm)?;
+    vm.create_irq_chip().map_err(Error::CreateIrqChip)?;
+    let mut plugin = Process::new(vcpu_count, &mut vm, &cfg.plugin.unwrap())?;
+
+    let exit_evt = EventFd::new().map_err(Error::CreateEventFd)?;
+    let kill_signaled = Arc::new(AtomicBool::new(false));
+    let mut vcpu_handles = Vec::with_capacity(vcpu_count as usize);
+    let vcpu_thread_barrier = Arc::new(Barrier::new((vcpu_count + 1) as usize));
+    for cpu_id in 0..vcpu_count {
+        let kill_signaled = kill_signaled.clone();
+        let vcpu_thread_barrier = vcpu_thread_barrier.clone();
+        let vcpu_exit_evt = exit_evt.try_clone().map_err(Error::CloneEventFd)?;
+        let vcpu_plugin = plugin.create_vcpu(cpu_id)?;
+        let vcpu = Vcpu::new(cpu_id as c_ulong, &kvm, &vm)
+            .map_err(Error::CreateVcpu)?;
+
+        vcpu_handles.push(thread::Builder::new()
+                              .name(format!("crosvm_vcpu{}", cpu_id))
+                              .spawn(move || {
+            unsafe {
+                extern "C" fn handle_signal() {}
+                // Our signal handler does nothing and is trivially async signal safe.
+                register_signal_handler(0, handle_signal)
+                    .expect("failed to register vcpu signal handler");
+            }
+
+            let res = vcpu_plugin.init(&vcpu);
+            vcpu_thread_barrier.wait();
+            if let Err(e) = res {
+                error!("failed to initialize vcpu {}: {:?}", cpu_id, e);
+            } else {
+                loop {
+                    let run_res = vcpu.run();
+                    match run_res {
+                        Ok(run) => {
+                            match run {
+                                VcpuExit::IoIn(addr, data) => {
+                                    vcpu_plugin.io_read(addr as u64, data, &vcpu);
+                                }
+                                VcpuExit::IoOut(addr, data) => {
+                                    vcpu_plugin.io_write(addr as u64, data, &vcpu);
+                                }
+                                VcpuExit::MmioRead(addr, data) => {
+                                    vcpu_plugin.mmio_read(addr as u64, data, &vcpu);
+                                }
+                                VcpuExit::MmioWrite(addr, data) => {
+                                    vcpu_plugin.mmio_write(addr as u64, data, &vcpu);
+                                }
+                                VcpuExit::Hlt => break,
+                                VcpuExit::Shutdown => break,
+                                VcpuExit::InternalError => {
+                                    error!("vcpu {} has internal error", cpu_id);
+                                    break;
+                                }
+                                r => warn!("unexpected vcpu exit: {:?}", r),
+                            }
+                        }
+                        Err(e) => {
+                            match e.errno() {
+                                EAGAIN | EINTR => {}
+                                _ => {
+                                    error!("vcpu hit unknown error: {:?}", e);
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                    if kill_signaled.load(Ordering::SeqCst) {
+                        break;
+                    }
+
+                    if let Err(e) = vcpu_plugin.pre_run(&vcpu) {
+                        error!("failed to process pause on vcpu {}: {:?}", cpu_id, e);
+                        break;
+                    }
+                }
+            }
+            vcpu_exit_evt
+                .write(1)
+                .expect("failed to signal vcpu exit eventfd");
+        })
+                              .map_err(Error::SpawnVcpu)?);
+    }
+
+    vcpu_thread_barrier.wait();
+
+    const EXIT: u32 = 0;
+    const CHILD_SIGNAL: u32 = 1;
+    const PLUGIN_BASE: u32 = 2;
+
+    let mut sockets_to_drop = Vec::new();
+    let mut poller = Poller::new(3);
+
+    let mut res = Ok(());
+    // If Some, we will exit after enough time is passed to shutdown cleanly.
+    let mut dying_instant: Option<Instant> = None;
+    let duration_to_die = Duration::from_millis(1000);
+
+    // In this loop, make every attempt to not return early. If an error is encountered, set `res`
+    // to the error, set `dying_instant` to now, and signal the plugin that it will be killed soon.
+    // If the plugin cannot be signaled because it is dead or `signal_kill` failed, simply break
+    // from the poll loop so that the VCPU threads can be cleaned up.
+    'poll: loop {
+        // After we have waited long enough, it's time to give up and exit.
+        if dying_instant
+               .map(|i| i.elapsed() >= duration_to_die)
+               .unwrap_or(false) {
+            break;
+        }
+
+        let tokens = {
+            let mut pollables = Vec::new();
+            // No need to check the exit event if we are already doing cleanup.
+            if dying_instant.is_none() {
+                pollables.push((EXIT, &exit_evt as &Pollable));
+            }
+            pollables.push((CHILD_SIGNAL, &sigchld_fd as &Pollable));
+            for (i, socket) in plugin.sockets().iter().enumerate() {
+                pollables.push((PLUGIN_BASE + i as u32, socket as &Pollable));
+            }
+
+            let poll_res = match dying_instant {
+                Some(ref inst) => {
+                    poller.poll_timeout(&pollables[..], &mut (duration_to_die - inst.elapsed()))
+                }
+                None => poller.poll(&pollables[..]),
+            };
+            match poll_res {
+                Ok(v) => v,
+                Err(e) => {
+                    // Polling no longer works; time to break and clean up.
+                    if res.is_ok() {
+                        res = Err(Error::Poll(e));
+                    }
+                    break;
+                }
+            }
+        };
+        for &token in tokens {
+            match token {
+                EXIT => {
+                    dying_instant.get_or_insert(Instant::now());
+                    let sig_res = plugin.signal_kill();
+                    if res.is_ok() && sig_res.is_err() {
+                        res = sig_res.map_err(Error::PluginKill);
+                    }
+                }
+                CHILD_SIGNAL => {
+                    // Print all available siginfo structs, then exit the loop.
+                    loop {
+                        match sigchld_fd.read() {
+                            Ok(Some(siginfo)) => {
+                                // If the plugin process has ended, there is no need to continue
+                                // processing plugin connections, so we break early.
+                                if siginfo.ssi_pid == plugin.pid() {
+                                    break 'poll;
+                                }
+                                // Because SIGCHLD is not expected from anything other than the
+                                // plugin process, report it as an error.
+                                if res.is_ok() {
+                                    res = Err(Error::SigChild {
+                                                  pid: siginfo.ssi_pid,
+                                                  signo: siginfo.ssi_signo,
+                                                  status: siginfo.ssi_status,
+                                                  code: siginfo.ssi_code,
+                                              })
+                                }
+                            }
+                            Ok(None) => break, // No more signals to read.
+                            Err(e) => {
+                                // Something really must be messed up for this to happen, continue
+                                // processing connections for a limited time.
+                                if res.is_ok() {
+                                    res = Err(Error::SignalFd(e));
+                                }
+                                break;
+                            }
+                        }
+                    }
+                    // As we only spawn the plugin process, getting a SIGCHLD can only mean
+                    // something went wrong.
+                    dying_instant.get_or_insert(Instant::now());
+                    let sig_res = plugin.signal_kill();
+                    if res.is_ok() && sig_res.is_err() {
+                        res = sig_res.map_err(Error::PluginKill);
+                    }
+                }
+                t if t >= PLUGIN_BASE && t < PLUGIN_BASE + (plugin.sockets().len() as u32) => {
+                    let socket_index = (t - PLUGIN_BASE) as usize;
+                    match plugin.handle_socket(socket_index, &mut vm, &vcpu_handles) {
+                        Ok(_) => {}
+                        // A HUP is an expected event for a socket, so don't bother warning about
+                        // it.
+                        Err(Error::PluginSocketHup) => sockets_to_drop.push(socket_index),
+                        // Only one connection out of potentially many is broken. Drop it, but
+                        // don't start cleaning up. Because the error isn't returned, we will warn
+                        // about it here.
+                        Err(e) => {
+                            warn!("error handling plugin socket: {}", e);
+                            sockets_to_drop.push(socket_index);
+                        }
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        // Cleanup all of the sockets that we have determined were disconnected or suffered some
+        // other error.
+        plugin.drop_sockets(&mut sockets_to_drop);
+        sockets_to_drop.clear();
+    }
+
+    // vcpu threads MUST see the kill signaled flag, otherwise they may re-enter the VM.
+    kill_signaled.store(true, Ordering::SeqCst);
+    // Depending on how we ended up here, the plugin process, or a VCPU thread waiting for requests
+    // might be stuck. The `signal_kill` call will unstick all the VCPU threads by closing their
+    // blocked connections.
+    plugin.signal_kill().map_err(Error::PluginKill)?;
+    for handle in vcpu_handles {
+        match handle.kill(0) {
+            Ok(_) => {
+                if let Err(e) = handle.join() {
+                    error!("failed to join vcpu thread: {:?}", e);
+                }
+            }
+            Err(e) => error!("failed to kill vcpu thread: {:?}", e),
+        }
+    }
+
+    match plugin.try_wait() {
+        // The plugin has run out of time by now
+        Ok(ProcessStatus::Running) => return Err(Error::PluginTimeout),
+        // Return an error discovered earlier in this function.
+        Ok(ProcessStatus::Success) => return res,
+        Ok(ProcessStatus::Fail(code)) => return Err(Error::PluginFailed(code)),
+        Ok(ProcessStatus::Signal(code)) => return Err(Error::PluginKilled(code)),
+        Err(e) => return Err(Error::PluginWait(e)),
+    };
+}
diff --git a/src/plugin/process.rs b/src/plugin/process.rs
new file mode 100644
index 0000000..fb9ad7d
--- /dev/null
+++ b/src/plugin/process.rs
@@ -0,0 +1,523 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::collections::hash_map::{HashMap, Entry, VacantEntry};
+use std::fs::File;
+use std::net::Shutdown;
+use std::os::unix::io::{AsRawFd, RawFd};
+use std::os::unix::net::UnixDatagram;
+use std::os::unix::process::ExitStatusExt;
+use std::path::Path;
+use std::process::{Command, Child};
+use std::sync::{Arc, Mutex, RwLock};
+use std::thread::JoinHandle;
+
+use libc::EINVAL;
+
+use protobuf;
+use protobuf::Message;
+
+use kvm::{Vm, IoeventAddress, NoDatamatch, IrqSource, IrqRoute, dirty_log_bitmap_size};
+use sys_util::{EventFd, MemoryMapping, Killable, Scm, Poller, Pollable, SharedMemory,
+               GuestAddress, Result as SysResult, Error as SysError};
+use plugin_proto::*;
+
+use super::*;
+
+/// The status of a process, either that it is running, or that it exited under some condition.
+pub enum ProcessStatus {
+    /// The process is running and therefore has no information about its result.
+    Running,
+    /// The process has exited with a successful code.
+    Success,
+    /// The process failed with the given exit code.
+    Fail(i32),
+    /// The process was terminated with the given signal code.
+    Signal(i32),
+}
+
+/// Creates, owns, and handles messages from a plugin process.
+///
+/// A plugin process has control over a single VM and a fixed number of VCPUs via a set of unix
+/// domain socket connections and a protocol defined in `plugin_proto`. The plugin process is run
+/// in an unprivileged manner as a child process spawned via a path to an arbitrary executable.
+pub struct Process {
+    started: bool,
+    plugin_proc: Child,
+    request_sockets: Vec<UnixDatagram>,
+    objects: HashMap<u32, PluginObject>,
+    shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
+    per_vcpu_states: Vec<Arc<Mutex<PerVcpuState>>>,
+
+    // Resources to send to the plugin
+    kill_evt: EventFd,
+    vcpu_sockets: Vec<(UnixDatagram, UnixDatagram)>,
+
+    // Socket transmission
+    scm: Scm,
+    request_buffer: Vec<u8>,
+    datagram_files: Vec<File>,
+    response_buffer: Vec<u8>,
+}
+
+impl Process {
+    /// Creates a new plugin process for the given number of vcpus and VM.
+    ///
+    /// This will immediately spawn the plugin process and wait for the child to signal that it is
+    /// ready to start. This call may block indefinitely.
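+    ///
+    /// A sketch of typical use, mirroring `run_config` in `src/plugin/mod.rs` (the vcpu count and
+    /// plugin path here are illustrative only):
+    ///
+    /// ```
+    /// let mut vm = Vm::new(&kvm, GuestMemory::new(&[]).unwrap()).unwrap();
+    /// vm.create_irq_chip().unwrap();
+    /// let mut plugin = Process::new(1, &mut vm, Path::new("/path/to/plugin")).unwrap();
+    /// ```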
+    pub fn new(cpu_count: u32, vm: &mut Vm, cmd_path: &Path) -> Result<Process> {
+        let (request_socket, child_socket) =
+            new_seqpacket_pair().map_err(Error::CreateMainSocket)?;
+
+        let mut vcpu_sockets: Vec<(UnixDatagram, UnixDatagram)> =
+            Vec::with_capacity(cpu_count as usize);
+        for _ in 0..cpu_count {
+            vcpu_sockets.push(new_seqpacket_pair().map_err(Error::CreateVcpuSocket)?);
+        }
+        let mut per_vcpu_states: Vec<Arc<Mutex<PerVcpuState>>> = Vec::new();
+        per_vcpu_states.resize(cpu_count as usize, Default::default());
+
+        // TODO(zachr): use a minijail
+        let plugin_proc = Command::new(cmd_path)
+            .env("CROSVM_SOCKET", child_socket.as_raw_fd().to_string())
+            .spawn()
+            .map_err(Error::PluginSpawn)?;
+
+        // Very important to drop the child socket so that the pair will properly hang up if the
+        // plugin process exits or closes its end.
+        drop(child_socket);
+
+        let request_sockets = vec![request_socket];
+
+        let mut plugin = Process {
+            started: false,
+            plugin_proc,
+            request_sockets,
+            objects: Default::default(),
+            shared_vcpu_state: Default::default(),
+            per_vcpu_states,
+            kill_evt: EventFd::new().map_err(Error::CreateEventFd)?,
+            vcpu_sockets,
+            scm: Scm::new(1),
+            request_buffer: vec![0; MAX_DATAGRAM_SIZE],
+            datagram_files: Vec::new(),
+            response_buffer: Vec::new(),
+        };
+
+        plugin.run_until_started(vm)?;
+
+        Ok(plugin)
+    }
+
+    fn run_until_started(&mut self, vm: &mut Vm) -> Result<()> {
+        let mut sockets_to_drop = Vec::new();
+        let mut poller = Poller::new(1);
+        while !self.started {
+            if self.request_sockets.is_empty() {
+                return Err(Error::PluginSocketHup);
+            }
+
+            let tokens = {
+                let mut pollables = Vec::with_capacity(self.objects.len());
+                for (i, socket) in self.request_sockets.iter().enumerate() {
+                    pollables.push((i as u32, socket as &Pollable));
+                }
+                poller
+                    .poll(&pollables[..])
+                    .map_err(Error::PluginSocketPoll)?
+            };
+
+            for &token in tokens {
+                match self.handle_socket(token as usize, vm, &[]) {
+                    Ok(_) => {}
+                    Err(Error::PluginSocketHup) => sockets_to_drop.push(token as usize),
+                    r => return r,
+                }
+            }
+
+            self.drop_sockets(&mut sockets_to_drop);
+            sockets_to_drop.clear();
+        }
+
+        Ok(())
+    }
+
+    /// Creates a VCPU plugin connection object, used by a VCPU run loop to communicate with the
+    /// plugin process.
+    ///
+    /// While each invocation of `create_vcpu` with the given `cpu_id` will return a unique
+    /// `PluginVcpu` object, the underlying resources are shared by each `PluginVcpu` resulting
+    /// from the same `cpu_id`.
+    pub fn create_vcpu(&self, cpu_id: u32) -> Result<PluginVcpu> {
+        let vcpu_socket = self.vcpu_sockets[cpu_id as usize]
+            .0
+            .try_clone()
+            .map_err(Error::CloneVcpuSocket)?;
+        Ok(PluginVcpu::new(self.shared_vcpu_state.clone(),
+                           self.per_vcpu_states[cpu_id as usize].clone(),
+                           vcpu_socket))
+    }
+
+    /// Returns the process ID of the plugin process.
+    pub fn pid(&self) -> u32 {
+        self.plugin_proc.id()
+    }
+
+    /// Returns a slice of each socket that should be polled.
+    ///
+    /// If any socket in this slice becomes readable, `handle_socket` should be called with the
+    /// index of that socket. If any socket becomes closed, its index should be passed to
+    /// `drop_sockets`.
+    pub fn sockets(&self) -> &[UnixDatagram] {
+        &self.request_sockets
+    }
+
+    /// Drops each socket identified by its index in the slice returned by `sockets`.
+    ///
+    /// The given `socket_idxs` slice will be modified in an arbitrary way for efficient removal of
+    /// the sockets from internal data structures.
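+    ///
+    /// For example (a sketch; in `run_config` the indices come from poll tokens whose sockets
+    /// hung up):
+    ///
+    /// ```
+    /// let mut sockets_to_drop = vec![2, 0];
+    /// plugin.drop_sockets(&mut sockets_to_drop);
+    /// sockets_to_drop.clear();
+    /// ```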
+    pub fn drop_sockets(&mut self, socket_idxs: &mut [usize]) {
+        // Takes a mutable slice so that the indices can be sorted for efficient removal in
+        // request_sockets.
+        socket_idxs.sort_unstable_by(|a, b| b.cmp(a));
+        let old_len = self.request_sockets.len();
+        for &socket_index in socket_idxs.iter() {
+            // swap_remove changes the index of the last element, but we already know that one
+            // doesn't need to be removed because we are removing sockets in descending order
+            // thanks to the above sort.
+            self.request_sockets.swap_remove(socket_index);
+        }
+        assert_eq!(old_len - socket_idxs.len(), self.request_sockets.len());
+    }
+
+    /// Gently requests that the plugin process exit cleanly, and ends handling of all VCPU
+    /// connections.
+    ///
+    /// The plugin process can ignore the given signal, and so some timeout should be used before
+    /// forcefully terminating the process.
+    ///
+    /// Any blocked VCPU connections will get interrupted so that the VCPU threads can exit
+    /// cleanly. Any subsequent attempt to use the VCPU connections will fail.
+    pub fn signal_kill(&mut self) -> SysResult<()> {
+        self.kill_evt.write(1)?;
+        // By shutting down our half of the VCPU sockets, any blocked calls in the VCPU threads
+        // will unblock, allowing them to exit cleanly.
+        for sock in self.vcpu_sockets.iter() {
+            sock.0.shutdown(Shutdown::Both)?;
+        }
+        Ok(())
+    }
+
+    /// Waits without blocking for the plugin process to exit and returns the status.
+    pub fn try_wait(&mut self) -> SysResult<ProcessStatus> {
+        match self.plugin_proc.try_wait() {
+            Ok(Some(status)) => {
+                match status.code() {
+                    Some(0) => Ok(ProcessStatus::Success),
+                    Some(code) => Ok(ProcessStatus::Fail(code)),
+                    // If there was no exit code there must be a signal.
+                    None => Ok(ProcessStatus::Signal(status.signal().unwrap())),
+                }
+            }
+            Ok(None) => Ok(ProcessStatus::Running),
+            Err(e) => Err(io_to_sys_err(e)),
+        }
+    }
+
+    fn handle_io_event(entry: VacantEntry<u32, PluginObject>,
+                       vm: &mut Vm,
+                       io_event: &MainRequest_Create_IoEvent)
+                       -> SysResult<RawFd> {
+        let evt = EventFd::new()?;
+        let addr = match io_event.space {
+            AddressSpace::IOPORT => IoeventAddress::Pio(io_event.address),
+            AddressSpace::MMIO => IoeventAddress::Mmio(io_event.address),
+        };
+        match io_event.length {
+            0 => vm.register_ioevent(&evt, addr, NoDatamatch)?,
+            1 => vm.register_ioevent(&evt, addr, io_event.datamatch as u8)?,
+            2 => vm.register_ioevent(&evt, addr, io_event.datamatch as u16)?,
+            4 => vm.register_ioevent(&evt, addr, io_event.datamatch as u32)?,
+            8 => vm.register_ioevent(&evt, addr, io_event.datamatch as u64)?,
+            _ => return Err(SysError::new(-EINVAL)),
+        };
+
+        let fd = evt.as_raw_fd();
+        entry.insert(PluginObject::IoEvent {
+                         evt,
+                         addr,
+                         length: io_event.length,
+                         datamatch: io_event.datamatch,
+                     });
+        Ok(fd)
+    }
+
+    fn handle_memory(entry: VacantEntry<u32, PluginObject>,
+                     vm: &mut Vm,
+                     memfd: File,
+                     offset: u64,
+                     start: u64,
+                     length: u64,
+                     read_only: bool)
+                     -> SysResult<()> {
+        let shm = SharedMemory::from_raw_fd(memfd)?;
+        // Checking the seals ensures the plugin process won't shrink the mmapped file, causing us
+        // to SIGBUS in the future.
+        let seals = shm.get_seals()?;
+        if !seals.shrink_seal() {
+            return Err(SysError::new(-EPERM));
+        }
+        // Check to make sure we don't mmap areas beyond the end of the memfd.
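+        // Using checked_add means a wrapping `length + offset` cannot slip a too-large mapping
+        // past the size test below; a wrapped sum is rejected outright instead.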
+        match length.checked_add(offset) {
+            Some(end) if end > shm.size() => return Err(SysError::new(-EINVAL)),
+            None => return Err(SysError::new(-EOVERFLOW)),
+            _ => {}
+        }
+        let mem = MemoryMapping::from_fd_offset(&shm, length as usize, offset as usize)
+            .map_err(mmap_to_sys_err)?;
+        let slot = vm.add_device_memory(GuestAddress(start), mem, read_only, true)?;
+        entry.insert(PluginObject::Memory {
+                         slot,
+                         length: length as usize,
+                     });
+        Ok(())
+    }
+
+    fn handle_reserve_range(&mut self, reserve_range: &MainRequest_ReserveRange) -> SysResult<()> {
+        match self.shared_vcpu_state.write() {
+            Ok(mut lock) => {
+                let space = match reserve_range.space {
+                    AddressSpace::IOPORT => IoSpace::Ioport,
+                    AddressSpace::MMIO => IoSpace::Mmio,
+                };
+                match reserve_range.length {
+                    0 => lock.unreserve_range(space, reserve_range.start),
+                    _ => lock.reserve_range(space, reserve_range.start, reserve_range.length),
+                }
+            }
+            Err(_) => Err(SysError::new(-EDEADLK)),
+        }
+    }
+
+    fn handle_set_irq_routing(vm: &mut Vm,
+                              irq_routing: &MainRequest_SetIrqRouting)
+                              -> SysResult<()> {
+        let mut routes = Vec::with_capacity(irq_routing.routes.len());
+        for route in irq_routing.routes.iter() {
+            routes.push(IrqRoute {
+                            gsi: route.irq_id,
+                            source: if route.has_irqchip() {
+                                let irqchip = route.get_irqchip();
+                                IrqSource::Irqchip {
+                                    chip: irqchip.irqchip,
+                                    pin: irqchip.pin,
+                                }
+                            } else if route.has_msi() {
+                                let msi = route.get_msi();
+                                IrqSource::Msi {
+                                    address: msi.address,
+                                    data: msi.data,
+                                }
+                            } else {
+                                // Because route is a oneof field in the proto definition, this
+                                // should only happen if a new variant gets added without updating
+                                // this chained if block.
+                                return Err(SysError::new(-EINVAL));
+                            },
+                        });
+        }
+        vm.set_gsi_routing(&routes[..])
+    }
+
+    fn handle_pause_vcpus(&self, vcpu_handles: &[JoinHandle<()>], cpu_mask: u64, user_data: u64) {
+        for (cpu_id, (handle, per_cpu_state)) in
+            vcpu_handles
+                .iter()
+                .zip(self.per_vcpu_states.iter())
+                .enumerate() {
+            if cpu_mask & (1 << cpu_id) != 0 {
+                per_cpu_state.lock().unwrap().request_pause(user_data);
+                if let Err(e) = handle.kill(0) {
+                    error!("failed to interrupt vcpu {}: {:?}", cpu_id, e);
+                }
+            }
+        }
+    }
+
+    /// Handles a request on a readable socket identified by its index in the slice returned by
+    /// `sockets`.
+    ///
+    /// The `vm` is used to service requests that affect the VM. The `vcpu_handles` slice is used
+    /// to interrupt a VCPU thread currently running in the VM if the socket requests it.
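+    ///
+    /// A sketch of the dispatch pattern used by the poll loop in `run_config` (the names here
+    /// refer to that loop's locals):
+    ///
+    /// ```
+    /// match plugin.handle_socket(socket_index, &mut vm, &vcpu_handles) {
+    ///     Ok(_) => {}
+    ///     Err(Error::PluginSocketHup) => sockets_to_drop.push(socket_index),
+    ///     Err(e) => {
+    ///         warn!("error handling plugin socket: {}", e);
+    ///         sockets_to_drop.push(socket_index);
+    ///     }
+    /// }
+    /// ```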
+    pub fn handle_socket(&mut self,
+                         index: usize,
+                         vm: &mut Vm,
+                         vcpu_handles: &[JoinHandle<()>])
+                         -> Result<()> {
+        let msg_size = self.scm
+            .recv(&self.request_sockets[index],
+                  &mut [&mut self.request_buffer],
+                  &mut self.datagram_files)
+            .map_err(Error::PluginSocketRecv)?;
+
+        if msg_size == 0 {
+            return Err(Error::PluginSocketHup);
+        }
+
+        let request = protobuf::parse_from_bytes::<MainRequest>(&self.request_buffer[..msg_size])
+            .map_err(Error::DecodeRequest)?;
+
+        let mut response_files = Vec::new();
+        let mut response_fds = Vec::new();
+        let mut response = MainResponse::new();
+        let res = if request.has_create() {
+            response.mut_create();
+            let create = request.get_create();
+            match self.objects.entry(create.id) {
+                Entry::Vacant(entry) => {
+                    if create.has_io_event() {
+                        match Self::handle_io_event(entry, vm, create.get_io_event()) {
+                            Ok(fd) => {
+                                response_fds.push(fd);
+                                Ok(())
+                            }
+                            Err(e) => Err(e),
+                        }
+                    } else if create.has_memory() {
+                        let memory = create.get_memory();
+                        match self.datagram_files.pop() {
+                            Some(memfd) => {
+                                Self::handle_memory(entry,
+                                                    vm,
+                                                    memfd,
+                                                    memory.offset,
+                                                    memory.start,
+                                                    memory.length,
+                                                    memory.read_only)
+                            }
+                            None => Err(SysError::new(-EBADF)),
+                        }
+                    } else if create.has_irq_event() {
+                        let irq_event = create.get_irq_event();
+                        match (EventFd::new(), EventFd::new()) {
+                            (Ok(evt), Ok(resample_evt)) => {
+                                match vm.register_irqfd_resample(&evt,
+                                                                 &resample_evt,
+                                                                 irq_event.irq_id) {
+                                    Ok(()) => {
+                                        response_fds.push(evt.as_raw_fd());
+                                        response_fds.push(resample_evt.as_raw_fd());
+                                        response_files.push(downcast_file(resample_evt));
+                                        entry.insert(PluginObject::IrqEvent {
+                                                         irq_id: irq_event.irq_id,
+                                                         evt,
+                                                     });
+                                        Ok(())
+                                    }
+                                    Err(e) => Err(e),
+                                }
+                            }
+                            (Err(e), _) | (_, Err(e)) => Err(e),
+                        }
+                    } else {
+                        Err(SysError::new(-ENOTTY))
+                    }
+                }
+                Entry::Occupied(_) => Err(SysError::new(-EEXIST)),
+            }
+        } else if request.has_destroy() {
+            response.mut_destroy();
+            match self.objects.entry(request.get_destroy().id) {
+                Entry::Occupied(entry) => entry.remove().destroy(vm),
+                Entry::Vacant(_) => Err(SysError::new(-ENOENT)),
+            }
+        } else if request.has_new_connection() {
+            response.mut_new_connection();
+            match new_seqpacket_pair() {
+                Ok((request_socket, child_socket)) => {
+                    self.request_sockets.push(request_socket);
+                    response_fds.push(child_socket.as_raw_fd());
+                    response_files.push(downcast_file(child_socket));
+                    Ok(())
+                }
+                Err(e) => Err(e),
+            }
+        } else if request.has_get_shutdown_eventfd() {
+            response.mut_get_shutdown_eventfd();
+            response_fds.push(self.kill_evt.as_raw_fd());
+            Ok(())
+        } else if request.has_reserve_range() {
+            response.mut_reserve_range();
+            self.handle_reserve_range(request.get_reserve_range())
+        } else if request.has_set_irq() {
+            response.mut_set_irq();
+            let irq = request.get_set_irq();
+            vm.set_irq_line(irq.irq_id, irq.active)
+        } else if request.has_set_irq_routing() {
+            response.mut_set_irq_routing();
+            Self::handle_set_irq_routing(vm, request.get_set_irq_routing())
+        } else if request.has_set_identity_map_addr() {
+            response.mut_set_identity_map_addr();
+            let addr = request.get_set_identity_map_addr().address;
+            vm.set_identity_map_addr(GuestAddress(addr as u64))
+        } else if request.has_pause_vcpus() {
+            response.mut_pause_vcpus();
+            let pause_vcpus = request.get_pause_vcpus();
+            self.handle_pause_vcpus(vcpu_handles, pause_vcpus.cpu_mask, pause_vcpus.user);
+            Ok(())
+        } else if request.has_get_vcpus() {
+            response.mut_get_vcpus();
+            response_fds.extend(self.vcpu_sockets.iter().map(|s| s.1.as_raw_fd()));
+            Ok(())
+        } else if request.has_start() {
+            response.mut_start();
+            if self.started {
+                Err(SysError::new(-EINVAL))
+            } else {
+                self.started = true;
+                Ok(())
+            }
+        } else if request.has_dirty_log() {
+            let dirty_log_response = response.mut_dirty_log();
+            match self.objects.get(&request.get_dirty_log().id) {
+                Some(&PluginObject::Memory { slot, length }) => {
+                    let dirty_log = dirty_log_response.mut_bitmap();
+                    dirty_log.resize(dirty_log_bitmap_size(length), 0);
+                    vm.get_dirty_log(slot, &mut dirty_log[..])
+                }
+                _ => Err(SysError::new(-ENOENT)),
+            }
+        } else {
+            Err(SysError::new(-ENOTTY))
+        };
+
+        if let Err(e) = res {
+            response.errno = e.errno();
+        }
+
+        self.datagram_files.clear();
+        self.response_buffer.clear();
+        response
+            .write_to_vec(&mut self.response_buffer)
+            .map_err(Error::EncodeResponse)?;
+        assert_ne!(self.response_buffer.len(), 0);
+        self.scm
+            .send(&self.request_sockets[index],
+                  &[&self.response_buffer[..]],
+                  &response_fds)
+            .map_err(Error::PluginSocketSend)?;
+
+        Ok(())
+    }
+}
+
+impl Drop for Process {
+    fn drop(&mut self) {
+        // Ignore the result because there is nothing we can do about it.
+        if let Err(e) = self.signal_kill() {
+            error!("failed to signal kill event for plugin: {:?}", e);
+        }
+    }
+}
diff --git a/src/plugin/vcpu.rs b/src/plugin/vcpu.rs
new file mode 100644
index 0000000..8cfbdca
--- /dev/null
+++ b/src/plugin/vcpu.rs
@@ -0,0 +1,466 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::cell::{Cell, RefCell};
+use std::cmp::min;
+use std::cmp::{self, Ord, PartialOrd, PartialEq};
+use std::collections::btree_set::BTreeSet;
+use std::os::unix::net::UnixDatagram;
+use std::sync::{Arc, Mutex, RwLock};
+
+use libc::{EINVAL, EPROTO, ENOENT, EPERM, EPIPE, EDEADLK, ENOTTY};
+
+use protobuf;
+use protobuf::Message;
+
+use data_model::DataInit;
+use kvm::Vcpu;
+use kvm_sys::{kvm_regs, kvm_sregs, kvm_fpu};
+use plugin_proto::*;
+
+use super::*;
+
+/// Identifier for an address space in the VM.
+#[derive(Copy, Clone)]
+pub enum IoSpace {
+    Ioport,
+    Mmio,
+}
+
+#[derive(Debug, Copy, Clone)]
+struct Range(u64, u64);
+
+impl Eq for Range {}
+
+impl PartialEq for Range {
+    fn eq(&self, other: &Range) -> bool {
+        self.0 == other.0
+    }
+}
+
+impl Ord for Range {
+    fn cmp(&self, other: &Range) -> cmp::Ordering {
+        self.0.cmp(&other.0)
+    }
+}
+
+impl PartialOrd for Range {
+    fn partial_cmp(&self, other: &Range) -> Option<cmp::Ordering> {
+        self.0.partial_cmp(&other.0)
+    }
+}
+
+// Wrapper types to make the kvm register structs DataInit
+#[derive(Copy, Clone)]
+struct VcpuRegs(kvm_regs);
+unsafe impl DataInit for VcpuRegs {}
+#[derive(Copy, Clone)]
+struct VcpuSregs(kvm_sregs);
+unsafe impl DataInit for VcpuSregs {}
+#[derive(Copy, Clone)]
+struct VcpuFpu(kvm_fpu);
+unsafe impl DataInit for VcpuFpu {}
+
+fn get_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet) -> SysResult<Vec<u8>> {
+    Ok(match state_set {
+           VcpuRequest_StateSet::REGS => VcpuRegs(vcpu.get_regs()?).as_slice().to_vec(),
+           VcpuRequest_StateSet::SREGS => VcpuSregs(vcpu.get_sregs()?).as_slice().to_vec(),
+           VcpuRequest_StateSet::FPU => VcpuFpu(vcpu.get_fpu()?).as_slice().to_vec(),
+       })
+}
+
+fn set_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet, state: &[u8]) -> SysResult<()> {
+    match state_set {
+        VcpuRequest_StateSet::REGS => {
+            vcpu.set_regs(&VcpuRegs::from_slice(state)
+                               .ok_or(SysError::new(-EINVAL))?
+                               .0)
+        }
+        VcpuRequest_StateSet::SREGS => {
+            vcpu.set_sregs(&VcpuSregs::from_slice(state)
+                                .ok_or(SysError::new(-EINVAL))?
+                                .0)
+        }
+        VcpuRequest_StateSet::FPU => {
+            vcpu.set_fpu(&VcpuFpu::from_slice(state)
+                              .ok_or(SysError::new(-EINVAL))?
+                              .0)
+        }
+    }
+}
+
+/// State shared by every VCPU, grouped together to make edits to the state coherent across VCPUs.
+#[derive(Default)]
+pub struct SharedVcpuState {
+    ioport_regions: BTreeSet<Range>,
+    mmio_regions: BTreeSet<Range>,
+}
+
+impl SharedVcpuState {
+    /// Reserves the given range for handling by the plugin process.
+    ///
+    /// This will reject any reservation that overlaps with an existing reservation.
+    pub fn reserve_range(&mut self, space: IoSpace, start: u64, length: u64) -> SysResult<()> {
+        if length == 0 {
+            return Err(SysError::new(-EINVAL));
+        }
+
+        // Reject all cases where this reservation is part of another reservation.
+        if self.is_reserved(space, start) {
+            return Err(SysError::new(-EPERM));
+        }
+
+        let last_address = match start.checked_add(length) {
+            Some(end) => end - 1,
+            None => return Err(SysError::new(-EINVAL)),
+        };
+
+        let space = match space {
+            IoSpace::Ioport => &mut self.ioport_regions,
+            IoSpace::Mmio => &mut self.mmio_regions,
+        };
+
+        match space
+                  .range(..Range(last_address, 0))
+                  .next_back()
+                  .cloned() {
+            Some(Range(existing_start, _)) if existing_start >= start => Err(SysError::new(-EPERM)),
+            _ => {
+                space.insert(Range(start, length));
+                Ok(())
+            }
+        }
+    }
+
+    /// Releases a reservation previously made at `start` in the given `space`.
+    pub fn unreserve_range(&mut self, space: IoSpace, start: u64) -> SysResult<()> {
+        let range = Range(start, 0);
+        let space = match space {
+            IoSpace::Ioport => &mut self.ioport_regions,
+            IoSpace::Mmio => &mut self.mmio_regions,
+        };
+        if space.remove(&range) {
+            Ok(())
+        } else {
+            Err(SysError::new(-ENOENT))
+        }
+    }
+
+    fn is_reserved(&self, space: IoSpace, addr: u64) -> bool {
+        if let Some(Range(start, len)) = self.first_before(space, addr) {
+            let offset = addr - start;
+            if offset < len {
+                return true;
+            }
+        }
+        false
+    }
+
+    fn first_before(&self, io_space: IoSpace, addr: u64) -> Option<Range> {
+        let space = match io_space {
+            IoSpace::Ioport => &self.ioport_regions,
+            IoSpace::Mmio => &self.mmio_regions,
+        };
+
+        match addr.checked_add(1) {
+            Some(next_addr) => space.range(..Range(next_addr, 0)).next_back().cloned(),
+            None => None,
+        }
+    }
+}
+
+/// State specific to a VCPU, grouped so that each `PluginVcpu` object will share a canonical
+/// version.
+#[derive(Default)]
+pub struct PerVcpuState {
+    pause_request: Option<u64>,
+}
+
+impl PerVcpuState {
+    /// Indicates that a VCPU should wait until the plugin process resumes the VCPU.
+    ///
+    /// This method will not cause a VCPU to pause immediately. Instead, the VCPU thread will
+    /// continue running until interrupted, at which point it will check for a pending pause. If
+    /// there is another call to `request_pause` for this VCPU before that happens, the last pause
+    /// request's `data` will be overwritten with the most recent `data`.
+    ///
+    /// To get an immediate pause after calling `request_pause`, send a signal (with a registered
+    /// handler) to the thread handling the VCPU corresponding to this state. This should interrupt
+    /// the running VCPU, which should check for a pause with `PluginVcpu::pre_run`.
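+    ///
+    /// A sketch of the calling pattern, as used by `Process::handle_pause_vcpus` (`handle` is the
+    /// `JoinHandle` of the paused VCPU's thread):
+    ///
+    /// ```
+    /// per_cpu_state.lock().unwrap().request_pause(user_data);
+    /// // Interrupt the VCPU thread so it returns from `Vcpu::run` and reaches `pre_run`.
+    /// if let Err(e) = handle.kill(0) {
+    ///     error!("failed to interrupt vcpu: {:?}", e);
+    /// }
+    /// ```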
+    pub fn request_pause(&mut self, data: u64) {
+        self.pause_request = Some(data);
+    }
+}
+
+enum VcpuRunData<'a> {
+    Read(&'a mut [u8]),
+    Write(&'a [u8]),
+}
+
+impl<'a> VcpuRunData<'a> {
+    fn is_write(&self) -> bool {
+        match self {
+            &VcpuRunData::Write(_) => true,
+            _ => false,
+        }
+    }
+
+    fn as_slice(&self) -> &[u8] {
+        match self {
+            &VcpuRunData::Read(ref s) => s,
+            &VcpuRunData::Write(ref s) => s,
+        }
+    }
+
+    fn copy_from_slice(&mut self, data: &[u8]) {
+        match self {
+            &mut VcpuRunData::Read(ref mut s) => {
+                // Only copy the prefix both slices have in common; copy_from_slice panics unless
+                // the source and destination lengths are equal.
+                let copy_size = min(s.len(), data.len());
+                s[..copy_size].copy_from_slice(&data[..copy_size]);
+            }
+            _ => {}
+        }
+    }
+}
+
+/// State object for a VCPU's connection with the plugin process.
+///
+/// This is used by a VCPU thread to allow the plugin process to handle vmexits. Each method may
+/// block indefinitely while the plugin process is handling requests. In order to cleanly shutdown
+/// during these blocking calls, the `connection` socket should be shutdown. This will end the
+/// blocking calls.
+pub struct PluginVcpu {
+    shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
+    per_vcpu_state: Arc<Mutex<PerVcpuState>>,
+    connection: UnixDatagram,
+    wait_reason: Cell<Option<VcpuResponse_Wait>>,
+    request_buffer: RefCell<Vec<u8>>,
+    response_buffer: RefCell<Vec<u8>>,
+}
+
+impl PluginVcpu {
+    /// Creates the plugin state and connection container for a VCPU thread.
+    pub fn new(shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
+               per_vcpu_state: Arc<Mutex<PerVcpuState>>,
+               connection: UnixDatagram)
+               -> PluginVcpu {
+        PluginVcpu {
+            shared_vcpu_state,
+            per_vcpu_state,
+            connection,
+            wait_reason: Default::default(),
+            request_buffer: Default::default(),
+            response_buffer: Default::default(),
+        }
+    }
+
+    /// Tells the plugin process to initialize this VCPU.
+    ///
+    /// This should be called for each VCPU before the first run of any of the VCPUs in the VM.
+    pub fn init(&self, vcpu: &Vcpu) -> SysResult<()> {
+        let mut wait_reason = VcpuResponse_Wait::new();
+        wait_reason.mut_init();
+        self.wait_reason.set(Some(wait_reason));
+        self.handle_until_resume(vcpu)?;
+        Ok(())
+    }
+
+    /// The VCPU thread should call this before rerunning a VM in order to handle pending requests
+    /// to this VCPU.
+    pub fn pre_run(&self, vcpu: &Vcpu) -> SysResult<()> {
+        match self.per_vcpu_state.lock() {
+            Ok(mut per_vcpu_state) => {
+                if let Some(user) = per_vcpu_state.pause_request.take() {
+                    let mut wait_reason = VcpuResponse_Wait::new();
+                    wait_reason.mut_user().user = user;
+                    self.wait_reason.set(Some(wait_reason));
+                    self.handle_until_resume(vcpu)?;
+                }
+                Ok(())
+            }
+            Err(_) => Err(SysError::new(-EDEADLK)),
+        }
+    }
+
+    fn process(&self, io_space: IoSpace, addr: u64, mut data: VcpuRunData, vcpu: &Vcpu) -> bool {
+        let vcpu_state_lock = match self.shared_vcpu_state.read() {
+            Ok(l) => l,
+            Err(e) => {
+                error!("error read locking shared cpu state: {:?}", e);
+                return false;
+            }
+        };
+
+        let first_before_addr = vcpu_state_lock.first_before(io_space, addr);
+        // Drops the read lock as soon as possible, to prevent holding lock while blocked in
+        // `handle_until_resume`.
+        drop(vcpu_state_lock);
+
+        match first_before_addr {
+            Some(Range(start, len)) => {
+                let offset = addr - start;
+                if offset >= len {
+                    return false;
+                }
+                let mut wait_reason = VcpuResponse_Wait::new();
+                {
+                    let io = wait_reason.mut_io();
+                    io.space = match io_space {
+                        IoSpace::Ioport => AddressSpace::IOPORT,
+                        IoSpace::Mmio => AddressSpace::MMIO,
+                    };
+                    io.address = addr;
+                    io.is_write = data.is_write();
+                    io.data = data.as_slice().to_vec();
+                }
+                self.wait_reason.set(Some(wait_reason));
+                match self.handle_until_resume(vcpu) {
+                    Ok(resume_data) => data.copy_from_slice(&resume_data),
+                    Err(e) if e.errno() == -EPIPE => {}
+                    Err(e) => error!("failed to process vcpu requests: {:?}", e),
+                }
+                true
+            }
+            None => false,
+        }
+    }
+
+    /// Has the plugin process handle an IO port read.
+    pub fn io_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
+        self.process(IoSpace::Ioport, addr, VcpuRunData::Read(data), vcpu)
+    }
+
+    /// Has the plugin process handle an IO port write.
+    pub fn io_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
+        self.process(IoSpace::Ioport, addr, VcpuRunData::Write(data), vcpu)
+    }
+
+    /// Has the plugin process handle an MMIO read.
+    pub fn mmio_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
+        self.process(IoSpace::Mmio, addr, VcpuRunData::Read(data), vcpu)
+    }
+
+    /// Has the plugin process handle an MMIO write.
+    pub fn mmio_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
+        self.process(IoSpace::Mmio, addr, VcpuRunData::Write(data), vcpu)
+    }
+
+    fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> {
+        let mut resume_data = None;
+        let mut request_buffer = self.request_buffer.borrow_mut();
+        request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0);
+
+        let msg_size = self.connection
+            .recv(&mut request_buffer)
+            .map_err(io_to_sys_err)?;
+
+        let mut request =
+            protobuf::parse_from_bytes::<VcpuRequest>(&request_buffer[..msg_size])
+                .map_err(proto_to_sys_err)?;
+
+        let wait_reason = self.wait_reason.take();
+
+        let mut response = VcpuResponse::new();
+        let res = if request.has_wait() {
+            match wait_reason {
+                Some(wait_reason) => {
+                    response.set_wait(wait_reason);
+                    Ok(())
+                }
+                None => Err(SysError::new(-EPROTO)),
+            }
+        } else if wait_reason.is_some() {
+            // Any request other than getting the wait_reason while there is one pending is
+            // invalid.
+            self.wait_reason.set(wait_reason);
+            Err(SysError::new(-EPROTO))
+        } else if request.has_resume() {
+            response.mut_resume();
+            resume_data = Some(request.take_resume().take_data());
+            Ok(())
+        } else if request.has_get_state() {
+            let response_state = response.mut_get_state();
+            match get_vcpu_state(vcpu, request.get_get_state().set) {
+                Ok(state) => {
+                    response_state.state = state;
+                    Ok(())
+                }
+                Err(e) => Err(e),
+            }
+        } else if request.has_set_state() {
+            response.mut_set_state();
+            let set_state = request.get_set_state();
+            set_vcpu_state(vcpu, set_state.set, set_state.get_state())
+        } else {
+            Err(SysError::new(-ENOTTY))
+        };
+
+        if let Err(e) = res {
+            response.errno = e.errno();
+        }
+
+        let mut response_buffer = self.response_buffer.borrow_mut();
+        response_buffer.clear();
+        response
+            .write_to_vec(&mut response_buffer)
+            .map_err(proto_to_sys_err)?;
+        self.connection
+            .send(&response_buffer[..])
+            .map_err(io_to_sys_err)?;
+
+        Ok(resume_data)
+    }
+
+    fn handle_until_resume(&self, vcpu: &Vcpu) -> SysResult<Vec<u8>> {
+        loop {
+            if let Some(resume_data) = self.handle_request(vcpu)? {
+                return Ok(resume_data);
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn shared_vcpu_reserve() {
+        let mut shared_vcpu_state = SharedVcpuState::default();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x10, 0)
+            .unwrap_err();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x10, 0x10)
+            .unwrap();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x0f, 0x10)
+            .unwrap_err();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x10, 0x10)
+            .unwrap_err();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x10, 0x15)
+            .unwrap_err();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x12, 0x15)
+            .unwrap_err();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x12, 0x01)
+            .unwrap_err();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x0, 0x20)
+            .unwrap_err();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x20, 0x05)
+            .unwrap();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x25, 0x05)
+            .unwrap();
+        shared_vcpu_state
+            .reserve_range(IoSpace::Ioport, 0x0, 0x10)
+            .unwrap();
+    }
+}
diff --git a/tests/plugin_adder.c b/tests/plugin_adder.c
new file mode 100644
index 0000000..f6c514f
--- /dev/null
+++ b/tests/plugin_adder.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright 2017 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/memfd.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "crosvm.h"
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#endif
+
+#ifndef F_SEAL_SHRINK
+#define F_SEAL_SHRINK 0x0002
+#endif
+
+#define SERIAL_ADDRESS 0x3f8
+#define KILL_ADDRESS 0x3f9
+
+char g_serial_out[16];
+int g_kill_evt;
+
+void *vcpu_thread(void *arg) {
+    struct crosvm_vcpu *vcpu = arg;
+    struct crosvm_vcpu_event evt;
+    int i = 0;
+    while (crosvm_vcpu_wait(vcpu, &evt) == 0) {
+        if (evt.kind == CROSVM_VCPU_EVENT_KIND_INIT) {
+            struct kvm_sregs sregs;
+            crosvm_vcpu_get_sregs(vcpu, &sregs);
+            sregs.cs.base = 0;
+            sregs.cs.selector = 0;
+            sregs.es.base = KILL_ADDRESS;
+            sregs.es.selector = 0;
+            crosvm_vcpu_set_sregs(vcpu, &sregs);
+
+            struct kvm_regs regs;
+            crosvm_vcpu_get_regs(vcpu, &regs);
+            regs.rip = 0x1000;
+            regs.rax = 2;
+            regs.rbx = 7;
+            regs.rflags = 2;
+            crosvm_vcpu_set_regs(vcpu, &regs);
+        }
+        if (evt.kind == CROSVM_VCPU_EVENT_KIND_IO_ACCESS) {
+            if (evt.io_access.address_space == CROSVM_ADDRESS_SPACE_IOPORT &&
+                evt.io_access.address == SERIAL_ADDRESS &&
+                evt.io_access.is_write &&
+                evt.io_access.length == 1) {
+                g_serial_out[i] = evt.io_access.data[0];
+                i++;
+            }
+            if (evt.io_access.address_space == CROSVM_ADDRESS_SPACE_IOPORT &&
+                evt.io_access.address == KILL_ADDRESS &&
+                evt.io_access.is_write &&
+                evt.io_access.length == 1 &&
+                evt.io_access.data[0] == 1)
+            {
+                uint64_t dummy = 1;
+                write(g_kill_evt, &dummy, sizeof(dummy));
+                return NULL;
+            }
+        }
+
+        crosvm_vcpu_resume(vcpu);
+    }
+
+    return NULL;
+}
+
+int main(int argc, char** argv) {
+    const uint8_t code[] = {
+    /*
+    0000  BAF803  mov dx,0x3f8
+    0003  00D8    add al,bl
+    0005  0430    add al,0x30
+    0007  EE      out dx,al
+    0008  B00A    mov al,0x0a
+    000A  EE      out dx,al
+    000B  BAF903  mov dx,0x3f9
+    000E  B001    mov al,0x1
+    0010  EE      out dx,al
+    0011  F4      hlt
+    */
+        0xba, 0xf8, 0x03,
+        0x00, 0xd8,
+        0x04, '0',
+        0xee,
+        0xb0, '\n',
+        0xee,
+        0xba, 0xf9, 0x03,
+        0xb0, 0x01,
+        0xee,
+        0xf4
+    };
+
+    struct crosvm *crosvm;
+    int ret = crosvm_connect(&crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to connect to crosvm: %d\n", ret);
+        return 1;
+    }
+
+    /*
+     * Not strictly necessary, but demonstrates we can have as many connections
+     * as we please.
+     */
+    struct crosvm *extra_crosvm;
+    ret = crosvm_new_connection(crosvm, &extra_crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to make new socket: %d\n", ret);
+        return 1;
+    }
+
+    /* We need this eventfd to know when to exit before being killed. */
+    g_kill_evt = crosvm_get_shutdown_eventfd(crosvm);
+    if (g_kill_evt < 0) {
+        fprintf(stderr, "failed to get kill eventfd: %d\n", g_kill_evt);
+        return 1;
+    }
+
+    ret = crosvm_reserve_range(crosvm, CROSVM_ADDRESS_SPACE_IOPORT, SERIAL_ADDRESS, 1);
+    if (ret) {
+        fprintf(stderr, "failed to reserve ioport range: %d\n", ret);
+        return 1;
+    }
+
+    ret = crosvm_reserve_range(crosvm, CROSVM_ADDRESS_SPACE_IOPORT, KILL_ADDRESS, 1);
+    if (ret) {
+        fprintf(stderr, "failed to reserve kill ioport range: %d\n", ret);
+        return 1;
+    }
+
+    int mem_size = 0x2000;
+    int mem_fd = syscall(SYS_memfd_create, "guest_mem", MFD_CLOEXEC | MFD_ALLOW_SEALING);
+    if (mem_fd < 0) {
+        fprintf(stderr, "failed to create guest memfd: %d\n", errno);
+        return 1;
+    }
+    ret = ftruncate(mem_fd, mem_size);
+    if (ret) {
+        fprintf(stderr, "failed to set size of guest memory: %d\n", errno);
+        return 1;
+    }
+    uint8_t *mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, 0x1000);
+    if (mem == MAP_FAILED) {
+        fprintf(stderr, "failed to mmap guest memory: %d\n", errno);
+        return 1;
+    }
+    fcntl(mem_fd, F_ADD_SEALS, F_SEAL_SHRINK);
+    memcpy(mem, code, sizeof(code));
+
+    struct crosvm_memory *mem_obj;
+    ret = crosvm_create_memory(crosvm, mem_fd, 0x1000, 0x1000, 0x1000, false, &mem_obj);
+    if (ret) {
+        fprintf(stderr, "failed to create memory in crosvm: %d\n", ret);
+        return 1;
+    }
+
+    /* get and create a thread for each vcpu */
+    struct crosvm_vcpu *vcpus[32];
+    pthread_t vcpu_threads[32];
+    uint32_t vcpu_count;
+    for (vcpu_count = 0; vcpu_count < 32; vcpu_count++) {
+        ret = crosvm_get_vcpu(crosvm, vcpu_count, &vcpus[vcpu_count]);
+        if (ret == -ENOENT)
+            break;
+
+        if (ret) {
+            fprintf(stderr, "error while getting all vcpus: %d\n", ret);
+            return 1;
+        }
+        pthread_create(&vcpu_threads[vcpu_count], NULL, vcpu_thread, vcpus[vcpu_count]);
+    }
+
+    ret = crosvm_start(extra_crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to tell crosvm to start: %d\n", ret);
+        return 1;
+    }
+
+    /* Wait for crosvm to request that we exit otherwise we will be killed. */
+    uint64_t dummy;
+    read(g_kill_evt, &dummy, 8);
+
+    ret = crosvm_destroy_memory(crosvm, &mem_obj);
+    if (ret) {
+        fprintf(stderr, "failed to destroy memory in crosvm: %d\n", ret);
+        return 1;
+    }
+
+    ret = crosvm_reserve_range(crosvm, CROSVM_ADDRESS_SPACE_IOPORT, SERIAL_ADDRESS, 0);
+    if (ret) {
+        fprintf(stderr, "failed to unreserve ioport range: %d\n", ret);
+        return 1;
+    }
+
+    ret = crosvm_reserve_range(crosvm, CROSVM_ADDRESS_SPACE_IOPORT, KILL_ADDRESS, 0);
+    if (ret) {
+        fprintf(stderr, "failed to unreserve kill ioport range: %d\n", ret);
+        return 1;
+    }
+
+    return strcmp(g_serial_out, "9\n");
+}
diff --git a/tests/plugin_dirty_log.c b/tests/plugin_dirty_log.c
new file mode 100644
index 0000000..bc3db83
--- /dev/null
+++ b/tests/plugin_dirty_log.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2017 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
diff --git a/tests/plugin_dirty_log.c b/tests/plugin_dirty_log.c
new file mode 100644
index 0000000..bc3db83
--- /dev/null
+++ b/tests/plugin_dirty_log.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2017 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/memfd.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "crosvm.h"
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#endif
+
+#ifndef F_SEAL_SHRINK
+#define F_SEAL_SHRINK 0x0002
+#endif
+
+#define LOAD_ADDRESS 0x1000
+#define SI_ADDRESS 0x8000
+#define BL_VALUE 0x12
+#define KILL_ADDRESS 0x9000
+
+int g_kill_evt;
+
+void *vcpu_thread(void *arg) {
+    struct crosvm_vcpu *vcpu = arg;
+    struct crosvm_vcpu_event evt;
+    int i = 0;
+    while (crosvm_vcpu_wait(vcpu, &evt) == 0) {
+        if (evt.kind == CROSVM_VCPU_EVENT_KIND_INIT) {
+            struct kvm_sregs sregs;
+            crosvm_vcpu_get_sregs(vcpu, &sregs);
+            sregs.cs.base = 0;
+            sregs.cs.selector = 0;
+            sregs.es.base = KILL_ADDRESS;
+            sregs.es.selector = 0;
+            crosvm_vcpu_set_sregs(vcpu, &sregs);
+
+            struct kvm_regs regs;
+            crosvm_vcpu_get_regs(vcpu, &regs);
+            regs.rflags = 2;
+            regs.rip = LOAD_ADDRESS;
+            regs.rbx = BL_VALUE;
+            regs.rsi = SI_ADDRESS;
+            crosvm_vcpu_set_regs(vcpu, &regs);
+        }
+
+        if (evt.kind == CROSVM_VCPU_EVENT_KIND_IO_ACCESS &&
+            evt.io_access.address_space == CROSVM_ADDRESS_SPACE_MMIO &&
+            evt.io_access.address == KILL_ADDRESS &&
+            evt.io_access.is_write &&
+            evt.io_access.length == 1 &&
+            evt.io_access.data[0] == 1)
+        {
+            uint64_t dummy = 1;
+            write(g_kill_evt, &dummy, sizeof(dummy));
+            return NULL;
+        }
+
+        crosvm_vcpu_resume(vcpu);
+    }
+
+    return NULL;
+}
+
+int main(int argc, char** argv) {
+    const uint8_t code[] = {
+        /*
+        0000  881C          mov [si],bl
+        0002  26C606000001  mov byte [es:0x0],0x1
+        0008  F4            hlt
+        */
+        0x88, 0x1c, 0x26, 0xc6, 0x06, 0x00, 0x00, 0x01, 0xf4
+    };
+
+    struct crosvm *crosvm;
+    int ret = crosvm_connect(&crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to connect to crosvm: %d\n", ret);
+        return 1;
+    }
+
+    g_kill_evt = crosvm_get_shutdown_eventfd(crosvm);
+    if (g_kill_evt < 0) {
+        fprintf(stderr, "failed to get kill eventfd: %d\n", g_kill_evt);
+        return 1;
+    }
+
+    ret = crosvm_reserve_range(crosvm, CROSVM_ADDRESS_SPACE_MMIO, KILL_ADDRESS, 1);
+    if (ret) {
+        fprintf(stderr, "failed to reserve mmio range: %d\n", ret);
+        return 1;
+    }
+
+    int mem_size = 0x9000;
+    int mem_fd = syscall(SYS_memfd_create, "guest_mem", MFD_CLOEXEC | MFD_ALLOW_SEALING);
+    if (mem_fd < 0) {
+        fprintf(stderr, "failed to create guest memfd: %d\n", errno);
+        return 1;
+    }
+    ret = ftruncate(mem_fd, mem_size);
+    if (ret) {
+        fprintf(stderr, "failed to set size of guest memory: %d\n", errno);
+        return 1;
+    }
+    uint8_t *mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, 0);
+    if (mem == MAP_FAILED) {
+        fprintf(stderr, "failed to mmap guest memory: %d\n", errno);
+        return 1;
+    }
+    fcntl(mem_fd, F_ADD_SEALS, F_SEAL_SHRINK);
+    memcpy(mem + LOAD_ADDRESS, code, sizeof(code));
+
+    struct crosvm_memory *mem_obj;
+    ret = crosvm_create_memory(crosvm, mem_fd, 0, mem_size, 0, false, &mem_obj);
+    if (ret) {
+        fprintf(stderr, "failed to create memory in crosvm: %d\n", ret);
+        return 1;
+    }
+
+    struct crosvm_vcpu *vcpus[32];
+    pthread_t vcpu_threads[32];
+    uint32_t vcpu_count;
+    for (vcpu_count = 0; vcpu_count < 32; vcpu_count++) {
+        ret = crosvm_get_vcpu(crosvm, vcpu_count, &vcpus[vcpu_count]);
+        if (ret == -ENOENT)
+            break;
+
+        if (ret) {
+            fprintf(stderr, "error while getting all vcpus: %d\n", ret);
+            return 1;
+        }
+        pthread_create(&vcpu_threads[vcpu_count], NULL, vcpu_thread, vcpus[vcpu_count]);
+    }
+
+    ret = crosvm_start(crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to tell crosvm to start: %d\n", ret);
+        return 1;
+    }
+
+    uint64_t dummy;
+    read(g_kill_evt, &dummy, 8);
+
+    uint8_t dirty_log[2] = {0};
+    ret = crosvm_memory_get_dirty_log(crosvm, mem_obj, dirty_log);
+    if (ret) {
+        fprintf(stderr, "failed to get dirty log: %d\n", ret);
+        return 1;
+    }
+
+    if (dirty_log[1] != 0x1) {
+        fprintf(stderr, "dirty log does not have expected bits: %x\n", dirty_log[1]);
+        return 1;
+    }
+
+    uint64_t val = *(uint64_t*)(&mem[SI_ADDRESS]);
+    if (val != BL_VALUE) {
+        fprintf(stderr, "memory does not have expected value: %d\n", (int)val);
+        return 1;
+    }
+
+    return 0;
+}
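The dirty-log assertion above depends on the bitmap layout: one bit per 4 KiB guest page, least significant bit first, so page N lives in byte N / 8, bit N % 8. SI_ADDRESS (0x8000) is page 8, which is why the test checks dirty_log[1] against 0x1. Spelled out as a helper (hypothetical, not part of crosvm.h; it assumes the KVM-style bitmap the test relies on):

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT 12 /* 4 KiB guest pages */

/* True if the page containing guest physical address gpa is marked dirty
 * in a bitmap such as the one filled by crosvm_memory_get_dirty_log(). */
static bool page_is_dirty(const uint8_t *bitmap, uint64_t gpa) {
    uint64_t page = gpa >> PAGE_SHIFT;
    return bitmap[page / 8] & (1u << (page % 8));
}

For the test above, page_is_dirty(dirty_log, 0x8000) inspects byte 1, bit 0.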
diff --git a/tests/plugin_ioevent.c b/tests/plugin_ioevent.c
new file mode 100644
index 0000000..3662810
--- /dev/null
+++ b/tests/plugin_ioevent.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2017 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/memfd.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "crosvm.h"
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#endif
+
+#ifndef F_SEAL_SHRINK
+#define F_SEAL_SHRINK 0x0002
+#endif
+
+#define LOAD_ADDRESS 0x1000
+#define DATAMATCH_VAL 0x88
+#define KILL_ADDRESS 0x4000
+
+int g_kill_evt;
+
+void *vcpu_thread(void *arg) {
+    struct crosvm_vcpu *vcpu = arg;
+    struct crosvm_vcpu_event evt;
+    int i = 0;
+    while (crosvm_vcpu_wait(vcpu, &evt) == 0) {
+        if (evt.kind == CROSVM_VCPU_EVENT_KIND_INIT) {
+            struct kvm_sregs sregs;
+            crosvm_vcpu_get_sregs(vcpu, &sregs);
+            sregs.cs.base = 0;
+            sregs.cs.selector = 0;
+            sregs.es.base = KILL_ADDRESS;
+            sregs.es.selector = 0;
+            crosvm_vcpu_set_sregs(vcpu, &sregs);
+
+            struct kvm_regs regs;
+            crosvm_vcpu_get_regs(vcpu, &regs);
+            regs.rflags = 2;
+            regs.rip = LOAD_ADDRESS;
+            regs.rax = DATAMATCH_VAL;
+            regs.rbx = DATAMATCH_VAL - 1;
+            crosvm_vcpu_set_regs(vcpu, &regs);
+        }
+
+        if (evt.kind == CROSVM_VCPU_EVENT_KIND_IO_ACCESS &&
+            evt.io_access.address_space == CROSVM_ADDRESS_SPACE_MMIO &&
+            evt.io_access.address == KILL_ADDRESS &&
+            evt.io_access.is_write &&
+            evt.io_access.length == 1 &&
+            evt.io_access.data[0] == 1)
+        {
+            uint64_t dummy = 1;
+            write(g_kill_evt, &dummy, sizeof(dummy));
+            return NULL;
+        }
+
+        crosvm_vcpu_resume(vcpu);
+    }
+
+    return NULL;
+}
+
+int main(int argc, char** argv) {
+    const uint8_t code[] = {
+        /*
+        0000  BAF803        mov dx,0x3f8
+        0003  88C3          mov bl,al
+        0005  EE            out dx,al
+        0006  B000          mov al,0x0
+        0008  EE            out dx,al
+        0009  88D8          mov al,bl
+        000B  EE            out dx,al
+        000C  26C606000001  mov byte [es:0x0],0x1
+        0012  F4            hlt
+        */
+        0xba, 0xf8, 0x03,
+        0x88, 0xc3,
+        0xee,
+        0xb0, 0x00,
+        0xee,
+        0x88, 0xd8,
+        0xee,
+        0x26, 0xc6, 0x06, 0x00, 0x00, 0x01,
+        0xf4,
+    };
+
+    struct crosvm *crosvm;
+    int ret = crosvm_connect(&crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to connect to crosvm: %d\n", ret);
+        return 1;
+    }
+
+    g_kill_evt = crosvm_get_shutdown_eventfd(crosvm);
+    if (g_kill_evt < 0) {
+        fprintf(stderr, "failed to get kill eventfd: %d\n", g_kill_evt);
+        return 1;
+    }
+
+    ret = crosvm_reserve_range(crosvm, CROSVM_ADDRESS_SPACE_MMIO, KILL_ADDRESS, 1);
+    if (ret) {
+        fprintf(stderr, "failed to reserve mmio range: %d\n", ret);
+        return 1;
+    }
+
+    uint8_t datamatch = DATAMATCH_VAL;
+    struct crosvm_io *io;
+    ret = crosvm_create_io_event(crosvm, CROSVM_ADDRESS_SPACE_IOPORT, 0x3f8, 1, &datamatch, &io);
+    if (ret) {
+        fprintf(stderr, "failed to create ioevent: %d\n", ret);
+        return 1;
+    }
+
+    int ioeventfd = crosvm_io_event_fd(io);
+
+    int mem_size = 0x4000;
+    int mem_fd = syscall(SYS_memfd_create, "guest_mem", MFD_CLOEXEC | MFD_ALLOW_SEALING);
+    if (mem_fd < 0) {
+        fprintf(stderr, "failed to create guest memfd: %d\n", errno);
+        return 1;
+    }
+    ret = ftruncate(mem_fd, mem_size);
+    if (ret) {
+        fprintf(stderr, "failed to set size of guest memory: %d\n", errno);
+        return 1;
+    }
+    uint8_t *mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, 0);
+    if (mem == MAP_FAILED) {
+        fprintf(stderr, "failed to mmap guest memory: %d\n", errno);
+        return 1;
+    }
+    fcntl(mem_fd, F_ADD_SEALS, F_SEAL_SHRINK);
+    memcpy(mem + LOAD_ADDRESS, code, sizeof(code));
+
+    struct crosvm_memory *mem_obj;
+    ret = crosvm_create_memory(crosvm, mem_fd, 0, mem_size, 0, false, &mem_obj);
+    if (ret) {
+        fprintf(stderr, "failed to create memory in crosvm: %d\n", ret);
+        return 1;
+    }
+
+    /* Get a handle for and create a thread for each vcpu. */
+    struct crosvm_vcpu *vcpus[32];
+    pthread_t vcpu_threads[32];
+    uint32_t vcpu_count;
+    for (vcpu_count = 0; vcpu_count < 32; vcpu_count++) {
+        ret = crosvm_get_vcpu(crosvm, vcpu_count, &vcpus[vcpu_count]);
+        if (ret == -ENOENT)
+            break;
+
+        if (ret) {
+            fprintf(stderr, "error while getting all vcpus: %d\n", ret);
+            return 1;
+        }
+        pthread_create(&vcpu_threads[vcpu_count], NULL, vcpu_thread, vcpus[vcpu_count]);
+    }
+
+    ret = crosvm_start(crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to tell crosvm to start: %d\n", ret);
+        return 1;
+    }
+
+    uint64_t dummy;
+    read(g_kill_evt, &dummy, 8);
+
+    ret = read(ioeventfd, &dummy, sizeof(dummy));
+    if (ret == -1) {
+        fprintf(stderr, "failed to read ioeventfd: %d\n", errno);
+        return 1;
+    }
+
+    if (dummy != 2) {
+        fprintf(stderr, "ioeventfd was not triggered the expected number of times: %d\n", (int)dummy);
+        return 1;
+    }
+
+    return 0;
+}
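The expected count of 2 falls out of the datamatch: the ioevent on port 0x3f8 only fires for writes equal to DATAMATCH_VAL (0x88), and the guest writes 0x88, 0x00, 0x88, so the eventfd counter ends at 2. Reading an eventfd returns the accumulated 8-byte counter and resets it, which is what the test's final read() does. As a standalone check (illustrative helper, not crosvm API):

#include <stdint.h>
#include <unistd.h>

/* Drain an eventfd counter and report whether it accumulated the expected
 * count. Returns 0 on match, -1 on read failure or mismatch. */
static int expect_eventfd_count(int evtfd, uint64_t expected) {
    uint64_t count = 0;
    if (read(evtfd, &count, sizeof(count)) != sizeof(count))
        return -1;
    return count == expected ? 0 : -1;
}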
diff --git a/tests/plugin_irqfd.c b/tests/plugin_irqfd.c
new file mode 100644
index 0000000..c6951df
--- /dev/null
+++ b/tests/plugin_irqfd.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2017 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/memfd.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "crosvm.h"
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#endif
+
+#ifndef F_SEAL_SHRINK
+#define F_SEAL_SHRINK 0x0002
+#endif
+
+#define LOAD_ADDRESS 0x1000
+#define STACK_BASE (LOAD_ADDRESS + 0x1000)
+#define STACK_SIZE 0x1000
+#define SUCCESS_ADDRESS 0x3000
+#define KILL_ADDRESS 0x4000
+
+/*
+org 0x1000
+bits 16
+
+cli
+
+; Set entry 0x0 in the interrupt vector table
+mov word [0x0], handle
+mov word [0x2], 0x0
+
+sti
+
+; Loop until the interrupt is handled
+loop:
+    cmp byte [si], 0x01
+    jne loop
+
+cli
+
+; Signal that we are ready to end
+end:
+    mov byte [es:0], 0x01
+    hlt
+
+; Handle the interrupt by setting the flag that the loop polls
+handle:
+    mov byte [si], 0x01
+    iret
+*/
+const uint8_t g_code[] = {
+    0xfa, 0xc7, 0x06, 0x00, 0x00, 0x1b, 0x10, 0xc7, 0x06, 0x02, 0x00, 0x00,
+    0x00, 0xfb, 0x80, 0x3c, 0x01, 0x75, 0xfb, 0xfa, 0x26, 0xc6, 0x06, 0x00,
+    0x00, 0x01, 0xf4, 0xc6, 0x04, 0x01, 0xcf
+};
+
+struct vcpu_context {
+    struct crosvm_vcpu *vcpu;
+    int irqeventfd;
+    int kill_evt;
+};
+
+void *vcpu_thread(void *arg) {
+    struct vcpu_context *ctx = arg;
+    struct crosvm_vcpu *vcpu = ctx->vcpu;
+    struct crosvm_vcpu_event evt;
+    uint64_t dummy = 1;
+    int i = 0;
+    int ret;
+    while (crosvm_vcpu_wait(vcpu, &evt) == 0) {
+        if (evt.kind == CROSVM_VCPU_EVENT_KIND_INIT) {
+            struct kvm_sregs sregs;
+            crosvm_vcpu_get_sregs(vcpu, &sregs);
+            sregs.cs.base = 0;
+            sregs.cs.selector = 0x0;
+            sregs.ss.base = 0;
+            sregs.ss.selector = 0x0;
+            sregs.es.base = KILL_ADDRESS;
+            sregs.es.selector = 0x0;
+            crosvm_vcpu_set_sregs(vcpu, &sregs);
+
+            struct kvm_regs regs;
+            crosvm_vcpu_get_regs(vcpu, &regs);
+            regs.rflags = 2;
+            regs.rip = LOAD_ADDRESS;
+            regs.rsp = STACK_BASE + STACK_SIZE;
+            regs.rsi = SUCCESS_ADDRESS;
+            crosvm_vcpu_set_regs(vcpu, &regs);
+
+            write(ctx->irqeventfd, &dummy, sizeof(dummy));
+        }
+
+        if (evt.kind == CROSVM_VCPU_EVENT_KIND_IO_ACCESS &&
+            evt.io_access.address_space == CROSVM_ADDRESS_SPACE_MMIO &&
+            evt.io_access.address == KILL_ADDRESS &&
+            evt.io_access.is_write &&
+            evt.io_access.length == 1 &&
+            evt.io_access.data[0] == 1)
+        {
+            write(ctx->kill_evt, &dummy, sizeof(dummy));
+            return NULL;
+        }
+
+        crosvm_vcpu_resume(vcpu);
+    }
+
+    return NULL;
+}
+
+int main(int argc, char** argv) {
+    int i;
+    uint64_t dummy = 1;
+    struct crosvm *crosvm;
+    int ret = crosvm_connect(&crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to connect to crosvm: %d\n", ret);
+        return 1;
+    }
+
+    int kill_evt = crosvm_get_shutdown_eventfd(crosvm);
+    if (kill_evt < 0) {
+        fprintf(stderr, "failed to get kill eventfd: %d\n", kill_evt);
+        return 1;
+    }
+
+    crosvm_reserve_range(crosvm, CROSVM_ADDRESS_SPACE_MMIO, KILL_ADDRESS, 1);
+
+    struct crosvm_irq *irq;
+    ret = crosvm_create_irq_event(crosvm, 0, &irq);
+    if (ret) {
+        fprintf(stderr, "failed to create irq event: %d\n", ret);
+        return 1;
+    }
+
+    int irqeventfd = crosvm_irq_event_get_fd(irq);
+
+    int mem_size = 0x4000;
+    int mem_fd = syscall(SYS_memfd_create, "guest_mem", MFD_CLOEXEC | MFD_ALLOW_SEALING);
+    if (mem_fd < 0) {
+        fprintf(stderr, "failed to create guest memfd: %d\n", errno);
+        return 1;
+    }
+    ret = ftruncate(mem_fd, mem_size);
+    if (ret) {
+        fprintf(stderr, "failed to set size of guest memory: %d\n", errno);
+        return 1;
+    }
+    uint8_t *mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, 0);
+    if (mem == MAP_FAILED) {
+        fprintf(stderr, "failed to mmap guest memory: %d\n", errno);
+        return 1;
+    }
+    fcntl(mem_fd, F_ADD_SEALS, F_SEAL_SHRINK);
+    memcpy(mem + LOAD_ADDRESS, g_code, sizeof(g_code));
+
+    struct crosvm_memory *mem_obj;
+    ret = crosvm_create_memory(crosvm, mem_fd, 0, mem_size, 0, false, &mem_obj);
+    if (ret) {
+        fprintf(stderr, "failed to create memory in crosvm: %d\n", ret);
+        return 1;
+    }
+
+    struct crosvm_vcpu *vcpus[32];
+    struct vcpu_context ctxs[32];
+    pthread_t vcpu_threads[32];
+    uint32_t vcpu_count;
+    for (vcpu_count = 0; vcpu_count < 32; vcpu_count++) {
+        ret = crosvm_get_vcpu(crosvm, vcpu_count, &vcpus[vcpu_count]);
+        if (ret == -ENOENT)
+            break;
+
+        if (ret) {
+            fprintf(stderr, "error while getting all vcpus: %d\n", ret);
+            return 1;
+        }
+        ctxs[vcpu_count].vcpu = vcpus[vcpu_count];
+        ctxs[vcpu_count].irqeventfd = irqeventfd;
+        ctxs[vcpu_count].kill_evt = kill_evt;
+        pthread_create(&vcpu_threads[vcpu_count], NULL, vcpu_thread, &ctxs[vcpu_count]);
+    }
+
+    ret = crosvm_start(crosvm);
+    if (ret) {
+        fprintf(stderr, "failed to tell crosvm to start: %d\n", ret);
+        return 1;
+    }
+
+    ret = read(kill_evt, &dummy, sizeof(dummy));
+    if (ret == -1) {
+        fprintf(stderr, "failed to read kill eventfd: %d\n", errno);
+        return 1;
+    }
+
+    if (mem[SUCCESS_ADDRESS] != 0x01) {
+        fprintf(stderr, "interrupt was not handled: 0x%x\n", mem[SUCCESS_ADDRESS]);
+        return 1;
+    }
+
+    return 0;
+}
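The irqfd test above hinges on two pieces: writing to the irq eventfd injects IRQ 0 into the guest, and the guest has pointed real-mode interrupt vector 0 at its handler. A real-mode IVT entry is four bytes at physical address vector * 4, a little-endian offset word followed by a segment word, which is exactly what the two `mov word` stores in the guest code set up (offset 0x101b, the `handle` label, and segment 0x0). The same layout from the host side, as a sketch (hypothetical helper; the test does this from guest code instead):

#include <stdint.h>

/* Write a real-mode IVT entry: 4 bytes at vector * 4, offset word then
 * segment word, both little-endian. */
static void set_ivt_entry(uint8_t *guest_mem, unsigned vector,
                          uint16_t segment, uint16_t offset) {
    uint8_t *entry = guest_mem + vector * 4;
    entry[0] = (uint8_t)(offset & 0xff);
    entry[1] = (uint8_t)(offset >> 8);
    entry[2] = (uint8_t)(segment & 0xff);
    entry[3] = (uint8_t)(segment >> 8);
}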
diff --git a/tests/plugins.rs b/tests/plugins.rs
new file mode 100644
index 0000000..349634d
--- /dev/null
+++ b/tests/plugins.rs
@@ -0,0 +1,115 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#![cfg(feature = "plugin")]
+
+extern crate rand;
+
+use rand::{thread_rng, Rng};
+
+use std::ffi::OsString;
+use std::fs::remove_file;
+use std::io::Write;
+use std::env::{current_exe, var_os};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::thread::sleep;
+use std::time::Duration;
+
+struct RemovePath(PathBuf);
+impl Drop for RemovePath {
+    fn drop(&mut self) {
+        if let Err(e) = remove_file(&self.0) {
+            eprintln!("failed to remove path: {:?}", e);
+        }
+    }
+}
+
+fn get_crosvm_path() -> PathBuf {
+    let mut crosvm_path = current_exe()
+        .ok()
+        .map(|mut path| {
+            path.pop();
+            if path.ends_with("deps") {
+                path.pop();
+            }
+            path
+        })
+        .expect("failed to get crosvm binary directory");
+    crosvm_path.push("crosvm");
+    crosvm_path
+}
+
+fn build_plugin(src: &str) -> RemovePath {
+    let mut out_bin = PathBuf::from("target");
+    let mut libcrosvm_plugin = get_crosvm_path();
+    libcrosvm_plugin.set_file_name("libcrosvm_plugin.so");
+    out_bin.push(thread_rng()
+                     .gen_ascii_chars()
+                     .take(10)
+                     .collect::<String>());
+    let mut child = Command::new(var_os("CC").unwrap_or(OsString::from("cc")))
+        .args(&["-Icrosvm_plugin", "-pthread", "-o"])
+        .arg(&out_bin)
+        .arg(libcrosvm_plugin)
+        .args(&["-xc", "-"])
+        .stdin(Stdio::piped())
+        .spawn()
+        .expect("failed to spawn compiler");
+    {
+        let stdin = child.stdin.as_mut().expect("failed to open stdin");
+        stdin
+            .write_all(src.as_bytes())
+            .expect("failed to write source to stdin");
+    }
+
+    let status = child.wait().expect("failed to wait for compiler");
+    assert!(status.success(), "failed to build plugin");
+
+    RemovePath(out_bin)
+}
+
+fn run_plugin(bin_path: &Path) {
+    let mut child = Command::new(get_crosvm_path())
+        .args(&["run", "-c", "1", "--plugin"])
+        .arg(bin_path)
+        .spawn()
+        .expect("failed to spawn crosvm");
+    for _ in 0..12 {
+        match child.try_wait().expect("failed to wait for crosvm") {
+            Some(status) => {
+                assert!(status.success());
+                return;
+            }
+            None => sleep(Duration::from_millis(100)),
+        }
+    }
+    child.kill().expect("failed to kill crosvm");
+    panic!("crosvm process has timed out");
+}
+
+fn test_plugin(src: &str) {
+    let bin_path = build_plugin(src);
+    run_plugin(&bin_path.0);
+}
+
+#[test]
+fn test_adder() {
+    test_plugin(include_str!("plugin_adder.c"));
+}
+
+#[test]
+fn test_dirty_log() {
+    test_plugin(include_str!("plugin_dirty_log.c"));
+}
+
+#[test]
+fn test_ioevent() {
+    test_plugin(include_str!("plugin_ioevent.c"));
+}
+
+#[test]
+fn test_irqfd() {
+    test_plugin(include_str!("plugin_irqfd.c"));
+}
--
cgit 1.4.1