From cadce3914276387389d10fd2d180c85c46d77f20 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Wed, 7 Sep 2022 14:16:29 +0000 Subject: [PATCH 3/3] virtio-devices: add a GPU device This adds support for exposing a virtio-gpu device to the guest by implementing a vhost-user frontend compatible with crosvm's GPU device backend. Note that this is not the same as the "vhost-user-gpu" protocol implemented by QEMU. Adding a GPU device from the command line looks like this: --gpu socket=/path/to/crosvm-gpu-vhost-user.sock As a workaround, we fall back to trying to map shared memory as read-only if it can't be mapped read-write, because wlroots' keymaps are read-only, and crosvm does not properly handle this, causing cloud-hypervisor to crash. Signed-off-by: Alyssa Ross Co-authored-by: Puck Meerburg Signed-off-by: Puck Meerburg --- Cargo.lock | 1 + src/main.rs | 9 + virtio-devices/src/device.rs | 8 +- virtio-devices/src/lib.rs | 2 + virtio-devices/src/seccomp_filters.rs | 10 + virtio-devices/src/transport/pci_device.rs | 4 +- virtio-devices/src/vhost_user/gpu.rs | 372 ++++++++++++++++++ virtio-devices/src/vhost_user/mod.rs | 2 + .../src/vhost_user/vu_common_ctrl.rs | 14 +- vmm/Cargo.toml | 1 + vmm/src/api/mod.rs | 7 + vmm/src/config.rs | 123 ++++++ vmm/src/device_manager.rs | 140 ++++++- vmm/src/lib.rs | 83 ++++ vmm/src/vm.rs | 33 +- 15 files changed, 793 insertions(+), 16 deletions(-) create mode 100644 virtio-devices/src/vhost_user/gpu.rs diff --git a/Cargo.lock b/Cargo.lock index d119f28e..c8a20cef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1463,6 +1463,7 @@ dependencies = [ "vfio-ioctls", "vfio_user", "vhdx", + "virtio-bindings", "virtio-devices", "virtio-queue", "vm-allocator", diff --git a/src/main.rs b/src/main.rs index 8725129a..eac7a126 100644 --- a/src/main.rs +++ b/src/main.rs @@ -263,6 +263,14 @@ fn create_app<'a>( .min_values(1) .group("vm-config"), ) + .arg( + Arg::new("gpu") + .long("gpu") + .help(config::GpuConfig::SYNTAX) + .takes_value(true) + .min_values(1) 
+ .group("vm-config"), + ) .arg( Arg::new("pmem") .long("pmem") @@ -724,6 +732,7 @@ mod unit_tests { }, balloon: None, fs: None, + gpu: None, pmem: None, serial: ConsoleConfig { file: None, diff --git a/virtio-devices/src/device.rs b/virtio-devices/src/device.rs index c09adbb2..8bdbb7b9 100644 --- a/virtio-devices/src/device.rs +++ b/virtio-devices/src/device.rs @@ -11,7 +11,7 @@ use crate::{ VIRTIO_F_RING_INDIRECT_DESC, }; use libc::EFD_NONBLOCK; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::io::Write; use std::num::Wrapping; use std::sync::{ @@ -47,19 +47,19 @@ pub struct UserspaceMapping { pub mergeable: bool, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct VirtioSharedMemory { pub offset: u64, pub len: u64, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct VirtioSharedMemoryList { pub host_addr: u64, pub mem_slot: u32, pub addr: GuestAddress, pub len: GuestUsize, - pub region_list: Vec, + pub region_list: BTreeMap, } /// Trait for virtio devices to be driven by a virtio transport. 
diff --git a/virtio-devices/src/lib.rs b/virtio-devices/src/lib.rs index 8316d1ce..d1e31f27 100644 --- a/virtio-devices/src/lib.rs +++ b/virtio-devices/src/lib.rs @@ -88,6 +88,8 @@ pub enum ActivateError { ThreadSpawn(std::io::Error), #[error("Failed to setup vhost-user-fs daemon: {0}")] VhostUserFsSetup(vhost_user::Error), + #[error("Failed to setup vhost-user-gpu daemon: {0}")] + VhostUserGpuSetup(vhost_user::Error), #[error("Failed to setup vhost-user-blk daemon: {0}")] VhostUserBlkSetup(vhost_user::Error), #[error("Failed to create seccomp filter: {0}")] diff --git a/virtio-devices/src/seccomp_filters.rs b/virtio-devices/src/seccomp_filters.rs index 6044231b..a1ec89d8 100644 --- a/virtio-devices/src/seccomp_filters.rs +++ b/virtio-devices/src/seccomp_filters.rs @@ -22,6 +22,7 @@ pub enum Thread { VirtioRng, VirtioVhostBlock, VirtioVhostFs, + VirtioVhostGpu, VirtioVhostNet, VirtioVhostNetCtl, VirtioVsock, @@ -168,6 +169,14 @@ fn virtio_vhost_fs_thread_rules() -> Vec<(i64, Vec)> { ] } +fn virtio_vhost_gpu_thread_rules() -> Vec<(i64, Vec)> { + vec![ + (libc::SYS_getcwd, vec![]), + (libc::SYS_recvmsg, vec![]), + (libc::SYS_sendmsg, vec![]), + ] +} + fn virtio_vhost_net_ctl_thread_rules() -> Vec<(i64, Vec)> { vec![] } @@ -230,6 +239,7 @@ fn get_seccomp_rules(thread_type: Thread) -> Vec<(i64, Vec)> { Thread::VirtioRng => virtio_rng_thread_rules(), Thread::VirtioVhostBlock => virtio_vhost_block_thread_rules(), Thread::VirtioVhostFs => virtio_vhost_fs_thread_rules(), + Thread::VirtioVhostGpu => virtio_vhost_gpu_thread_rules(), Thread::VirtioVhostNet => virtio_vhost_net_thread_rules(), Thread::VirtioVhostNetCtl => virtio_vhost_net_ctl_thread_rules(), Thread::VirtioVsock => virtio_vsock_thread_rules(), diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs index 748d4328..6b0d0daa 100644 --- a/virtio-devices/src/transport/pci_device.rs +++ b/virtio-devices/src/transport/pci_device.rs @@ -949,11 +949,11 @@ impl PciDevice for 
VirtioPciDevice { bars.push(bar); - for (idx, shm) in shm_list.region_list.iter().enumerate() { + for (shmid, shm) in shm_list.region_list.iter() { let shm_cap = VirtioPciCap64::new( PciCapabilityType::SharedMemoryConfig, VIRTIO_SHM_BAR_INDEX as u8, - idx as u8, + *shmid, shm.offset, shm.len, ); diff --git a/virtio-devices/src/vhost_user/gpu.rs b/virtio-devices/src/vhost_user/gpu.rs new file mode 100644 index 00000000..2eb18445 --- /dev/null +++ b/virtio-devices/src/vhost_user/gpu.rs @@ -0,0 +1,372 @@ +// Copyright 2019 Intel Corporation. All Rights Reserved. +// Copyright 2022 Unikie +// Copyright 2022 Puck Meerburg +// SPDX-License-Identifier: Apache-2.0 + +use crate::seccomp_filters::Thread; +use crate::thread_helper::spawn_virtio_thread; +use crate::vhost_user::vu_common_ctrl::VhostUserHandle; +use crate::vhost_user::{Error, Result, VhostUserCommon}; +use crate::{ + ActivateError, ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, UserspaceMapping, + VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList, + VIRTIO_F_VERSION_1, +}; +use seccompiler::SeccompAction; +use std::io::{self, Write}; +use std::os::unix::prelude::AsRawFd; +use std::sync::{Arc, Barrier, Mutex}; +use std::thread; +use vhost::vhost_user::message::{ + VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserShmemMapMsg, VhostUserShmemUnmapMsg, + VhostUserVirtioFeatures, +}; +use vhost::vhost_user::{ + HandlerResult, MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler, +}; +use virtio_bindings::virtio_gpu::{ + VIRTIO_GPU_F_CONTEXT_INIT, VIRTIO_GPU_F_RESOURCE_BLOB, VIRTIO_GPU_F_RESOURCE_UUID, + VIRTIO_GPU_F_VIRGL, +}; +use virtio_queue::Queue; +use vm_memory::GuestMemoryAtomic; +use vm_migration::Pausable; +use vmm_sys_util::eventfd::EventFd; + +const QUEUE_SIZES: &[u16] = &[256, 16]; +const NUM_QUEUES: u16 = QUEUE_SIZES.len() as _; + +struct SlaveReqHandler { + cache_size: u64, + mmap_cache_addr: u64, +} + +impl SlaveReqHandler { + // Make 
sure request is within cache range + fn is_req_valid(&self, offset: u64, len: u64) -> bool { + let end = match offset.checked_add(len) { + Some(n) => n, + None => return false, + }; + + !(offset >= self.cache_size || end > self.cache_size) + } +} + +impl VhostUserMasterReqHandler for SlaveReqHandler { + fn shmem_map(&self, req: &VhostUserShmemMapMsg, fd: &dyn AsRawFd) -> HandlerResult { + if !self.is_req_valid(req.shm_offset, req.len) { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + + let addr = self.mmap_cache_addr + req.shm_offset; + let mut ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + req.len as usize, + req.flags.bits() as i32, + libc::MAP_SHARED | libc::MAP_FIXED, + fd.as_raw_fd(), + req.fd_offset as libc::off_t, + ) + }; + + if ret == libc::MAP_FAILED { + ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + req.len as usize, + (req.flags.bits() as i32) & !libc::PROT_WRITE, + libc::MAP_SHARED | libc::MAP_FIXED, + fd.as_raw_fd(), + req.fd_offset as libc::off_t, + ) + }; + } + + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + Ok(0) + } + + fn shmem_unmap(&self, req: &VhostUserShmemUnmapMsg) -> HandlerResult { + if !self.is_req_valid(req.shm_offset, req.len) { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + + let addr = self.mmap_cache_addr + req.shm_offset; + let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + req.len as usize, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED, + -1, + 0, + ) + }; + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + Ok(0) + } +} + +#[derive(Default)] +#[repr(C, packed)] +pub struct VirtioGpuConfig { + pub events_read: u32, + pub events_clear: u32, + pub num_scanouts: u32, + pub num_capsets: u32, +} + +pub struct Gpu { + common: VirtioCommon, + vu_common: VhostUserCommon, + id: String, + // Hold ownership of the memory that is allocated for the device + // which will be 
automatically dropped when the device is dropped + cache: Option<(VirtioSharedMemoryList, MmapRegion)>, + slave_req_support: bool, + epoll_thread: Option>, + seccomp_action: SeccompAction, + exit_evt: EventFd, +} + +impl Gpu { + /// Create a new virtio-gpu device. + pub fn new( + id: String, + path: &str, + cache: Option<(VirtioSharedMemoryList, MmapRegion)>, + seccomp_action: SeccompAction, + restoring: bool, + exit_evt: EventFd, + iommu: bool, + ) -> Result { + assert!(!restoring); + + // Connect to the vhost-user socket. + let mut vu = VhostUserHandle::connect_vhost_user(false, path, NUM_QUEUES as u64, false)?; + + let avail_features = 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_GPU_F_VIRGL + | 1 << VIRTIO_GPU_F_RESOURCE_UUID + | 1 << VIRTIO_GPU_F_RESOURCE_BLOB + | 1 << VIRTIO_GPU_F_CONTEXT_INIT + | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); + + let avail_protocol_features = + VhostUserProtocolFeatures::CONFIG | VhostUserProtocolFeatures::SLAVE_REQ; + + let (acked_features, acked_protocol_features) = + vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; + + Ok(Gpu { + common: VirtioCommon { + device_type: VirtioDeviceType::Gpu as u32, + avail_features: acked_features, + // If part of the available features that have been acked, the + // PROTOCOL_FEATURES bit must be already set through the VIRTIO + // acked features as we know the guest would never ack it, thus + // the feature would be lost. 
+ acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), + paused_sync: Some(Arc::new(Barrier::new(NUM_QUEUES as usize))), + queue_sizes: QUEUE_SIZES.to_vec(), + min_queues: NUM_QUEUES, + ..Default::default() + }, + vu_common: VhostUserCommon { + vu: Some(Arc::new(Mutex::new(vu))), + acked_protocol_features, + socket_path: path.to_string(), + vu_num_queues: NUM_QUEUES as usize, + ..Default::default() + }, + id, + slave_req_support: acked_protocol_features + & VhostUserProtocolFeatures::SLAVE_REQ.bits() + != 0, + cache, + seccomp_action, + epoll_thread: None, + exit_evt, + }) + } +} + +impl Drop for Gpu { + fn drop(&mut self) { + if let Some(kill_evt) = self.common.kill_evt.take() { + // Ignore the result because there is nothing we can do about it. + let _ = kill_evt.write(1); + } + } +} + +impl VirtioDevice for Gpu { + fn device_type(&self) -> u32 { + self.common.device_type + } + + fn queue_max_sizes(&self) -> &[u16] { + &self.common.queue_sizes + } + + fn features(&self) -> u64 { + self.common.avail_features + } + + fn ack_features(&mut self, value: u64) { + self.common.ack_features(value) + } + + fn read_config(&self, offset: u64, mut data: &mut [u8]) { + if let Some(vu) = &self.vu_common.vu { + if let Err(e) = vu + .lock() + .unwrap() + .socket_handle() + .get_config( + offset as u32, + data.len() as u32, + VhostUserConfigFlags::WRITABLE, + &data, + ) + .map_err(|e| format!("{:?}", e)) + .and_then(|(_, config)| data.write_all(&config).map_err(|e| format!("{:?}", e))) + { + error!("Failed getting vhost-user-gpu configuration: {:?}", e); + } else { + eprintln!("read_config({}, {:?})", offset, data); + } + } + } + + fn activate( + &mut self, + mem: GuestMemoryAtomic, + interrupt_cb: Arc, + queues: Vec<(usize, Queue, EventFd)>, + ) -> ActivateResult { + self.common.activate(&queues, &interrupt_cb)?; + + // Initialize slave communication. 
+ let slave_req_handler = if self.slave_req_support { + if let Some(cache) = self.cache.as_ref() { + let vu_master_req_handler = Arc::new(SlaveReqHandler { + cache_size: cache.0.len, + mmap_cache_addr: cache.0.host_addr, + }); + + let mut req_handler = + MasterReqHandler::new(vu_master_req_handler).map_err(|e| { + ActivateError::VhostUserGpuSetup(Error::MasterReqHandlerCreation(e)) + })?; + + if self.vu_common.acked_protocol_features + & VhostUserProtocolFeatures::REPLY_ACK.bits() + != 0 + { + req_handler.set_reply_ack_flag(true); + } + + Some(req_handler) + } else { + None + } + } else { + None + }; + + // Run a dedicated thread for handling potential reconnections with + // the backend. + let (kill_evt, pause_evt) = self.common.dup_eventfds(); + + let mut handler = self.vu_common.activate( + mem, + queues, + interrupt_cb, + self.common.acked_features, + slave_req_handler, + kill_evt, + pause_evt, + )?; + + let paused = self.common.paused.clone(); + let paused_sync = self.common.paused_sync.clone(); + + let mut epoll_threads = Vec::new(); + spawn_virtio_thread( + &self.id, + &self.seccomp_action, + Thread::VirtioVhostGpu, + &mut epoll_threads, + &self.exit_evt, + move || handler.run(paused, paused_sync.unwrap()), + )?; + self.epoll_thread = Some(epoll_threads.remove(0)); + + event!("virtio-device", "activated", "id", &self.id); + Ok(()) + } + + fn reset(&mut self) -> Option> { + // We first must resume the virtio thread if it was paused. + if self.common.pause_evt.take().is_some() { + self.common.resume().ok()?; + } + + if let Some(vu) = &self.vu_common.vu { + if let Err(e) = vu.lock().unwrap().reset_vhost_user() { + error!("Failed to reset vhost-user daemon: {:?}", e); + return None; + } + } + + if let Some(kill_evt) = self.common.kill_evt.take() { + // Ignore the result because there is nothing we can do about it. 
+ let _ = kill_evt.write(1); + } + + event!("virtio-device", "reset", "id", &self.id); + + // Return the interrupt + Some(self.common.interrupt_cb.take().unwrap()) + } + + fn shutdown(&mut self) { + self.vu_common.shutdown() + } + + fn get_shm_regions(&self) -> Option { + // It would be possible to get the size of the region from the + // backend over vhost-user, but since we need to know the size + // up front in cloud-hypervisor to construct Self, it wouldn't + // help. The user is therefore responsible for configuring + // the correct region size in the VM configuration. + self.cache.as_ref().map(|cache| cache.0.clone()) + } + + fn set_shm_regions( + &mut self, + shm_regions: VirtioSharedMemoryList, + ) -> std::result::Result<(), crate::Error> { + todo!("set_shm_regions({:?})", shm_regions) + } + + fn add_memory_region( + &mut self, + region: &Arc, + ) -> std::result::Result<(), crate::Error> { + todo!("add_memory_region({:?})", region) + } + + fn userspace_mappings(&self) -> Vec { + todo!() + } +} diff --git a/virtio-devices/src/vhost_user/mod.rs b/virtio-devices/src/vhost_user/mod.rs index 81c48edf..669a962b 100644 --- a/virtio-devices/src/vhost_user/mod.rs +++ b/virtio-devices/src/vhost_user/mod.rs @@ -31,11 +31,13 @@ use vu_common_ctrl::VhostUserHandle; pub mod blk; pub mod fs; +pub mod gpu; pub mod net; pub mod vu_common_ctrl; pub use self::blk::Blk; pub use self::fs::*; +pub use self::gpu::*; pub use self::net::Net; pub use self::vu_common_ctrl::VhostUserConfig; diff --git a/virtio-devices/src/vhost_user/vu_common_ctrl.rs b/virtio-devices/src/vhost_user/vu_common_ctrl.rs index 0ab0bdda..fd6716a2 100644 --- a/virtio-devices/src/vhost_user/vu_common_ctrl.rs +++ b/virtio-devices/src/vhost_user/vu_common_ctrl.rs @@ -199,6 +199,14 @@ impl VhostUserHandle { .map_err(Error::VhostUserSetInflight)?; } + // FIXME: crosvm's vhost-user backend requires this to come first. + // Can we fix that in crosvm? 
+ if let Some(slave_req_handler) = slave_req_handler { + self.vu + .set_slave_request_fd(&slave_req_handler.get_tx_raw_fd()) + .map_err(Error::VhostUserSetSlaveRequestFd)?; + } + let mut vrings_info = Vec::new(); for (queue_index, queue, queue_evt) in queues.iter() { let actual_size: usize = queue.size().try_into().unwrap(); @@ -267,12 +275,6 @@ impl VhostUserHandle { self.enable_vhost_user_vrings(self.queue_indexes.clone(), true)?; - if let Some(slave_req_handler) = slave_req_handler { - self.vu - .set_slave_request_fd(&slave_req_handler.get_tx_raw_fd()) - .map_err(Error::VhostUserSetSlaveRequestFd)?; - } - self.vrings_info = Some(vrings_info); self.ready = true; diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index c95384b5..87a36722 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -49,6 +49,7 @@ versionize_derive = "0.1.4" vfio-ioctls = { git = "https://github.com/rust-vmm/vfio", branch = "main", default-features = false } vfio_user = { path = "../vfio_user" } vhdx = { path = "../vhdx" } +virtio-bindings = "0.1.0" virtio-devices = { path = "../virtio-devices" } virtio-queue = "0.5.0" vm-allocator = { path = "../vm-allocator" } diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 0513ef37..9a9942fc 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -34,6 +34,7 @@ pub use self::http::start_http_path_thread; pub mod http; pub mod http_endpoint; +use crate::config::GpuConfig; use crate::config::{ DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, @@ -132,6 +133,9 @@ pub enum ApiError { /// The fs could not be added to the VM. VmAddFs(VmError), + /// The gpu could not be added to the VM. + VmAddGpu(VmError), + /// The pmem device could not be added to the VM. VmAddPmem(VmError), @@ -301,6 +305,9 @@ pub enum ApiRequest { /// Add a fs to the VM. VmAddFs(Arc, Sender), + /// Add a gpu to the VM. + VmAddGpu(Arc, Sender), + /// Add a pmem device to the VM. 
VmAddPmem(Arc, Sender), diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 9d72f8fe..193f948d 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -43,6 +43,8 @@ pub enum Error { ParseFsTagMissing, /// Filesystem socket is missing ParseFsSockMissing, + /// GPU socket is missing + ParseGpuSockMissing, /// Missing persistent memory file parameter. ParsePmemFileMissing, /// Missing vsock socket path parameter. @@ -71,6 +73,8 @@ pub enum Error { ParseBalloon(OptionParserError), /// Error parsing filesystem parameters ParseFileSystem(OptionParserError), + /// Error parsing GPU parameters + ParseGpu(OptionParserError), /// Error parsing persistent memory parameters ParsePersistentMemory(OptionParserError), /// Failed parsing console @@ -292,6 +296,8 @@ impl fmt::Display for Error { ParseFileSystem(o) => write!(f, "Error parsing --fs: {}", o), ParseFsSockMissing => write!(f, "Error parsing --fs: socket missing"), ParseFsTagMissing => write!(f, "Error parsing --fs: tag missing"), + ParseGpu(o) => write!(f, "Error parsing --gpu: {}", o), + ParseGpuSockMissing => write!(f, "Error parsing --gpu: socket missing"), ParsePersistentMemory(o) => write!(f, "Error parsing --pmem: {}", o), ParsePmemFileMissing => write!(f, "Error parsing --pmem: file missing"), ParseVsock(o) => write!(f, "Error parsing --vsock: {}", o), @@ -352,6 +358,7 @@ pub struct VmParams<'a> { pub rng: &'a str, pub balloon: Option<&'a str>, pub fs: Option>, + pub gpu: Option>, pub pmem: Option>, pub serial: &'a str, pub console: &'a str, @@ -387,6 +394,7 @@ impl<'a> VmParams<'a> { let console = args.value_of("console").unwrap(); let balloon = args.value_of("balloon"); let fs: Option> = args.values_of("fs").map(|x| x.collect()); + let gpu: Option> = args.values_of("gpu").map(|x| x.collect()); let pmem: Option> = args.values_of("pmem").map(|x| x.collect()); let devices: Option> = args.values_of("device").map(|x| x.collect()); let user_devices: Option> = args.values_of("user-device").map(|x| 
x.collect()); @@ -414,6 +422,7 @@ impl<'a> VmParams<'a> { rng, balloon, fs, + gpu, pmem, serial, console, @@ -1668,6 +1677,73 @@ impl FsConfig { } } +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct GpuConfig { + pub socket: PathBuf, + #[serde(default = "default_gpuconfig_cache_size")] + pub cache_size: u64, + pub id: Option, + pub pci_segment: u16, +} + +fn default_gpuconfig_cache_size() -> u64 { + 1 << 33 +} + +impl GpuConfig { + pub const SYNTAX: &'static str = + "GPU parameters \"socket=,cache_size=,\ + id=,pci_segment=\""; + + pub fn parse(gpu: &str) -> Result { + let mut parser = OptionParser::new(); + parser + .add("socket") + .add("cache_size") + .add("id") + .add("pci_segment"); + parser.parse(gpu).map_err(Error::ParseGpu)?; + + let socket = PathBuf::from(parser.get("socket").ok_or(Error::ParseGpuSockMissing)?); + let cache_size = parser + .convert::("cache_size") + .map_err(Error::ParseGpu)? + .unwrap_or_else(|| ByteSized(default_gpuconfig_cache_size())) + .0; + let id = parser.get("id"); + + let pci_segment = parser + .convert("pci_segment") + .map_err(Error::ParseGpu)? 
+ .unwrap_or_default(); + + Ok(GpuConfig { + socket, + cache_size, + id, + pci_segment, + }) + } + + pub fn validate(&self, vm_config: &VmConfig) -> ValidationResult<()> { + if let Some(platform_config) = vm_config.platform.as_ref() { + if self.pci_segment >= platform_config.num_pci_segments { + return Err(ValidationError::InvalidPciSegment(self.pci_segment)); + } + + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { + if iommu_segments.contains(&self.pci_segment) { + return Err(ValidationError::IommuNotSupportedOnSegment( + self.pci_segment, + )); + } + } + } + + Ok(()) + } +} + #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] pub struct PmemConfig { pub file: PathBuf, @@ -2280,6 +2356,7 @@ pub struct VmConfig { pub rng: RngConfig, pub balloon: Option, pub fs: Option>, + pub gpu: Option>, pub pmem: Option>, #[serde(default = "ConsoleConfig::default_serial")] pub serial: ConsoleConfig, @@ -2415,6 +2492,17 @@ impl VmConfig { } } + if let Some(gpus) = &self.gpu { + if !gpus.is_empty() && !self.memory.shared { + return Err(ValidationError::VhostUserRequiresSharedMemory); + } + for gpu in gpus { + gpu.validate(self)?; + + Self::validate_identifier(&mut id_list, &gpu.id)?; + } + } + if let Some(pmems) = &self.pmem { for pmem in pmems { pmem.validate(self)?; @@ -2600,6 +2688,15 @@ impl VmConfig { fs = Some(fs_config_list); } + let mut gpu: Option> = None; + if let Some(gpu_list) = &vm_params.gpu { + let mut gpu_config_list = Vec::new(); + for item in gpu_list.iter() { + gpu_config_list.push(GpuConfig::parse(item)?); + } + gpu = Some(gpu_config_list); + } + let mut pmem: Option> = None; if let Some(pmem_list) = &vm_params.pmem { let mut pmem_config_list = Vec::new(); @@ -2704,6 +2801,7 @@ impl VmConfig { rng, balloon, fs, + gpu, pmem, serial, console, @@ -3078,6 +3176,21 @@ mod tests { Ok(()) } + #[test] + fn test_parse_gpu() -> Result<()> { + // "socket" must be supplied + assert!(GpuConfig::parse("").is_err()); + assert_eq!( 
+ GpuConfig::parse("socket=/tmp/sock")?, + GpuConfig { + socket: PathBuf::from("/tmp/sock"), + ..Default::default() + } + ); + + Ok(()) + } + #[test] fn test_pmem_parsing() -> Result<()> { // Must always give a file and size @@ -3303,6 +3416,7 @@ mod tests { }, balloon: None, fs: None, + gpu: None, pmem: None, serial: ConsoleConfig { file: None, @@ -3457,6 +3571,15 @@ mod tests { Err(ValidationError::VhostUserRequiresSharedMemory) ); + let mut invalid_config = valid_config.clone(); + invalid_config.gpu = Some(vec![GpuConfig { + ..Default::default() + }]); + assert_eq!( + invalid_config.validate(), + Err(ValidationError::VhostUserRequiresSharedMemory) + ); + let mut still_valid_config = valid_config.clone(); still_valid_config.memory.shared = true; assert!(still_valid_config.validate().is_ok()); diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 19ce15be..81a28446 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -10,8 +10,8 @@ // use crate::config::{ - ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, - VdpaConfig, VhostMode, VmConfig, VsockConfig, + ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, + UserDeviceConfig, VdpaConfig, VhostMode, VmConfig, VsockConfig, }; use crate::device_tree::{DeviceNode, DeviceTree}; use crate::interrupt::LegacyUserspaceInterruptManager; @@ -68,6 +68,7 @@ use std::collections::{BTreeSet, HashMap}; use std::convert::TryInto; use std::fs::{read_link, File, OpenOptions}; use std::io::{self, stdout, Seek, SeekFrom}; +use std::iter::once; use std::mem::zeroed; use std::num::Wrapping; use std::os::unix::fs::OpenOptionsExt; @@ -77,11 +78,13 @@ use std::result; use std::sync::{Arc, Mutex}; use std::time::Instant; use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; +use virtio_bindings::virtio_gpu::virtio_gpu_shm_id_VIRTIO_GPU_SHM_ID_HOST_VISIBLE as VIRTIO_GPU_SHM_ID_HOST_VISIBLE; use 
virtio_devices::transport::VirtioTransport; use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; use virtio_devices::vhost_user::VhostUserConfig; use virtio_devices::{ AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, + VirtioSharedMemory, VirtioSharedMemoryList, }; use virtio_devices::{Endpoint, IommuMapping}; use vm_allocator::{AddressAllocator, SystemAllocator}; @@ -124,6 +127,7 @@ const CONSOLE_DEVICE_NAME: &str = "__console"; // identifiers if the user doesn't give one const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; const FS_DEVICE_NAME_PREFIX: &str = "_fs"; +const GPU_DEVICE_NAME_PREFIX: &str = "_gpu"; const NET_DEVICE_NAME_PREFIX: &str = "_net"; const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; @@ -160,9 +164,15 @@ pub enum DeviceManagerError { /// Cannot create virtio-fs device CreateVirtioFs(virtio_devices::vhost_user::Error), + /// Cannot create virtio-gpu device + CreateVirtioGpu(virtio_devices::vhost_user::Error), + /// Virtio-fs device was created without a socket. NoVirtioFsSock, + /// Virtio-gpu device was created without a socket. 
+ NoVirtioGpuSock, + /// Cannot create vhost-user-blk device CreateVhostUserBlk(virtio_devices::vhost_user::Error), @@ -244,6 +254,9 @@ pub enum DeviceManagerError { /// Cannot find a memory range for virtio-fs FsRangeAllocation, + /// Cannot find a memory range for virtio-gpu + GpuRangeAllocation, + /// Error creating serial output file SerialOutputFileOpen(io::Error), @@ -2064,6 +2077,9 @@ impl DeviceManager { // Add virtio-fs if required devices.append(&mut self.make_virtio_fs_devices()?); + // Add virtio-gpu if required + devices.append(&mut self.make_virtio_gpu_devices()?); + // Add virtio-pmem if required devices.append(&mut self.make_virtio_pmem_devices()?); @@ -2514,6 +2530,119 @@ impl DeviceManager { Ok(devices) } + fn make_virtio_gpu_device( + &mut self, + gpu_cfg: &mut GpuConfig, + ) -> DeviceManagerResult { + let id = if let Some(id) = &gpu_cfg.id { + id.clone() + } else { + let id = self.next_device_name(GPU_DEVICE_NAME_PREFIX)?; + gpu_cfg.id = Some(id.clone()); + id + }; + + info!("Creating virtio-gpu device: {:?}", gpu_cfg); + + let mut node = device_node!(id); + + if let Some(gpu_socket) = gpu_cfg.socket.to_str() { + let cache_size = gpu_cfg.cache_size; + // In crosvm, the 8 GiB bar is 8 GiB-aligned. + let cache_base = self.pci_segments[gpu_cfg.pci_segment as usize] + .allocator + .lock() + .unwrap() + .allocate(None, cache_size as GuestUsize, Some(cache_size)) + .ok_or(DeviceManagerError::GpuRangeAllocation)? + .raw_value(); + + // Update the node with correct resource information. 
+ node.resources.push(Resource::MmioAddressRange { + base: cache_base, + size: cache_size, + }); + + let mmap_region = MmapRegion::build( + None, + cache_size as usize, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, + ) + .map_err(DeviceManagerError::NewMmapRegion)?; + let host_addr: u64 = mmap_region.as_ptr() as u64; + + let mem_slot = self + .memory_manager + .lock() + .unwrap() + .create_userspace_mapping(cache_base, cache_size, host_addr, false, false, false) + .map_err(DeviceManagerError::MemoryManager)?; + + let region_list = once(( + VIRTIO_GPU_SHM_ID_HOST_VISIBLE as u8, + VirtioSharedMemory { + offset: 0, + len: cache_size, + }, + )) + .collect(); + + let cache = Some(( + VirtioSharedMemoryList { + host_addr, + mem_slot, + addr: GuestAddress(cache_base), + len: cache_size as GuestUsize, + region_list, + }, + mmap_region, + )); + + let virtio_gpu_device = Arc::new(Mutex::new( + virtio_devices::vhost_user::Gpu::new( + id.clone(), + gpu_socket, + cache, + self.seccomp_action.clone(), + self.restoring, + self.exit_evt + .try_clone() + .map_err(DeviceManagerError::EventFd)?, + self.force_iommu, + ) + .map_err(DeviceManagerError::CreateVirtioGpu)?, + )); + + self.device_tree.lock().unwrap().insert(id.clone(), node); + + Ok(MetaVirtioDevice { + virtio_device: Arc::clone(&virtio_gpu_device) + as Arc>, + iommu: false, + id, + pci_segment: gpu_cfg.pci_segment, + dma_handler: None, + }) + } else { + Err(DeviceManagerError::NoVirtioGpuSock) + } + } + + fn make_virtio_gpu_devices(&mut self) -> DeviceManagerResult> { + let mut devices = Vec::new(); + + let mut gpu_devices = self.config.lock().unwrap().gpu.clone(); + if let Some(gpu_list_cfg) = &mut gpu_devices { + for gpu_cfg in gpu_list_cfg.iter_mut() { + devices.push(self.make_virtio_gpu_device(gpu_cfg)?); + } + } + self.config.lock().unwrap().gpu = gpu_devices; + + Ok(devices) + } + fn make_virtio_pmem_device( &mut self, pmem_cfg: &mut PmemConfig, @@ -4004,6 +4133,13 @@ impl DeviceManager { 
self.hotplug_virtio_pci_device(device) } + pub fn add_gpu(&mut self, gpu_cfg: &mut GpuConfig) -> DeviceManagerResult { + self.validate_identifier(&gpu_cfg.id)?; + + let device = self.make_virtio_gpu_device(gpu_cfg)?; + self.hotplug_virtio_pci_device(device) + } + pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult { self.validate_identifier(&pmem_cfg.id)?; diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index a114488b..5baaa29f 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -26,6 +26,7 @@ use crate::migration::{recv_vm_config, recv_vm_state}; use crate::seccomp_filters::{get_seccomp_filter, Thread}; use crate::vm::{Error as VmError, Vm, VmState}; use anyhow::anyhow; +use config::GpuConfig; use libc::{EFD_NONBLOCK, SIGINT, SIGTERM}; use memory_manager::MemoryManagerSnapshotData; use pci::PciBdf; @@ -969,6 +970,32 @@ impl Vmm { } } + fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> result::Result>, VmError> { + self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; + + { + // Validate the configuration change in a cloned configuration + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); + add_to_config(&mut config.gpu, gpu_cfg.clone()); + config.validate().map_err(VmError::ConfigValidation)?; + } + + if let Some(ref mut vm) = self.vm { + let info = vm.add_gpu(gpu_cfg).map_err(|e| { + error!("Error when adding new gpu to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } else { + // Update VmConfig by adding the new device. 
+ let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.gpu, gpu_cfg); + Ok(None) + } + } + fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result>, VmError> { self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; @@ -1858,6 +1885,13 @@ impl Vmm { .map(ApiResponsePayload::VmAction); sender.send(response).map_err(Error::ApiResponseSend)?; } + ApiRequest::VmAddGpu(add_gpu_data, sender) => { + let response = self + .vm_add_gpu(add_gpu_data.as_ref().clone()) + .map_err(ApiError::VmAddGpu) + .map(ApiResponsePayload::VmAction); + sender.send(response).map_err(Error::ApiResponseSend)?; + } ApiRequest::VmAddPmem(add_pmem_data, sender) => { let response = self .vm_add_pmem(add_pmem_data.as_ref().clone()) @@ -2027,6 +2061,7 @@ mod unit_tests { }, balloon: None, fs: None, + gpu: None, pmem: None, serial: ConsoleConfig { file: None, @@ -2253,6 +2288,54 @@ mod unit_tests { ); } + #[test] + fn test_vmm_vm_cold_add_gpu() { + let mut vmm = create_dummy_vmm(); + let gpu_config = GpuConfig::parse("socket=/tmp/sock").unwrap(); + + assert!(matches!( + vmm.vm_add_gpu(gpu_config.clone()), + Err(VmError::VmNotCreated) + )); + + let _ = vmm.vm_create(create_dummy_vm_config()); + assert!(vmm + .vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .gpu + .is_none()); + + let result = vmm.vm_add_gpu(gpu_config.clone()); + assert!(result.is_ok()); + assert!(result.unwrap().is_none()); + assert_eq!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .gpu + .clone() + .unwrap() + .len(), + 1 + ); + assert_eq!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .gpu + .clone() + .unwrap()[0], + gpu_config + ); + } + #[test] fn test_vmm_vm_cold_add_pmem() { let mut vmm = create_dummy_vmm(); diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 87278d5c..30a0c896 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -12,8 +12,8 @@ // use crate::config::{ - add_to_config, DeviceConfig, DiskConfig, FsConfig, HotplugMethod, 
NetConfig, PmemConfig, - UserDeviceConfig, ValidationError, VdpaConfig, VmConfig, VsockConfig, + add_to_config, DeviceConfig, DiskConfig, FsConfig, GpuConfig, HotplugMethod, NetConfig, + PmemConfig, UserDeviceConfig, ValidationError, VdpaConfig, VmConfig, VsockConfig, }; use crate::config::{NumaConfig, PayloadConfig}; #[cfg(feature = "guest_debug")] @@ -1555,6 +1555,11 @@ impl Vm { fs.retain(|dev| dev.id.as_ref() != Some(&id)); } + // Remove if gpu device + if let Some(gpu) = config.gpu.as_mut() { + gpu.retain(|dev| dev.id.as_ref() != Some(&id)); + } + // Remove if net device if let Some(net) = config.net.as_mut() { net.retain(|dev| dev.id.as_ref() != Some(&id)); @@ -1633,6 +1638,30 @@ impl Vm { Ok(pci_device_info) } + pub fn add_gpu(&mut self, mut gpu_cfg: GpuConfig) -> Result { + let pci_device_info = self + .device_manager + .lock() + .unwrap() + .add_gpu(&mut gpu_cfg) + .map_err(Error::DeviceManager)?; + + // Update VmConfig by adding the new device. This is important to + // ensure the device would be created in case of a reboot. + { + let mut config = self.config.lock().unwrap(); + add_to_config(&mut config.gpu, gpu_cfg); + } + + self.device_manager + .lock() + .unwrap() + .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED) + .map_err(Error::DeviceManager)?; + + Ok(pci_device_info) + } + pub fn add_pmem(&mut self, mut pmem_cfg: PmemConfig) -> Result { let pci_device_info = self .device_manager -- 2.37.1