diff options
6 files changed, 2145 insertions, 3 deletions
diff --git a/pkgs/applications/virtualization/cloud-hypervisor/0001-build-use-local-vhost.patch b/pkgs/applications/virtualization/cloud-hypervisor/0001-build-use-local-vhost.patch new file mode 100644 index 00000000000..fd6ba1f91e7 --- /dev/null +++ b/pkgs/applications/virtualization/cloud-hypervisor/0001-build-use-local-vhost.patch @@ -0,0 +1,40 @@ +From 9ac46605a87746d2d3e5a46a75cde33a7f01d31c Mon Sep 17 00:00:00 2001 +From: Alyssa Ross <alyssa.ross@unikie.com> +Date: Wed, 28 Sep 2022 12:18:19 +0000 +Subject: [PATCH 1/2] build: use local vhost + +Signed-off-by: Alyssa Ross <alyssa.ross@unikie.com> +Signed-off-by: Alyssa Ross <hi@alyssa.is> +--- + Cargo.lock | 2 -- + Cargo.toml | 1 + + 2 files changed, 1 insertion(+), 2 deletions(-) + +diff --git a/Cargo.lock b/Cargo.lock +index f4e667f7..d4e58b21 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -1413,8 +1413,6 @@ dependencies = [ + [[package]] + name = "vhost" + version = "0.6.0" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c9b791c5b0717a0558888a4cf7240cea836f39a99cb342e12ce633dcaa078072" + dependencies = [ + "bitflags", + "libc", +diff --git a/Cargo.toml b/Cargo.toml +index 230bd499..dcd5bb24 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -53,6 +53,7 @@ vm-memory = "0.10.0" + [patch.crates-io] + kvm-bindings = { git = "https://github.com/cloud-hypervisor/kvm-bindings", branch = "ch-v0.6.0-tdx" } + versionize_derive = { git = "https://github.com/cloud-hypervisor/versionize_derive", branch = "ch" } ++vhost = { path = "../vhost/crates/vhost" } + + [dev-dependencies] + dirs = "4.0.0" +-- +2.37.1 + diff --git a/pkgs/applications/virtualization/cloud-hypervisor/0002-virtio-devices-add-a-GPU-device.patch b/pkgs/applications/virtualization/cloud-hypervisor/0002-virtio-devices-add-a-GPU-device.patch new file mode 100644 index 00000000000..c455119f6cc --- /dev/null +++ b/pkgs/applications/virtualization/cloud-hypervisor/0002-virtio-devices-add-a-GPU-device.patch @@ -0,0 +1,1303 @@ +From 
5d4b824e72a329b587d11fd1b562ed9dc3633c7f Mon Sep 17 00:00:00 2001 +From: Alyssa Ross <alyssa.ross@unikie.com> +Date: Wed, 7 Sep 2022 14:16:29 +0000 +Subject: [PATCH 2/2] virtio-devices: add a GPU device + +This adds support for exposing a virtio-gpu device to guest by +implementing a vhost-user frontend compatible with crosvm's GPU device +backend. + +Note that this is not the same as the "vhost-user-gpu" protocol +implemented by QEMU. + +Adding a GPU device from the command line looks like this: + + --gpu socket=/path/to/crosvm-gpu-vhost-user.sock + +As a workaround, We fall back to trying to map shared memory as +read-only if it can't be mapped read-write, because wlroots' keymaps +are read-only, and crosvm does not properly handle this, causing +cloud-hypervisor to crash. + +Signed-off-by: Alyssa Ross <alyssa.ross@unikie.com> +Co-authored-by: Puck Meerburg <puck@puckipedia.com> +Signed-off-by: Puck Meerburg <puck@puckipedia.com> +Co-authored-by: Alyssa Ross <hi@alyssa.is> +Signed-off-by: Alyssa Ross <hi@alyssa.is> +--- + Cargo.lock | 1 + + src/main.rs | 12 + + virtio-devices/src/device.rs | 8 +- + virtio-devices/src/lib.rs | 2 + + virtio-devices/src/seccomp_filters.rs | 10 + + virtio-devices/src/transport/pci_device.rs | 4 +- + virtio-devices/src/vhost_user/gpu.rs | 373 ++++++++++++++++++ + virtio-devices/src/vhost_user/mod.rs | 2 + + .../src/vhost_user/vu_common_ctrl.rs | 14 +- + vmm/Cargo.toml | 1 + + vmm/src/api/mod.rs | 7 + + vmm/src/config.rs | 105 ++++- + vmm/src/device_manager.rs | 139 ++++++- + vmm/src/lib.rs | 83 ++++ + vmm/src/vm.rs | 33 +- + vmm/src/vm_config.rs | 25 ++ + 16 files changed, 802 insertions(+), 17 deletions(-) + create mode 100644 virtio-devices/src/vhost_user/gpu.rs + +diff --git a/Cargo.lock b/Cargo.lock +index d4e58b21..bc51e03b 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -1630,6 +1630,7 @@ dependencies = [ + "vfio-ioctls", + "vfio_user", + "vhdx", ++ "virtio-bindings 0.2.0", + "virtio-devices", + "virtio-queue", + "vm-allocator", 
+diff --git a/src/main.rs b/src/main.rs +index eca2d2fa..8393f699 100644 +--- a/src/main.rs ++++ b/src/main.rs +@@ -188,6 +188,10 @@ pub struct TopLevel { + /// tag=<tag_name>,socket=<socket_path>,num_queues=<number_of_queues>,queue_size=<size_of_each_queue>,id=<device_id>,pci_segment=<segment_id> + fs: Vec<String>, + ++ #[argh(option, long = "gpu")] ++ /// socket=<socket_path>,cache_size=<default 8GiB>,id=<device_id>,pci_segment=<segment_id> ++ gpu: Vec<String>, ++ + #[argh(option, long = "pmem")] + /// file=<backing_file_path>,size=<persistent_memory_size>,iommu=on|off,discard_writes=on|off,id=<device_id>,pci_segment=<segment_id> + pmem: Vec<String>, +@@ -303,6 +307,12 @@ impl TopLevel { + None + }; + ++ let gpu = if !self.gpu.is_empty() { ++ Some(self.gpu.iter().map(|x| x.as_str()).collect()) ++ } else { ++ None ++ }; ++ + let pmem = if !self.pmem.is_empty() { + Some(self.pmem.iter().map(|x| x.as_str()).collect()) + } else { +@@ -356,6 +366,7 @@ impl TopLevel { + rng, + balloon, + fs, ++ gpu, + pmem, + serial, + console, +@@ -707,6 +718,7 @@ mod unit_tests { + }, + balloon: None, + fs: None, ++ gpu: None, + pmem: None, + serial: ConsoleConfig { + file: None, +diff --git a/virtio-devices/src/device.rs b/virtio-devices/src/device.rs +index b70092f8..e091ddd6 100644 +--- a/virtio-devices/src/device.rs ++++ b/virtio-devices/src/device.rs +@@ -11,7 +11,7 @@ use crate::{ + VIRTIO_F_RING_INDIRECT_DESC, + }; + use libc::EFD_NONBLOCK; +-use std::collections::HashMap; ++use std::collections::{BTreeMap, HashMap}; + use std::io::Write; + use std::num::Wrapping; + use std::sync::{ +@@ -47,19 +47,19 @@ pub struct UserspaceMapping { + pub mergeable: bool, + } + +-#[derive(Clone)] ++#[derive(Clone, Debug)] + pub struct VirtioSharedMemory { + pub offset: u64, + pub len: u64, + } + +-#[derive(Clone)] ++#[derive(Clone, Debug)] + pub struct VirtioSharedMemoryList { + pub host_addr: u64, + pub mem_slot: u32, + pub addr: GuestAddress, + pub len: GuestUsize, +- pub region_list: 
Vec<VirtioSharedMemory>, ++ pub region_list: BTreeMap<u8, VirtioSharedMemory>, + } + + /// Trait for virtio devices to be driven by a virtio transport. +diff --git a/virtio-devices/src/lib.rs b/virtio-devices/src/lib.rs +index f7037692..98c39658 100644 +--- a/virtio-devices/src/lib.rs ++++ b/virtio-devices/src/lib.rs +@@ -86,6 +86,8 @@ pub enum ActivateError { + ThreadSpawn(std::io::Error), + #[error("Failed to setup vhost-user-fs daemon: {0}")] + VhostUserFsSetup(vhost_user::Error), ++ #[error("Failed to setup vhost-user-gpu daemon: {0}")] ++ VhostUserGpuSetup(vhost_user::Error), + #[error("Failed to setup vhost-user-blk daemon: {0}")] + VhostUserBlkSetup(vhost_user::Error), + #[error("Failed to create seccomp filter: {0}")] +diff --git a/virtio-devices/src/seccomp_filters.rs b/virtio-devices/src/seccomp_filters.rs +index 1e689591..823e6cec 100644 +--- a/virtio-devices/src/seccomp_filters.rs ++++ b/virtio-devices/src/seccomp_filters.rs +@@ -22,6 +22,7 @@ pub enum Thread { + VirtioRng, + VirtioVhostBlock, + VirtioVhostFs, ++ VirtioVhostGpu, + VirtioVhostNet, + VirtioVhostNetCtl, + VirtioVsock, +@@ -162,6 +163,14 @@ fn virtio_vhost_fs_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> { + ] + } + ++fn virtio_vhost_gpu_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> { ++ vec![ ++ (libc::SYS_getcwd, vec![]), ++ (libc::SYS_recvmsg, vec![]), ++ (libc::SYS_sendmsg, vec![]), ++ ] ++} ++ + fn virtio_vhost_net_ctl_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> { + vec![] + } +@@ -223,6 +232,7 @@ fn get_seccomp_rules(thread_type: Thread) -> Vec<(i64, Vec<SeccompRule>)> { + Thread::VirtioRng => virtio_rng_thread_rules(), + Thread::VirtioVhostBlock => virtio_vhost_block_thread_rules(), + Thread::VirtioVhostFs => virtio_vhost_fs_thread_rules(), ++ Thread::VirtioVhostGpu => virtio_vhost_gpu_thread_rules(), + Thread::VirtioVhostNet => virtio_vhost_net_thread_rules(), + Thread::VirtioVhostNetCtl => virtio_vhost_net_ctl_thread_rules(), + Thread::VirtioVsock => 
virtio_vsock_thread_rules(), +diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs +index b51b33f7..b9c3ac35 100644 +--- a/virtio-devices/src/transport/pci_device.rs ++++ b/virtio-devices/src/transport/pci_device.rs +@@ -1055,11 +1055,11 @@ impl PciDevice for VirtioPciDevice { + PciDeviceError::IoRegistrationFailed(shm_list.addr.raw_value(), e) + })?; + +- for (idx, shm) in shm_list.region_list.iter().enumerate() { ++ for (shmid, shm) in shm_list.region_list.iter() { + let shm_cap = VirtioPciCap64::new( + PciCapabilityType::SharedMemoryConfig, + VIRTIO_SHM_BAR_INDEX as u8, +- idx as u8, ++ *shmid, + shm.offset, + shm.len, + ); +diff --git a/virtio-devices/src/vhost_user/gpu.rs b/virtio-devices/src/vhost_user/gpu.rs +new file mode 100644 +index 00000000..42703431 +--- /dev/null ++++ b/virtio-devices/src/vhost_user/gpu.rs +@@ -0,0 +1,373 @@ ++// Copyright 2019 Intel Corporation. All Rights Reserved. ++// Copyright 2022 Unikie ++// Copyright 2022 Puck Meerburg ++// Copyright 2023 Alyssa Ross <hi@alyssa.is> ++// SPDX-License-Identifier: Apache-2.0 ++ ++use crate::seccomp_filters::Thread; ++use crate::thread_helper::spawn_virtio_thread; ++use crate::vhost_user::vu_common_ctrl::VhostUserHandle; ++use crate::vhost_user::{Error, Result, VhostUserCommon}; ++use crate::{ ++ ActivateError, ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, UserspaceMapping, ++ VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList, ++ VIRTIO_F_VERSION_1, ++}; ++use seccompiler::SeccompAction; ++use std::io::{self, Write}; ++use std::os::unix::prelude::AsRawFd; ++use std::sync::{Arc, Barrier, Mutex}; ++use std::thread; ++use vhost::vhost_user::message::{ ++ VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserShmemMapMsg, VhostUserShmemUnmapMsg, ++ VhostUserVirtioFeatures, ++}; ++use vhost::vhost_user::{ ++ HandlerResult, MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler, ++}; ++use 
virtio_bindings::virtio_gpu::{ ++ VIRTIO_GPU_F_CONTEXT_INIT, VIRTIO_GPU_F_RESOURCE_BLOB, VIRTIO_GPU_F_RESOURCE_UUID, ++ VIRTIO_GPU_F_VIRGL, ++}; ++use virtio_queue::Queue; ++use vm_memory::GuestMemoryAtomic; ++use vm_migration::Pausable; ++use vmm_sys_util::eventfd::EventFd; ++ ++const QUEUE_SIZES: &[u16] = &[256, 16]; ++const NUM_QUEUES: u16 = QUEUE_SIZES.len() as _; ++ ++struct SlaveReqHandler { ++ cache_size: u64, ++ mmap_cache_addr: u64, ++} ++ ++impl SlaveReqHandler { ++ // Make sure request is within cache range ++ fn is_req_valid(&self, offset: u64, len: u64) -> bool { ++ let end = match offset.checked_add(len) { ++ Some(n) => n, ++ None => return false, ++ }; ++ ++ !(offset >= self.cache_size || end > self.cache_size) ++ } ++} ++ ++impl VhostUserMasterReqHandler for SlaveReqHandler { ++ fn shmem_map(&self, req: &VhostUserShmemMapMsg, fd: &dyn AsRawFd) -> HandlerResult<u64> { ++ if !self.is_req_valid(req.shm_offset, req.len) { ++ return Err(io::Error::from_raw_os_error(libc::EINVAL)); ++ } ++ ++ let addr = self.mmap_cache_addr + req.shm_offset; ++ let mut ret = unsafe { ++ libc::mmap( ++ addr as *mut libc::c_void, ++ req.len as usize, ++ req.flags.bits() as i32, ++ libc::MAP_SHARED | libc::MAP_FIXED, ++ fd.as_raw_fd(), ++ req.fd_offset as libc::off_t, ++ ) ++ }; ++ ++ if ret == libc::MAP_FAILED { ++ ret = unsafe { ++ libc::mmap( ++ addr as *mut libc::c_void, ++ req.len as usize, ++ (req.flags.bits() as i32) & !libc::PROT_WRITE, ++ libc::MAP_SHARED | libc::MAP_FIXED, ++ fd.as_raw_fd(), ++ req.fd_offset as libc::off_t, ++ ) ++ }; ++ } ++ ++ if ret == libc::MAP_FAILED { ++ return Err(io::Error::last_os_error()); ++ } ++ ++ Ok(0) ++ } ++ ++ fn shmem_unmap(&self, req: &VhostUserShmemUnmapMsg) -> HandlerResult<u64> { ++ if !self.is_req_valid(req.shm_offset, req.len) { ++ return Err(io::Error::from_raw_os_error(libc::EINVAL)); ++ } ++ ++ let addr = self.mmap_cache_addr + req.shm_offset; ++ let ret = unsafe { ++ libc::mmap( ++ addr as *mut libc::c_void, ++ req.len 
as usize, ++ libc::PROT_NONE, ++ libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED, ++ -1, ++ 0, ++ ) ++ }; ++ if ret == libc::MAP_FAILED { ++ return Err(io::Error::last_os_error()); ++ } ++ ++ Ok(0) ++ } ++} ++ ++#[derive(Default)] ++#[repr(C, packed)] ++pub struct VirtioGpuConfig { ++ pub events_read: u32, ++ pub events_clear: u32, ++ pub num_scanouts: u32, ++ pub num_capsets: u32, ++} ++ ++pub struct Gpu { ++ common: VirtioCommon, ++ vu_common: VhostUserCommon, ++ id: String, ++ // Hold ownership of the memory that is allocated for the device ++ // which will be automatically dropped when the device is dropped ++ cache: Option<(VirtioSharedMemoryList, MmapRegion)>, ++ slave_req_support: bool, ++ epoll_thread: Option<thread::JoinHandle<()>>, ++ seccomp_action: SeccompAction, ++ exit_evt: EventFd, ++} ++ ++impl Gpu { ++ /// Create a new virtio-gpu device. ++ pub fn new( ++ id: String, ++ path: &str, ++ cache: Option<(VirtioSharedMemoryList, MmapRegion)>, ++ seccomp_action: SeccompAction, ++ exit_evt: EventFd, ++ iommu: bool, ++ ) -> Result<Gpu> { ++ // Connect to the vhost-user socket. ++ let mut vu = VhostUserHandle::connect_vhost_user(false, path, NUM_QUEUES as u64, false)?; ++ ++ let avail_features = 1 << VIRTIO_F_VERSION_1 ++ | 1 << VIRTIO_GPU_F_VIRGL ++ | 1 << VIRTIO_GPU_F_RESOURCE_UUID ++ | 1 << VIRTIO_GPU_F_RESOURCE_BLOB ++ | 1 << VIRTIO_GPU_F_CONTEXT_INIT ++ | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); ++ ++ let avail_protocol_features = ++ VhostUserProtocolFeatures::CONFIG | VhostUserProtocolFeatures::SLAVE_REQ; ++ // The SHARED_MEMORY_REGIONS protocol feature is a way for the backend to indicate ++ // that it supports the GET_SHARED_MEMORY_REGIONS request. Since we don't use that ++ // request, we don't ack SHARED_MEMORY_REGIONS. 
++ ++ let (acked_features, acked_protocol_features) = ++ vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; ++ ++ Ok(Gpu { ++ common: VirtioCommon { ++ device_type: VirtioDeviceType::Gpu as u32, ++ avail_features: acked_features, ++ // If part of the available features that have been acked, the ++ // PROTOCOL_FEATURES bit must be already set through the VIRTIO ++ // acked features as we know the guest would never ack it, thus ++ // the feature would be lost. ++ acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), ++ paused_sync: Some(Arc::new(Barrier::new(NUM_QUEUES as usize))), ++ queue_sizes: QUEUE_SIZES.to_vec(), ++ min_queues: NUM_QUEUES, ++ ..Default::default() ++ }, ++ vu_common: VhostUserCommon { ++ vu: Some(Arc::new(Mutex::new(vu))), ++ acked_protocol_features, ++ socket_path: path.to_string(), ++ vu_num_queues: NUM_QUEUES as usize, ++ ..Default::default() ++ }, ++ id, ++ slave_req_support: acked_protocol_features ++ & VhostUserProtocolFeatures::SLAVE_REQ.bits() ++ != 0, ++ cache, ++ seccomp_action, ++ epoll_thread: None, ++ exit_evt, ++ }) ++ } ++} ++ ++impl Drop for Gpu { ++ fn drop(&mut self) { ++ if let Some(kill_evt) = self.common.kill_evt.take() { ++ // Ignore the result because there is nothing we can do about it. 
++ let _ = kill_evt.write(1); ++ } ++ } ++} ++ ++impl VirtioDevice for Gpu { ++ fn device_type(&self) -> u32 { ++ self.common.device_type ++ } ++ ++ fn queue_max_sizes(&self) -> &[u16] { ++ &self.common.queue_sizes ++ } ++ ++ fn features(&self) -> u64 { ++ self.common.avail_features ++ } ++ ++ fn ack_features(&mut self, value: u64) { ++ self.common.ack_features(value) ++ } ++ ++ fn read_config(&self, offset: u64, mut data: &mut [u8]) { ++ if let Some(vu) = &self.vu_common.vu { ++ if let Err(e) = vu ++ .lock() ++ .unwrap() ++ .socket_handle() ++ .get_config( ++ offset as u32, ++ data.len() as u32, ++ VhostUserConfigFlags::WRITABLE, ++ data, ++ ) ++ .map_err(|e| format!("{:?}", e)) ++ .and_then(|(_, config)| data.write_all(&config).map_err(|e| format!("{:?}", e))) ++ { ++ error!("Failed getting vhost-user-gpu configuration: {:?}", e); ++ } else { ++ eprintln!("read_config({}, {:?})", offset, data); ++ } ++ } ++ } ++ ++ fn activate( ++ &mut self, ++ mem: GuestMemoryAtomic<GuestMemoryMmap>, ++ interrupt_cb: Arc<dyn VirtioInterrupt>, ++ queues: Vec<(usize, Queue, EventFd)>, ++ ) -> ActivateResult { ++ self.common.activate(&queues, &interrupt_cb)?; ++ ++ // Initialize slave communication. ++ let slave_req_handler = if self.slave_req_support { ++ if let Some(cache) = self.cache.as_ref() { ++ let vu_master_req_handler = Arc::new(SlaveReqHandler { ++ cache_size: cache.0.len, ++ mmap_cache_addr: cache.0.host_addr, ++ }); ++ ++ let mut req_handler = ++ MasterReqHandler::new(vu_master_req_handler).map_err(|e| { ++ ActivateError::VhostUserGpuSetup(Error::MasterReqHandlerCreation(e)) ++ })?; ++ ++ if self.vu_common.acked_protocol_features ++ & VhostUserProtocolFeatures::REPLY_ACK.bits() ++ != 0 ++ { ++ req_handler.set_reply_ack_flag(true); ++ } ++ ++ Some(req_handler) ++ } else { ++ None ++ } ++ } else { ++ None ++ }; ++ ++ // Run a dedicated thread for handling potential reconnections with ++ // the backend. 
++ let (kill_evt, pause_evt) = self.common.dup_eventfds(); ++ ++ let mut handler = self.vu_common.activate( ++ mem, ++ queues, ++ interrupt_cb, ++ self.common.acked_features, ++ slave_req_handler, ++ kill_evt, ++ pause_evt, ++ )?; ++ ++ let paused = self.common.paused.clone(); ++ let paused_sync = self.common.paused_sync.clone(); ++ ++ let mut epoll_threads = Vec::new(); ++ spawn_virtio_thread( ++ &self.id, ++ &self.seccomp_action, ++ Thread::VirtioVhostGpu, ++ &mut epoll_threads, ++ &self.exit_evt, ++ move || handler.run(paused, paused_sync.unwrap()), ++ )?; ++ self.epoll_thread = Some(epoll_threads.remove(0)); ++ ++ event!("virtio-device", "activated", "id", &self.id); ++ Ok(()) ++ } ++ ++ fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { ++ // We first must resume the virtio thread if it was paused. ++ if self.common.pause_evt.take().is_some() { ++ self.common.resume().ok()?; ++ } ++ ++ if let Some(vu) = &self.vu_common.vu { ++ if let Err(e) = vu.lock().unwrap().reset_vhost_user() { ++ error!("Failed to reset vhost-user daemon: {:?}", e); ++ return None; ++ } ++ } ++ ++ if let Some(kill_evt) = self.common.kill_evt.take() { ++ // Ignore the result because there is nothing we can do about it. ++ let _ = kill_evt.write(1); ++ } ++ ++ event!("virtio-device", "reset", "id", &self.id); ++ ++ // Return the interrupt ++ Some(self.common.interrupt_cb.take().unwrap()) ++ } ++ ++ fn shutdown(&mut self) { ++ self.vu_common.shutdown() ++ } ++ ++ fn get_shm_regions(&self) -> Option<VirtioSharedMemoryList> { ++ // It would be possible to get the size of the region from the ++ // backend over vhost-user, but since we need to know the size ++ // up front in cloud-hypervisor to construct Self, it wouldn't ++ // help. The user is therefore responsible for configuring ++ // the correct region size in VM configuration. 
++ self.cache.as_ref().map(|cache| cache.0.clone()) ++ } ++ ++ fn set_shm_regions( ++ &mut self, ++ shm_regions: VirtioSharedMemoryList, ++ ) -> std::result::Result<(), crate::Error> { ++ todo!("set_shm_regions({:?})", shm_regions) ++ } ++ ++ fn add_memory_region( ++ &mut self, ++ region: &Arc<GuestRegionMmap>, ++ ) -> std::result::Result<(), crate::Error> { ++ todo!("add_memory_region({:?})", region) ++ } ++ ++ fn userspace_mappings(&self) -> Vec<UserspaceMapping> { ++ todo!() ++ } ++} +diff --git a/virtio-devices/src/vhost_user/mod.rs b/virtio-devices/src/vhost_user/mod.rs +index 6ad88b44..981eca93 100644 +--- a/virtio-devices/src/vhost_user/mod.rs ++++ b/virtio-devices/src/vhost_user/mod.rs +@@ -31,11 +31,13 @@ use vu_common_ctrl::VhostUserHandle; + + pub mod blk; + pub mod fs; ++pub mod gpu; + pub mod net; + pub mod vu_common_ctrl; + + pub use self::blk::Blk; + pub use self::fs::*; ++pub use self::gpu::*; + pub use self::net::Net; + pub use self::vu_common_ctrl::VhostUserConfig; + +diff --git a/virtio-devices/src/vhost_user/vu_common_ctrl.rs b/virtio-devices/src/vhost_user/vu_common_ctrl.rs +index 7bbf936c..68d3a521 100644 +--- a/virtio-devices/src/vhost_user/vu_common_ctrl.rs ++++ b/virtio-devices/src/vhost_user/vu_common_ctrl.rs +@@ -199,6 +199,14 @@ impl VhostUserHandle { + .map_err(Error::VhostUserSetInflight)?; + } + ++ // FIXME: crosvm's vhost-user backend requires this to come first. ++ // Can we fix that in crosvm? 
++ if let Some(slave_req_handler) = slave_req_handler { ++ self.vu ++ .set_slave_request_fd(&slave_req_handler.get_tx_raw_fd()) ++ .map_err(Error::VhostUserSetSlaveRequestFd)?; ++ } ++ + let mut vrings_info = Vec::new(); + for (queue_index, queue, queue_evt) in queues.iter() { + let actual_size: usize = queue.size().try_into().unwrap(); +@@ -267,12 +275,6 @@ impl VhostUserHandle { + + self.enable_vhost_user_vrings(self.queue_indexes.clone(), true)?; + +- if let Some(slave_req_handler) = slave_req_handler { +- self.vu +- .set_slave_request_fd(&slave_req_handler.get_tx_raw_fd()) +- .map_err(Error::VhostUserSetSlaveRequestFd)?; +- } +- + self.vrings_info = Some(vrings_info); + self.ready = true; + +diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml +index a2ef658e..305b439e 100644 +--- a/vmm/Cargo.toml ++++ b/vmm/Cargo.toml +@@ -47,6 +47,7 @@ versionize_derive = "0.1.4" + vfio-ioctls = { git = "https://github.com/rust-vmm/vfio", branch = "main", default-features = false } + vfio_user = { git = "https://github.com/rust-vmm/vfio-user", branch = "main" } + vhdx = { path = "../vhdx" } ++virtio-bindings = "0.2.0" + virtio-devices = { path = "../virtio-devices" } + virtio-queue = "0.7.0" + vm-allocator = { path = "../vm-allocator" } +diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs +index c1b5ef0d..17520216 100644 +--- a/vmm/src/api/mod.rs ++++ b/vmm/src/api/mod.rs +@@ -34,6 +34,7 @@ pub use self::http::start_http_path_thread; + pub mod http; + pub mod http_endpoint; + ++use crate::config::GpuConfig; + use crate::config::{ + DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig, UserDeviceConfig, + VdpaConfig, VmConfig, VsockConfig, +@@ -132,6 +133,9 @@ pub enum ApiError { + /// The fs could not be added to the VM. + VmAddFs(VmError), + ++ /// The gpu could not be added to the VM. ++ VmAddGpu(VmError), ++ + /// The pmem device could not be added to the VM. + VmAddPmem(VmError), + +@@ -301,6 +305,9 @@ pub enum ApiRequest { + /// Add a fs to the VM. 
+ VmAddFs(Arc<FsConfig>, Sender<ApiResponse>), + ++ /// Add a gpu to the VM. ++ VmAddGpu(Arc<GpuConfig>, Sender<ApiResponse>), ++ + /// Add a pmem device to the VM. + VmAddPmem(Arc<PmemConfig>, Sender<ApiResponse>), + +diff --git a/vmm/src/config.rs b/vmm/src/config.rs +index f2476da8..a878b351 100644 +--- a/vmm/src/config.rs ++++ b/vmm/src/config.rs +@@ -26,6 +26,8 @@ pub enum Error { + ParseFsTagMissing, + /// Filesystem socket is missing + ParseFsSockMissing, ++ /// GPU socket is missing ++ ParseGpuSockMissing, + /// Missing persistent memory file parameter. + ParsePmemFileMissing, + /// Missing vsock socket path parameter. +@@ -54,6 +56,8 @@ pub enum Error { + ParseBalloon(OptionParserError), + /// Error parsing filesystem parameters + ParseFileSystem(OptionParserError), ++ /// Error parsing GPU parameters ++ ParseGpu(OptionParserError), + /// Error parsing persistent memory parameters + ParsePersistentMemory(OptionParserError), + /// Failed parsing console +@@ -301,7 +305,9 @@ impl fmt::Display for Error { + ParseFileSystem(o) => write!(f, "Error parsing --fs: {o}"), + ParseFsSockMissing => write!(f, "Error parsing --fs: socket missing"), + ParseFsTagMissing => write!(f, "Error parsing --fs: tag missing"), +- ParsePersistentMemory(o) => write!(f, "Error parsing --pmem: {o}"), ++ ParseGpu(o) => write!(f, "Error parsing --gpu: {o}"), ++ ParseGpuSockMissing => write!(f, "Error parsing --gpu: socket missing"), ++ ParsePersistentMemory(o) => write!(f, "Error parsing --pmem: {}", o), + ParsePmemFileMissing => write!(f, "Error parsing --pmem: file missing"), + ParseVsock(o) => write!(f, "Error parsing --vsock: {o}"), + ParseVsockCidMissing => write!(f, "Error parsing --vsock: cid missing"), +@@ -363,6 +369,7 @@ pub struct VmParams<'a> { + pub rng: &'a str, + pub balloon: Option<&'a str>, + pub fs: Option<Vec<&'a str>>, ++ pub gpu: Option<Vec<&'a str>>, + pub pmem: Option<Vec<&'a str>>, + pub serial: &'a str, + pub console: &'a str, +@@ -1285,6 +1292,56 @@ impl 
FsConfig { + } + } + ++impl GpuConfig { ++ pub fn parse(gpu: &str) -> Result<Self> { ++ let mut parser = OptionParser::new(); ++ parser ++ .add("socket") ++ .add("cache_size") ++ .add("id") ++ .add("pci_segment"); ++ parser.parse(gpu).map_err(Error::ParseGpu)?; ++ ++ let socket = PathBuf::from(parser.get("socket").ok_or(Error::ParseGpuSockMissing)?); ++ let cache_size = parser ++ .convert::<ByteSized>("cache_size") ++ .map_err(Error::ParseGpu)? ++ .unwrap_or_else(|| ByteSized(default_gpuconfig_cache_size())) ++ .0; ++ let id = parser.get("id"); ++ ++ let pci_segment = parser ++ .convert("pci_segment") ++ .map_err(Error::ParseGpu)? ++ .unwrap_or_default(); ++ ++ Ok(GpuConfig { ++ socket, ++ cache_size, ++ id, ++ pci_segment, ++ }) ++ } ++ ++ pub fn validate(&self, vm_config: &VmConfig) -> ValidationResult<()> { ++ if let Some(platform_config) = vm_config.platform.as_ref() { ++ if self.pci_segment >= platform_config.num_pci_segments { ++ return Err(ValidationError::InvalidPciSegment(self.pci_segment)); ++ } ++ ++ if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { ++ if iommu_segments.contains(&self.pci_segment) { ++ return Err(ValidationError::IommuNotSupportedOnSegment( ++ self.pci_segment, ++ )); ++ } ++ } ++ } ++ ++ Ok(()) ++ } ++} ++ + impl PmemConfig { + pub fn parse(pmem: &str) -> Result<Self> { + let mut parser = OptionParser::new(); +@@ -1818,6 +1875,17 @@ impl VmConfig { + } + } + ++ if let Some(gpus) = &self.gpu { ++ if !gpus.is_empty() && !self.memory.shared { ++ return Err(ValidationError::VhostUserRequiresSharedMemory); ++ } ++ for gpu in gpus { ++ gpu.validate(self)?; ++ ++ Self::validate_identifier(&mut id_list, &gpu.id)?; ++ } ++ } ++ + if let Some(pmems) = &self.pmem { + for pmem in pmems { + pmem.validate(self)?; +@@ -2003,6 +2071,15 @@ impl VmConfig { + fs = Some(fs_config_list); + } + ++ let mut gpu: Option<Vec<GpuConfig>> = None; ++ if let Some(gpu_list) = &vm_params.gpu { ++ let mut gpu_config_list = Vec::new(); ++ for item 
in gpu_list.iter() { ++ gpu_config_list.push(GpuConfig::parse(item)?); ++ } ++ gpu = Some(gpu_config_list); ++ } ++ + let mut pmem: Option<Vec<PmemConfig>> = None; + if let Some(pmem_list) = &vm_params.pmem { + let mut pmem_config_list = Vec::new(); +@@ -2109,6 +2186,7 @@ impl VmConfig { + rng, + balloon, + fs, ++ gpu, + pmem, + serial, + console, +@@ -2516,6 +2594,21 @@ mod tests { + Ok(()) + } + ++ #[test] ++ fn test_parse_gpu() -> Result<()> { ++ // "socket" must be supplied ++ assert!(GpuConfig::parse("").is_err()); ++ assert_eq!( ++ GpuConfig::parse("socket=/tmp/sock")?, ++ GpuConfig { ++ socket: PathBuf::from("/tmp/sock"), ++ ..Default::default() ++ } ++ ); ++ ++ Ok(()) ++ } ++ + #[test] + fn test_pmem_parsing() -> Result<()> { + // Must always give a file and size +@@ -2750,6 +2843,7 @@ mod tests { + }, + balloon: None, + fs: None, ++ gpu: None, + pmem: None, + serial: ConsoleConfig { + file: None, +@@ -2927,6 +3021,15 @@ mod tests { + Err(ValidationError::VhostUserRequiresSharedMemory) + ); + ++ let mut invalid_config = valid_config.clone(); ++ invalid_config.gpu = Some(vec![GpuConfig { ++ ..Default::default() ++ }]); ++ assert_eq!( ++ invalid_config.validate(), ++ Err(ValidationError::VhostUserRequiresSharedMemory) ++ ); ++ + let mut still_valid_config = valid_config.clone(); + still_valid_config.memory.shared = true; + assert!(still_valid_config.validate().is_ok()); +diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs +index 76ca3775..1cec7c6a 100644 +--- a/vmm/src/device_manager.rs ++++ b/vmm/src/device_manager.rs +@@ -10,8 +10,8 @@ + // + + use crate::config::{ +- ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, +- VdpaConfig, VhostMode, VmConfig, VsockConfig, ++ ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, ++ UserDeviceConfig, VdpaConfig, VhostMode, VmConfig, VsockConfig, + }; + use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; + use 
crate::device_tree::{DeviceNode, DeviceTree}; +@@ -64,6 +64,7 @@ use serde::{Deserialize, Serialize}; + use std::collections::{BTreeSet, HashMap}; + use std::fs::{read_link, File, OpenOptions}; + use std::io::{self, stdout, Seek, SeekFrom}; ++use std::iter::once; + use std::mem::zeroed; + use std::num::Wrapping; + use std::os::unix::fs::OpenOptionsExt; +@@ -74,11 +75,13 @@ use std::sync::{Arc, Mutex}; + use std::time::Instant; + use tracer::trace_scoped; + use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; ++use virtio_bindings::virtio_gpu::virtio_gpu_shm_id_VIRTIO_GPU_SHM_ID_HOST_VISIBLE as VIRTIO_GPU_SHM_ID_HOST_VISIBLE; + use virtio_devices::transport::VirtioTransport; + use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; + use virtio_devices::vhost_user::VhostUserConfig; + use virtio_devices::{ + AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, ++ VirtioSharedMemory, VirtioSharedMemoryList, + }; + use virtio_devices::{Endpoint, IommuMapping}; + use vm_allocator::{AddressAllocator, SystemAllocator}; +@@ -119,6 +122,7 @@ const CONSOLE_DEVICE_NAME: &str = "__console"; + // identifiers if the user doesn't give one + const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; + const FS_DEVICE_NAME_PREFIX: &str = "_fs"; ++const GPU_DEVICE_NAME_PREFIX: &str = "_gpu"; + const NET_DEVICE_NAME_PREFIX: &str = "_net"; + const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; + const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; +@@ -155,9 +159,15 @@ pub enum DeviceManagerError { + /// Cannot create virtio-fs device + CreateVirtioFs(virtio_devices::vhost_user::Error), + ++ /// Cannot create virtio-gpu device ++ CreateVirtioGpu(virtio_devices::vhost_user::Error), ++ + /// Virtio-fs device was created without a socket. + NoVirtioFsSock, + ++ /// Virtio-gpu device was created without a socket. 
++ NoVirtioGpuSock, ++ + /// Cannot create vhost-user-blk device + CreateVhostUserBlk(virtio_devices::vhost_user::Error), + +@@ -242,6 +252,9 @@ pub enum DeviceManagerError { + /// Cannot find a memory range for virtio-fs + FsRangeAllocation, + ++ /// Cannot find a memory range for virtio-gpu ++ GpuRangeAllocation, ++ + /// Error creating serial output file + SerialOutputFileOpen(io::Error), + +@@ -2085,6 +2098,9 @@ impl DeviceManager { + // Add virtio-fs if required + devices.append(&mut self.make_virtio_fs_devices()?); + ++ // Add virtio-gpu if required ++ devices.append(&mut self.make_virtio_gpu_devices()?); ++ + // Add virtio-pmem if required + devices.append(&mut self.make_virtio_pmem_devices()?); + +@@ -2576,6 +2592,118 @@ impl DeviceManager { + Ok(devices) + } + ++ fn make_virtio_gpu_device( ++ &mut self, ++ gpu_cfg: &mut GpuConfig, ++ ) -> DeviceManagerResult<MetaVirtioDevice> { ++ let id = if let Some(id) = &gpu_cfg.id { ++ id.clone() ++ } else { ++ let id = self.next_device_name(GPU_DEVICE_NAME_PREFIX)?; ++ gpu_cfg.id = Some(id.clone()); ++ id ++ }; ++ ++ info!("Creating virtio-gpu device: {:?}", gpu_cfg); ++ ++ let mut node = device_node!(id); ++ ++ if let Some(gpu_socket) = gpu_cfg.socket.to_str() { ++ let cache_size = gpu_cfg.cache_size; ++ // In crosvm, the 8 GiB bar is 8 GiB-aligned. ++ let cache_base = self.pci_segments[gpu_cfg.pci_segment as usize] ++ .allocator ++ .lock() ++ .unwrap() ++ .allocate(None, cache_size as GuestUsize, Some(cache_size)) ++ .ok_or(DeviceManagerError::GpuRangeAllocation)? ++ .raw_value(); ++ ++ // Update the node with correct resource information. 
++ node.resources.push(Resource::MmioAddressRange { ++ base: cache_base, ++ size: cache_size, ++ }); ++ ++ let mmap_region = MmapRegion::build( ++ None, ++ cache_size as usize, ++ libc::PROT_NONE, ++ libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, ++ ) ++ .map_err(DeviceManagerError::NewMmapRegion)?; ++ let host_addr: u64 = mmap_region.as_ptr() as u64; ++ ++ let mem_slot = self ++ .memory_manager ++ .lock() ++ .unwrap() ++ .create_userspace_mapping(cache_base, cache_size, host_addr, false, false, false) ++ .map_err(DeviceManagerError::MemoryManager)?; ++ ++ let region_list = once(( ++ VIRTIO_GPU_SHM_ID_HOST_VISIBLE as u8, ++ VirtioSharedMemory { ++ offset: 0, ++ len: cache_size, ++ }, ++ )) ++ .collect(); ++ ++ let cache = Some(( ++ VirtioSharedMemoryList { ++ host_addr, ++ mem_slot, ++ addr: GuestAddress(cache_base), ++ len: cache_size as GuestUsize, ++ region_list, ++ }, ++ mmap_region, ++ )); ++ ++ let virtio_gpu_device = Arc::new(Mutex::new( ++ virtio_devices::vhost_user::Gpu::new( ++ id.clone(), ++ gpu_socket, ++ cache, ++ self.seccomp_action.clone(), ++ self.exit_evt ++ .try_clone() ++ .map_err(DeviceManagerError::EventFd)?, ++ self.force_iommu, ++ ) ++ .map_err(DeviceManagerError::CreateVirtioGpu)?, ++ )); ++ ++ self.device_tree.lock().unwrap().insert(id.clone(), node); ++ ++ Ok(MetaVirtioDevice { ++ virtio_device: Arc::clone(&virtio_gpu_device) ++ as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, ++ iommu: false, ++ id, ++ pci_segment: gpu_cfg.pci_segment, ++ dma_handler: None, ++ }) ++ } else { ++ Err(DeviceManagerError::NoVirtioGpuSock) ++ } ++ } ++ ++ fn make_virtio_gpu_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { ++ let mut devices = Vec::new(); ++ ++ let mut gpu_devices = self.config.lock().unwrap().gpu.clone(); ++ if let Some(gpu_list_cfg) = &mut gpu_devices { ++ for gpu_cfg in gpu_list_cfg.iter_mut() { ++ devices.push(self.make_virtio_gpu_device(gpu_cfg)?); ++ } ++ } ++ self.config.lock().unwrap().gpu = gpu_devices; ++ ++ Ok(devices) ++ 
} ++ + fn make_virtio_pmem_device( + &mut self, + pmem_cfg: &mut PmemConfig, +@@ -4066,6 +4194,13 @@ impl DeviceManager { + self.hotplug_virtio_pci_device(device) + } + ++ pub fn add_gpu(&mut self, gpu_cfg: &mut GpuConfig) -> DeviceManagerResult<PciDeviceInfo> { ++ self.validate_identifier(&gpu_cfg.id)?; ++ ++ let device = self.make_virtio_gpu_device(gpu_cfg)?; ++ self.hotplug_virtio_pci_device(device) ++ } ++ + pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { + self.validate_identifier(&pmem_cfg.id)?; + +diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs +index a25cab35..6a1b4a82 100644 +--- a/vmm/src/lib.rs ++++ b/vmm/src/lib.rs +@@ -25,6 +25,7 @@ use crate::migration::{recv_vm_config, recv_vm_state}; + use crate::seccomp_filters::{get_seccomp_filter, Thread}; + use crate::vm::{Error as VmError, Vm, VmState}; + use anyhow::anyhow; ++use config::GpuConfig; + use libc::{EFD_NONBLOCK, SIGINT, SIGTERM}; + use memory_manager::MemoryManagerSnapshotData; + use pci::PciBdf; +@@ -1013,6 +1014,32 @@ impl Vmm { + } + } + ++ fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> result::Result<Option<Vec<u8>>, VmError> { ++ self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; ++ ++ { ++ // Validate the configuration change in a cloned configuration ++ let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); ++ add_to_config(&mut config.gpu, gpu_cfg.clone()); ++ config.validate().map_err(VmError::ConfigValidation)?; ++ } ++ ++ if let Some(ref mut vm) = self.vm { ++ let info = vm.add_gpu(gpu_cfg).map_err(|e| { ++ error!("Error when adding new gpu to the VM: {:?}", e); ++ e ++ })?; ++ serde_json::to_vec(&info) ++ .map(Some) ++ .map_err(VmError::SerializeJson) ++ } else { ++ // Update VmConfig by adding the new device. 
++ let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); ++ add_to_config(&mut config.gpu, gpu_cfg); ++ Ok(None) ++ } ++ } ++ + fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { + self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; + +@@ -1941,6 +1968,13 @@ impl Vmm { + .map(ApiResponsePayload::VmAction); + sender.send(response).map_err(Error::ApiResponseSend)?; + } ++ ApiRequest::VmAddGpu(add_gpu_data, sender) => { ++ let response = self ++ .vm_add_gpu(add_gpu_data.as_ref().clone()) ++ .map_err(ApiError::VmAddGpu) ++ .map(ApiResponsePayload::VmAction); ++ sender.send(response).map_err(Error::ApiResponseSend)?; ++ } + ApiRequest::VmAddPmem(add_pmem_data, sender) => { + let response = self + .vm_add_pmem(add_pmem_data.as_ref().clone()) +@@ -2106,6 +2140,7 @@ mod unit_tests { + }, + balloon: None, + fs: None, ++ gpu: None, + pmem: None, + serial: ConsoleConfig { + file: None, +@@ -2331,6 +2366,54 @@ mod unit_tests { + ); + } + ++ #[test] ++ fn test_vmm_vm_cold_add_gpu() { ++ let mut vmm = create_dummy_vmm(); ++ let gpu_config = GpuConfig::parse("socket=/tmp/sock").unwrap(); ++ ++ assert!(matches!( ++ vmm.vm_add_gpu(gpu_config.clone()), ++ Err(VmError::VmNotCreated) ++ )); ++ ++ let _ = vmm.vm_create(create_dummy_vm_config()); ++ assert!(vmm ++ .vm_config ++ .as_ref() ++ .unwrap() ++ .lock() ++ .unwrap() ++ .gpu ++ .is_none()); ++ ++ let result = vmm.vm_add_gpu(gpu_config.clone()); ++ assert!(result.is_ok()); ++ assert!(result.unwrap().is_none()); ++ assert_eq!( ++ vmm.vm_config ++ .as_ref() ++ .unwrap() ++ .lock() ++ .unwrap() ++ .gpu ++ .clone() ++ .unwrap() ++ .len(), ++ 1 ++ ); ++ assert_eq!( ++ vmm.vm_config ++ .as_ref() ++ .unwrap() ++ .lock() ++ .unwrap() ++ .gpu ++ .clone() ++ .unwrap()[0], ++ gpu_config ++ ); ++ } ++ + #[test] + fn test_vmm_vm_cold_add_pmem() { + let mut vmm = create_dummy_vmm(); +diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs +index 21417309..2e23a48a 100644 +--- a/vmm/src/vm.rs ++++ 
b/vmm/src/vm.rs +@@ -12,8 +12,8 @@ + // + + use crate::config::{ +- add_to_config, DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, PmemConfig, +- UserDeviceConfig, ValidationError, VdpaConfig, VmConfig, VsockConfig, ++ add_to_config, DeviceConfig, DiskConfig, FsConfig, GpuConfig, HotplugMethod, NetConfig, ++ PmemConfig, UserDeviceConfig, ValidationError, VdpaConfig, VmConfig, VsockConfig, + }; + use crate::config::{NumaConfig, PayloadConfig}; + #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] +@@ -1454,6 +1454,11 @@ impl Vm { + fs.retain(|dev| dev.id.as_ref() != Some(&id)); + } + ++ // Remove if gpu device ++ if let Some(gpu) = config.gpu.as_mut() { ++ gpu.retain(|dev| dev.id.as_ref() != Some(&id)); ++ } ++ + // Remove if net device + if let Some(net) = config.net.as_mut() { + net.retain(|dev| dev.id.as_ref() != Some(&id)); +@@ -1532,6 +1537,30 @@ impl Vm { + Ok(pci_device_info) + } + ++ pub fn add_gpu(&mut self, mut gpu_cfg: GpuConfig) -> Result<PciDeviceInfo> { ++ let pci_device_info = self ++ .device_manager ++ .lock() ++ .unwrap() ++ .add_gpu(&mut gpu_cfg) ++ .map_err(Error::DeviceManager)?; ++ ++ // Update VmConfig by adding the new device. This is important to ++ // ensure the device would be created in case of a reboot. 
++ { ++ let mut config = self.config.lock().unwrap(); ++ add_to_config(&mut config.gpu, gpu_cfg); ++ } ++ ++ self.device_manager ++ .lock() ++ .unwrap() ++ .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED) ++ .map_err(Error::DeviceManager)?; ++ ++ Ok(pci_device_info) ++ } ++ + pub fn add_pmem(&mut self, mut pmem_cfg: PmemConfig) -> Result<PciDeviceInfo> { + let pci_device_info = self + .device_manager +diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs +index ed7e4c10..f733979a 100644 +--- a/vmm/src/vm_config.rs ++++ b/vmm/src/vm_config.rs +@@ -411,6 +411,30 @@ impl Default for FsConfig { + } + } + ++#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] ++pub struct GpuConfig { ++ pub socket: PathBuf, ++ #[serde(default = "default_gpuconfig_cache_size")] ++ pub cache_size: u64, ++ pub id: Option<String>, ++ pub pci_segment: u16, ++} ++ ++impl Default for GpuConfig { ++ fn default() -> Self { ++ Self { ++ socket: PathBuf::new(), ++ cache_size: default_gpuconfig_cache_size(), ++ id: None, ++ pci_segment: 0, ++ } ++ } ++} ++ ++pub fn default_gpuconfig_cache_size() -> u64 { ++ 1 << 33 ++} ++ + #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] + pub struct PmemConfig { + pub file: PathBuf, +@@ -576,6 +600,7 @@ pub struct VmConfig { + pub rng: RngConfig, + pub balloon: Option<BalloonConfig>, + pub fs: Option<Vec<FsConfig>>, ++ pub gpu: Option<Vec<GpuConfig>>, + pub pmem: Option<Vec<PmemConfig>>, + #[serde(default = "default_serial")] + pub serial: ConsoleConfig, +-- +2.37.1 + diff --git a/pkgs/applications/virtualization/cloud-hypervisor/Cargo.lock b/pkgs/applications/virtualization/cloud-hypervisor/Cargo.lock index f4e667f7776..bc51e03bc87 100644 --- a/pkgs/applications/virtualization/cloud-hypervisor/Cargo.lock +++ b/pkgs/applications/virtualization/cloud-hypervisor/Cargo.lock @@ -1413,8 +1413,6 @@ dependencies = [ [[package]] name = "vhost" version = "0.6.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9b791c5b0717a0558888a4cf7240cea836f39a99cb342e12ce633dcaa078072" dependencies = [ "bitflags", "libc", @@ -1632,6 +1630,7 @@ dependencies = [ "vfio-ioctls", "vfio_user", "vhdx", + "virtio-bindings 0.2.0", "virtio-devices", "virtio-queue", "vm-allocator", diff --git a/pkgs/applications/virtualization/cloud-hypervisor/default.nix b/pkgs/applications/virtualization/cloud-hypervisor/default.nix index dc40b4d078d..19e8bbd6f0e 100644 --- a/pkgs/applications/virtualization/cloud-hypervisor/default.nix +++ b/pkgs/applications/virtualization/cloud-hypervisor/default.nix @@ -1,4 +1,6 @@ -{ lib, stdenv, fetchFromGitHub, rustPlatform, pkg-config, dtc, openssl }: +{ lib, stdenv, fetchFromGitHub, fetchCrate, rustPlatform, pkg-config +, dtc, openssl +}: rustPlatform.buildRustPackage rec { pname = "cloud-hypervisor"; @@ -13,6 +15,40 @@ rustPlatform.buildRustPackage rec { separateDebugInfo = true; + vhost = fetchCrate { + pname = "vhost"; + version = "0.6.0"; + sha256 = "cbB1MVYIqOY2voiZt8jsGMAt54oU62cDdJq2mtj/1BA="; + }; + + postUnpack = '' + mkdir -p vhost/crates + pushd vhost/crates + unpackFile ${vhost} + mv * vhost + chmod -R +w vhost + popd + ''; + + cargoPatches = [ + ./0001-build-use-local-vhost.patch + ./0002-virtio-devices-add-a-GPU-device.patch + ]; + + vhostPatches = [ + vhost/0001-vhost_user-add-shared-memory-region-support.patch + vhost/0002-devices-vhost-user-add-protocol-flag-for-shmem.patch + ]; + + postPatch = '' + pushd ../vhost/crates/vhost + for patch in $vhostPatches; do + echo applying patch $patch + patch -p1 < $patch + done + popd + ''; + nativeBuildInputs = [ pkg-config ]; buildInputs = [ openssl ] ++ lib.optional stdenv.isAarch64 dtc; diff --git a/pkgs/applications/virtualization/cloud-hypervisor/vhost/0001-vhost_user-add-shared-memory-region-support.patch b/pkgs/applications/virtualization/cloud-hypervisor/vhost/0001-vhost_user-add-shared-memory-region-support.patch new file mode 100644 
index 00000000000..2729f88d9e5 --- /dev/null +++ b/pkgs/applications/virtualization/cloud-hypervisor/vhost/0001-vhost_user-add-shared-memory-region-support.patch @@ -0,0 +1,709 @@ +From ad7be9c6a962c1bc0c2dce6dc633e24805ad9e31 Mon Sep 17 00:00:00 2001 +From: David Stevens <stevensd@chromium.org> +Date: Wed, 15 Jun 2022 16:45:12 +0900 +Subject: [PATCH 1/2] vhost_user: add shared memory region support + +Add support for shared memory regions to vhost-user. This is adding +support for a front-end message to query for necessary shared memory +regions plus back-end message to support mapping/unmapping files from +the shared memory region. + +go/vvu-shared-memory + +BUG=b:201745804 +TEST=compiles + +Change-Id: I35c5d260ee09175b68f6778b81883e0070ee0265 +Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3716344 +Reviewed-by: Keiichi Watanabe <keiichiw@chromium.org> +Commit-Queue: David Stevens <stevensd@chromium.org> +Reviewed-by: Alexandre Courbot <acourbot@chromium.org> +Tested-by: kokoro <noreply+kokoro@google.com> +(cherry-picked from commit f436e2706011fa5f34dc415972434aa3299ebc43) +Signed-off-by: Alyssa Ross <alyssa.ross@unikie.com> +--- + src/vhost_user/dummy_slave.rs | 4 + + src/vhost_user/master.rs | 25 +++++ + src/vhost_user/master_req_handler.rs | 66 ++++++++++--- + src/vhost_user/message.rs | 140 +++++++++++++++++++++++++-- + src/vhost_user/mod.rs | 2 +- + src/vhost_user/slave_fs_cache.rs | 63 +++++++----- + src/vhost_user/slave_req_handler.rs | 27 +++++- + 7 files changed, 277 insertions(+), 50 deletions(-) + +diff --git a/src/vhost_user/dummy_slave.rs b/src/vhost_user/dummy_slave.rs +index ae728a0..00a1ae8 100644 +--- a/src/vhost_user/dummy_slave.rs ++++ b/src/vhost_user/dummy_slave.rs +@@ -291,4 +291,8 @@ impl VhostUserSlaveReqHandlerMut for DummySlaveReqHandler { + fn remove_mem_region(&mut self, _region: &VhostUserSingleMemoryRegion) -> Result<()> { + Ok(()) + } ++ ++ fn get_shared_memory_regions(&mut self) -> 
Result<Vec<VhostSharedMemoryRegion>> { ++ Ok(Vec::new()) ++ } + } +diff --git a/src/vhost_user/master.rs b/src/vhost_user/master.rs +index 87fef33..deab6a7 100644 +--- a/src/vhost_user/master.rs ++++ b/src/vhost_user/master.rs +@@ -72,6 +72,9 @@ pub trait VhostUserMaster: VhostBackend { + + /// Remove a guest memory mapping from vhost. + fn remove_mem_region(&mut self, region: &VhostUserMemoryRegionInfo) -> Result<()>; ++ ++ /// Gets the shared memory regions used by the device. ++ fn get_shared_memory_regions(&self) -> Result<Vec<VhostSharedMemoryRegion>>; + } + + fn error_code<T>(err: VhostUserError) -> Result<T> { +@@ -527,6 +530,28 @@ impl VhostUserMaster for Master { + let hdr = node.send_request_with_body(MasterReq::REM_MEM_REG, &body, None)?; + node.wait_for_ack(&hdr).map_err(|e| e.into()) + } ++ ++ fn get_shared_memory_regions(&self) -> Result<Vec<VhostSharedMemoryRegion>> { ++ let mut node = self.node(); ++ let hdr = node.send_request_header(MasterReq::GET_SHARED_MEMORY_REGIONS, None)?; ++ let (body_reply, buf_reply, rfds) = node.recv_reply_with_payload::<VhostUserU64>(&hdr)?; ++ let struct_size = mem::size_of::<VhostSharedMemoryRegion>(); ++ if rfds.is_some() || buf_reply.len() != body_reply.value as usize * struct_size { ++ return error_code(VhostUserError::InvalidMessage); ++ } ++ let mut regions = Vec::new(); ++ let mut offset = 0; ++ for _ in 0..body_reply.value { ++ regions.push( ++ // Can't fail because the input is the correct size. ++ VhostSharedMemoryRegion::from_slice(&buf_reply[offset..(offset + struct_size)]) ++ .unwrap() ++ .clone(), ++ ); ++ offset += struct_size; ++ } ++ Ok(regions) ++ } + } + + impl AsRawFd for Master { +diff --git a/src/vhost_user/master_req_handler.rs b/src/vhost_user/master_req_handler.rs +index 0ecda4e..54cc280 100644 +--- a/src/vhost_user/master_req_handler.rs ++++ b/src/vhost_user/master_req_handler.rs +@@ -17,7 +17,7 @@ use super::{Error, HandlerResult, Result}; + /// request services from masters. 
The [VhostUserMasterReqHandler] trait defines services provided + /// by masters, and it's used both on the master side and slave side. + /// - on the slave side, a stub forwarder implementing [VhostUserMasterReqHandler] will proxy +-/// service requests to masters. The [SlaveFsCacheReq] is an example stub forwarder. ++/// service requests to masters. The [Slave] is an example stub forwarder. + /// - on the master side, the [MasterReqHandler] will forward service requests to a handler + /// implementing [VhostUserMasterReqHandler]. + /// +@@ -26,13 +26,23 @@ use super::{Error, HandlerResult, Result}; + /// + /// [VhostUserMasterReqHandler]: trait.VhostUserMasterReqHandler.html + /// [MasterReqHandler]: struct.MasterReqHandler.html +-/// [SlaveFsCacheReq]: struct.SlaveFsCacheReq.html ++/// [Slave]: struct.Slave.html + pub trait VhostUserMasterReqHandler { + /// Handle device configuration change notifications. + fn handle_config_change(&self) -> HandlerResult<u64> { + Err(std::io::Error::from_raw_os_error(libc::ENOSYS)) + } + ++ /// Handle shared memory region mapping requests. ++ fn shmem_map(&self, _req: &VhostUserShmemMapMsg, _fd: &dyn AsRawFd) -> HandlerResult<u64> { ++ Err(std::io::Error::from_raw_os_error(libc::ENOSYS)) ++ } ++ ++ /// Handle shared memory region unmapping requests. ++ fn shmem_unmap(&self, _req: &VhostUserShmemUnmapMsg) -> HandlerResult<u64> { ++ Err(std::io::Error::from_raw_os_error(libc::ENOSYS)) ++ } ++ + /// Handle virtio-fs map file requests. + fn fs_slave_map(&self, _fs: &VhostUserFSSlaveMsg, _fd: &dyn AsRawFd) -> HandlerResult<u64> { + Err(std::io::Error::from_raw_os_error(libc::ENOSYS)) +@@ -66,6 +76,16 @@ pub trait VhostUserMasterReqHandlerMut { + Err(std::io::Error::from_raw_os_error(libc::ENOSYS)) + } + ++ /// Handle shared memory region mapping requests. 
++ fn shmem_map(&mut self, _req: &VhostUserShmemMapMsg, _fd: &dyn AsRawFd) -> HandlerResult<u64> { ++ Err(std::io::Error::from_raw_os_error(libc::ENOSYS)) ++ } ++ ++ /// Handle shared memory region unmapping requests. ++ fn shmem_unmap(&mut self, _req: &VhostUserShmemUnmapMsg) -> HandlerResult<u64> { ++ Err(std::io::Error::from_raw_os_error(libc::ENOSYS)) ++ } ++ + /// Handle virtio-fs map file requests. + fn fs_slave_map(&mut self, _fs: &VhostUserFSSlaveMsg, _fd: &dyn AsRawFd) -> HandlerResult<u64> { + Err(std::io::Error::from_raw_os_error(libc::ENOSYS)) +@@ -95,6 +115,14 @@ impl<S: VhostUserMasterReqHandlerMut> VhostUserMasterReqHandler for Mutex<S> { + self.lock().unwrap().handle_config_change() + } + ++ fn shmem_map(&self, req: &VhostUserShmemMapMsg, fd: &dyn AsRawFd) -> HandlerResult<u64> { ++ self.lock().unwrap().shmem_map(req, fd) ++ } ++ ++ fn shmem_unmap(&self, req: &VhostUserShmemUnmapMsg) -> HandlerResult<u64> { ++ self.lock().unwrap().shmem_unmap(req) ++ } ++ + fn fs_slave_map(&self, fs: &VhostUserFSSlaveMsg, fd: &dyn AsRawFd) -> HandlerResult<u64> { + self.lock().unwrap().fs_slave_map(fs, fd) + } +@@ -222,6 +250,19 @@ impl<S: VhostUserMasterReqHandler> MasterReqHandler<S> { + .handle_config_change() + .map_err(Error::ReqHandlerError) + } ++ SlaveReq::SHMEM_MAP => { ++ let msg = self.extract_msg_body::<VhostUserShmemMapMsg>(&hdr, size, &buf)?; ++ // check_attached_files() has validated files ++ self.backend ++ .shmem_map(&msg, &files.unwrap()[0]) ++ .map_err(Error::ReqHandlerError) ++ } ++ SlaveReq::SHMEM_UNMAP => { ++ let msg = self.extract_msg_body::<VhostUserShmemUnmapMsg>(&hdr, size, &buf)?; ++ self.backend ++ .shmem_unmap(&msg) ++ .map_err(Error::ReqHandlerError) ++ } + SlaveReq::FS_MAP => { + let msg = self.extract_msg_body::<VhostUserFSSlaveMsg>(&hdr, size, &buf)?; + // check_attached_files() has validated files +@@ -251,7 +292,7 @@ impl<S: VhostUserMasterReqHandler> MasterReqHandler<S> { + _ => Err(Error::InvalidMessage), + }; + +- 
self.send_ack_message(&hdr, &res)?; ++ self.send_reply(&hdr, &res)?; + + res + } +@@ -285,7 +326,7 @@ impl<S: VhostUserMasterReqHandler> MasterReqHandler<S> { + files: &Option<Vec<File>>, + ) -> Result<()> { + match hdr.get_code() { +- SlaveReq::FS_MAP | SlaveReq::FS_IO => { ++ SlaveReq::SHMEM_MAP | SlaveReq::FS_MAP | SlaveReq::FS_IO => { + // Expect a single file is passed. + match files { + Some(files) if files.len() == 1 => Ok(()), +@@ -326,12 +367,11 @@ impl<S: VhostUserMasterReqHandler> MasterReqHandler<S> { + )) + } + +- fn send_ack_message( +- &mut self, +- req: &VhostUserMsgHeader<SlaveReq>, +- res: &Result<u64>, +- ) -> Result<()> { +- if self.reply_ack_negotiated && req.is_need_reply() { ++ fn send_reply(&mut self, req: &VhostUserMsgHeader<SlaveReq>, res: &Result<u64>) -> Result<()> { ++ if req.get_code() == SlaveReq::SHMEM_MAP ++ || req.get_code() == SlaveReq::SHMEM_UNMAP ++ || (self.reply_ack_negotiated && req.is_need_reply()) ++ { + let hdr = self.new_reply_header::<VhostUserU64>(req)?; + let def_err = libc::EINVAL; + let val = match res { +@@ -362,7 +402,7 @@ mod tests { + use super::*; + + #[cfg(feature = "vhost-user-slave")] +- use crate::vhost_user::SlaveFsCacheReq; ++ use crate::vhost_user::Slave; + #[cfg(feature = "vhost-user-slave")] + use std::os::unix::io::FromRawFd; + +@@ -410,7 +450,7 @@ mod tests { + panic!("failed to duplicated tx fd!"); + } + let stream = unsafe { UnixStream::from_raw_fd(fd) }; +- let fs_cache = SlaveFsCacheReq::from_stream(stream); ++ let fs_cache = Slave::from_stream(stream); + + std::thread::spawn(move || { + let res = handler.handle_request().unwrap(); +@@ -440,7 +480,7 @@ mod tests { + panic!("failed to duplicated tx fd!"); + } + let stream = unsafe { UnixStream::from_raw_fd(fd) }; +- let fs_cache = SlaveFsCacheReq::from_stream(stream); ++ let fs_cache = Slave::from_stream(stream); + + std::thread::spawn(move || { + let res = handler.handle_request().unwrap(); +diff --git a/src/vhost_user/message.rs 
b/src/vhost_user/message.rs +index 6ccf926..adb485b 100644 +--- a/src/vhost_user/message.rs ++++ b/src/vhost_user/message.rs +@@ -139,8 +139,10 @@ pub enum MasterReq { + /// Query the backend for its device status as defined in the VIRTIO + /// specification. + GET_STATUS = 40, ++ /// Get a list of the device's shared memory regions. ++ GET_SHARED_MEMORY_REGIONS = 41, + /// Upper bound of valid commands. +- MAX_CMD = 41, ++ MAX_CMD = 42, + } + + impl From<MasterReq> for u32 { +@@ -171,16 +173,20 @@ pub enum SlaveReq { + VRING_CALL = 4, + /// Indicate that an error occurred on the specific vring. + VRING_ERR = 5, ++ /// Indicates a request to map a fd into a shared memory region. ++ SHMEM_MAP = 6, ++ /// Indicates a request to unmap part of a shared memory region. ++ SHMEM_UNMAP = 7, + /// Virtio-fs draft: map file content into the window. +- FS_MAP = 6, ++ FS_MAP = 8, + /// Virtio-fs draft: unmap file content from the window. +- FS_UNMAP = 7, ++ FS_UNMAP = 9, + /// Virtio-fs draft: sync file content. +- FS_SYNC = 8, ++ FS_SYNC = 10, + /// Virtio-fs draft: perform a read/write from an fd directly to GPA. +- FS_IO = 9, ++ FS_IO = 11, + /// Upper bound of valid commands. +- MAX_CMD = 10, ++ MAX_CMD = 12, + } + + impl From<SlaveReq> for u32 { +@@ -817,7 +823,7 @@ pub const VHOST_USER_FS_SLAVE_ENTRIES: usize = 8; + + /// Slave request message to update the MMIO window. + #[repr(packed)] +-#[derive(Default)] ++#[derive(Clone, Copy, Default)] + pub struct VhostUserFSSlaveMsg { + /// File offset. + pub fd_offset: [u64; VHOST_USER_FS_SLAVE_ENTRIES], +@@ -828,6 +834,8 @@ pub struct VhostUserFSSlaveMsg { + /// Flags for the mmap operation + pub flags: [VhostUserFSSlaveMsgFlags; VHOST_USER_FS_SLAVE_ENTRIES], + } ++// Safe because it only has data and has no implicit padding. 
++unsafe impl ByteValued for VhostUserFSSlaveMsg {} + + impl VhostUserMsgValidator for VhostUserFSSlaveMsg { + fn is_valid(&self) -> bool { +@@ -843,6 +851,99 @@ impl VhostUserMsgValidator for VhostUserFSSlaveMsg { + } + } + ++bitflags! { ++ #[derive(Default)] ++ /// Flags for SHMEM_MAP messages. ++ pub struct VhostUserShmemMapMsgFlags: u8 { ++ /// Empty permission. ++ const EMPTY = 0x0; ++ /// Read permission. ++ const MAP_R = 0x1; ++ /// Write permission. ++ const MAP_W = 0x2; ++ } ++} ++ ++/// Slave request message to map a file into a shared memory region. ++#[repr(C, packed)] ++#[derive(Default, Copy, Clone)] ++pub struct VhostUserShmemMapMsg { ++ /// Flags for the mmap operation ++ pub flags: VhostUserShmemMapMsgFlags, ++ /// Shared memory region id. ++ pub shmid: u8, ++ padding: [u8; 6], ++ /// Offset into the shared memory region. ++ pub shm_offset: u64, ++ /// File offset. ++ pub fd_offset: u64, ++ /// Size of region to map. ++ pub len: u64, ++} ++// Safe because it only has data and has no implicit padding. ++unsafe impl ByteValued for VhostUserShmemMapMsg {} ++ ++impl VhostUserMsgValidator for VhostUserShmemMapMsg { ++ fn is_valid(&self) -> bool { ++ (self.flags.bits() & !VhostUserFSSlaveMsgFlags::all().bits() as u8) == 0 ++ && self.fd_offset.checked_add(self.len).is_some() ++ && self.shm_offset.checked_add(self.len).is_some() ++ } ++} ++ ++impl VhostUserShmemMapMsg { ++ /// New instance of VhostUserShmemMapMsg struct ++ pub fn new( ++ shmid: u8, ++ shm_offset: u64, ++ fd_offset: u64, ++ len: u64, ++ flags: VhostUserShmemMapMsgFlags, ++ ) -> Self { ++ Self { ++ flags, ++ shmid, ++ padding: [0; 6], ++ shm_offset, ++ fd_offset, ++ len, ++ } ++ } ++} ++ ++/// Slave request message to unmap part of a shared memory region. ++#[repr(C, packed)] ++#[derive(Default, Copy, Clone)] ++pub struct VhostUserShmemUnmapMsg { ++ /// Shared memory region id. ++ pub shmid: u8, ++ padding: [u8; 7], ++ /// Offset into the shared memory region. 
++ pub shm_offset: u64, ++ /// Size of region to unmap. ++ pub len: u64, ++} ++// Safe because it only has data and has no implicit padding. ++unsafe impl ByteValued for VhostUserShmemUnmapMsg {} ++ ++impl VhostUserMsgValidator for VhostUserShmemUnmapMsg { ++ fn is_valid(&self) -> bool { ++ self.shm_offset.checked_add(self.len).is_some() ++ } ++} ++ ++impl VhostUserShmemUnmapMsg { ++ /// New instance of VhostUserShmemUnmapMsg struct ++ pub fn new(shmid: u8, shm_offset: u64, len: u64) -> Self { ++ Self { ++ shmid, ++ padding: [0; 7], ++ shm_offset, ++ len, ++ } ++ } ++} ++ + /// Inflight I/O descriptor state for split virtqueues + #[repr(packed)] + #[derive(Clone, Copy, Default)] +@@ -974,6 +1075,31 @@ impl QueueRegionPacked { + } + } + ++/// Virtio shared memory descriptor. ++#[repr(packed)] ++#[derive(Default, Copy, Clone)] ++pub struct VhostSharedMemoryRegion { ++ /// The shared memory region's shmid. ++ pub id: u8, ++ /// Padding ++ padding: [u8; 7], ++ /// The length of the shared memory region. ++ pub length: u64, ++} ++// Safe because it only has data and has no implicit padding. 
++unsafe impl ByteValued for VhostSharedMemoryRegion {} ++ ++impl VhostSharedMemoryRegion { ++ /// New instance of VhostSharedMemoryRegion struct ++ pub fn new(id: u8, length: u64) -> Self { ++ VhostSharedMemoryRegion { ++ id, ++ padding: [0; 7], ++ length, ++ } ++ } ++} ++ + #[cfg(test)] + mod tests { + use super::*; +diff --git a/src/vhost_user/mod.rs b/src/vhost_user/mod.rs +index ff583b9..18a4bf2 100644 +--- a/src/vhost_user/mod.rs ++++ b/src/vhost_user/mod.rs +@@ -51,7 +51,7 @@ pub use self::slave_req_handler::{ + #[cfg(feature = "vhost-user-slave")] + mod slave_fs_cache; + #[cfg(feature = "vhost-user-slave")] +-pub use self::slave_fs_cache::SlaveFsCacheReq; ++pub use self::slave_fs_cache::Slave; + + /// Errors for vhost-user operations + #[derive(Debug)] +diff --git a/src/vhost_user/slave_fs_cache.rs b/src/vhost_user/slave_fs_cache.rs +index e9ad7cf..6811f1c 100644 +--- a/src/vhost_user/slave_fs_cache.rs ++++ b/src/vhost_user/slave_fs_cache.rs +@@ -7,11 +7,13 @@ use std::os::unix::io::{AsRawFd, RawFd}; + use std::os::unix::net::UnixStream; + use std::sync::{Arc, Mutex, MutexGuard}; + ++use vm_memory::ByteValued; ++ + use super::connection::Endpoint; + use super::message::*; + use super::{Error, HandlerResult, Result, VhostUserMasterReqHandler}; + +-struct SlaveFsCacheReqInternal { ++struct SlaveInternal { + sock: Endpoint<SlaveReq>, + + // Protocol feature VHOST_USER_PROTOCOL_F_REPLY_ACK has been negotiated. 
+@@ -21,7 +23,7 @@ struct SlaveFsCacheReqInternal { + error: Option<i32>, + } + +-impl SlaveFsCacheReqInternal { ++impl SlaveInternal { + fn check_state(&self) -> Result<u64> { + match self.error { + Some(e) => Err(Error::SocketBroken(std::io::Error::from_raw_os_error(e))), +@@ -29,27 +31,30 @@ impl SlaveFsCacheReqInternal { + } + } + +- fn send_message( ++ fn send_message<T: ByteValued>( + &mut self, + request: SlaveReq, +- fs: &VhostUserFSSlaveMsg, ++ msg: &T, + fds: Option<&[RawFd]>, + ) -> Result<u64> { + self.check_state()?; + +- let len = mem::size_of::<VhostUserFSSlaveMsg>(); ++ let len = mem::size_of::<T>(); + let mut hdr = VhostUserMsgHeader::new(request, 0, len as u32); + if self.reply_ack_negotiated { + hdr.set_need_reply(true); + } +- self.sock.send_message(&hdr, fs, fds)?; ++ self.sock.send_message(&hdr, msg, fds)?; + +- self.wait_for_ack(&hdr) ++ self.wait_for_reply(&hdr) + } + +- fn wait_for_ack(&mut self, hdr: &VhostUserMsgHeader<SlaveReq>) -> Result<u64> { ++ fn wait_for_reply(&mut self, hdr: &VhostUserMsgHeader<SlaveReq>) -> Result<u64> { + self.check_state()?; +- if !self.reply_ack_negotiated { ++ if hdr.get_code() != SlaveReq::SHMEM_MAP ++ && hdr.get_code() != SlaveReq::SHMEM_UNMAP ++ && !self.reply_ack_negotiated ++ { + return Ok(0); + } + +@@ -68,22 +73,22 @@ impl SlaveFsCacheReqInternal { + /// Request proxy to send vhost-user-fs slave requests to the master through the slave + /// communication channel. + /// +-/// The [SlaveFsCacheReq] acts as a message proxy to forward vhost-user-fs slave requests to the ++/// The [Slave] acts as a message proxy to forward vhost-user-fs slave requests to the + /// master through the vhost-user slave communication channel. The forwarded messages will be + /// handled by the [MasterReqHandler] server. 
+ /// +-/// [SlaveFsCacheReq]: struct.SlaveFsCacheReq.html ++/// [Slave]: struct.Slave.html + /// [MasterReqHandler]: struct.MasterReqHandler.html + #[derive(Clone)] +-pub struct SlaveFsCacheReq { ++pub struct Slave { + // underlying Unix domain socket for communication +- node: Arc<Mutex<SlaveFsCacheReqInternal>>, ++ node: Arc<Mutex<SlaveInternal>>, + } + +-impl SlaveFsCacheReq { ++impl Slave { + fn new(ep: Endpoint<SlaveReq>) -> Self { +- SlaveFsCacheReq { +- node: Arc::new(Mutex::new(SlaveFsCacheReqInternal { ++ Slave { ++ node: Arc::new(Mutex::new(SlaveInternal { + sock: ep, + reply_ack_negotiated: false, + error: None, +@@ -91,18 +96,18 @@ impl SlaveFsCacheReq { + } + } + +- fn node(&self) -> MutexGuard<SlaveFsCacheReqInternal> { ++ fn node(&self) -> MutexGuard<SlaveInternal> { + self.node.lock().unwrap() + } + +- fn send_message( ++ fn send_message<T: ByteValued>( + &self, + request: SlaveReq, +- fs: &VhostUserFSSlaveMsg, ++ msg: &T, + fds: Option<&[RawFd]>, + ) -> io::Result<u64> { + self.node() +- .send_message(request, fs, fds) ++ .send_message(request, msg, fds) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{}", e))) + } + +@@ -126,7 +131,17 @@ impl SlaveFsCacheReq { + } + } + +-impl VhostUserMasterReqHandler for SlaveFsCacheReq { ++impl VhostUserMasterReqHandler for Slave { ++ /// Handle shared memory region mapping requests. ++ fn shmem_map(&self, req: &VhostUserShmemMapMsg, fd: &dyn AsRawFd) -> HandlerResult<u64> { ++ self.send_message(SlaveReq::SHMEM_MAP, req, Some(&[fd.as_raw_fd()])) ++ } ++ ++ /// Handle shared memory region unmapping requests. ++ fn shmem_unmap(&self, req: &VhostUserShmemUnmapMsg) -> HandlerResult<u64> { ++ self.send_message(SlaveReq::SHMEM_UNMAP, req, None) ++ } ++ + /// Forward vhost-user-fs map file requests to the slave. 
+ fn fs_slave_map(&self, fs: &VhostUserFSSlaveMsg, fd: &dyn AsRawFd) -> HandlerResult<u64> { + self.send_message(SlaveReq::FS_MAP, fs, Some(&[fd.as_raw_fd()])) +@@ -147,7 +162,7 @@ mod tests { + #[test] + fn test_slave_fs_cache_req_set_failed() { + let (p1, _p2) = UnixStream::pair().unwrap(); +- let fs_cache = SlaveFsCacheReq::from_stream(p1); ++ let fs_cache = Slave::from_stream(p1); + + assert!(fs_cache.node().error.is_none()); + fs_cache.set_failed(libc::EAGAIN); +@@ -157,7 +172,7 @@ mod tests { + #[test] + fn test_slave_fs_cache_send_failure() { + let (p1, p2) = UnixStream::pair().unwrap(); +- let fs_cache = SlaveFsCacheReq::from_stream(p1); ++ let fs_cache = Slave::from_stream(p1); + + fs_cache.set_failed(libc::ECONNRESET); + fs_cache +@@ -172,7 +187,7 @@ mod tests { + #[test] + fn test_slave_fs_cache_recv_negative() { + let (p1, p2) = UnixStream::pair().unwrap(); +- let fs_cache = SlaveFsCacheReq::from_stream(p1); ++ let fs_cache = Slave::from_stream(p1); + let mut master = Endpoint::<SlaveReq>::from_stream(p2); + + let len = mem::size_of::<VhostUserFSSlaveMsg>(); +diff --git a/src/vhost_user/slave_req_handler.rs b/src/vhost_user/slave_req_handler.rs +index b6f01de..f729e9d 100644 +--- a/src/vhost_user/slave_req_handler.rs ++++ b/src/vhost_user/slave_req_handler.rs +@@ -8,9 +8,11 @@ use std::os::unix::net::UnixStream; + use std::slice; + use std::sync::{Arc, Mutex}; + ++use vm_memory::ByteValued; ++ + use super::connection::Endpoint; + use super::message::*; +-use super::slave_fs_cache::SlaveFsCacheReq; ++use super::slave_fs_cache::Slave; + use super::{take_single_file, Error, Result}; + + /// Services provided to the master by the slave with interior mutability. 
+@@ -62,12 +64,13 @@ pub trait VhostUserSlaveReqHandler { + fn set_vring_enable(&self, index: u32, enable: bool) -> Result<()>; + fn get_config(&self, offset: u32, size: u32, flags: VhostUserConfigFlags) -> Result<Vec<u8>>; + fn set_config(&self, offset: u32, buf: &[u8], flags: VhostUserConfigFlags) -> Result<()>; +- fn set_slave_req_fd(&self, _vu_req: SlaveFsCacheReq) {} ++ fn set_slave_req_fd(&self, _vu_req: Slave) {} + fn get_inflight_fd(&self, inflight: &VhostUserInflight) -> Result<(VhostUserInflight, File)>; + fn set_inflight_fd(&self, inflight: &VhostUserInflight, file: File) -> Result<()>; + fn get_max_mem_slots(&self) -> Result<u64>; + fn add_mem_region(&self, region: &VhostUserSingleMemoryRegion, fd: File) -> Result<()>; + fn remove_mem_region(&self, region: &VhostUserSingleMemoryRegion) -> Result<()>; ++ fn get_shared_memory_regions(&self) -> Result<Vec<VhostSharedMemoryRegion>>; + } + + /// Services provided to the master by the slave without interior mutability. +@@ -107,7 +110,7 @@ pub trait VhostUserSlaveReqHandlerMut { + flags: VhostUserConfigFlags, + ) -> Result<Vec<u8>>; + fn set_config(&mut self, offset: u32, buf: &[u8], flags: VhostUserConfigFlags) -> Result<()>; +- fn set_slave_req_fd(&mut self, _vu_req: SlaveFsCacheReq) {} ++ fn set_slave_req_fd(&mut self, _vu_req: Slave) {} + fn get_inflight_fd( + &mut self, + inflight: &VhostUserInflight, +@@ -116,6 +119,7 @@ pub trait VhostUserSlaveReqHandlerMut { + fn get_max_mem_slots(&mut self) -> Result<u64>; + fn add_mem_region(&mut self, region: &VhostUserSingleMemoryRegion, fd: File) -> Result<()>; + fn remove_mem_region(&mut self, region: &VhostUserSingleMemoryRegion) -> Result<()>; ++ fn get_shared_memory_regions(&mut self) -> Result<Vec<VhostSharedMemoryRegion>>; + } + + impl<T: VhostUserSlaveReqHandlerMut> VhostUserSlaveReqHandler for Mutex<T> { +@@ -201,7 +205,7 @@ impl<T: VhostUserSlaveReqHandlerMut> VhostUserSlaveReqHandler for Mutex<T> { + self.lock().unwrap().set_config(offset, buf, flags) + 
} + +- fn set_slave_req_fd(&self, vu_req: SlaveFsCacheReq) { ++ fn set_slave_req_fd(&self, vu_req: Slave) { + self.lock().unwrap().set_slave_req_fd(vu_req) + } + +@@ -224,6 +228,10 @@ impl<T: VhostUserSlaveReqHandlerMut> VhostUserSlaveReqHandler for Mutex<T> { + fn remove_mem_region(&self, region: &VhostUserSingleMemoryRegion) -> Result<()> { + self.lock().unwrap().remove_mem_region(region) + } ++ ++ fn get_shared_memory_regions(&self) -> Result<Vec<VhostSharedMemoryRegion>> { ++ self.lock().unwrap().get_shared_memory_regions() ++ } + } + + /// Server to handle service requests from masters from the master communication channel. +@@ -509,6 +517,15 @@ impl<S: VhostUserSlaveReqHandler> SlaveReqHandler<S> { + let res = self.backend.remove_mem_region(&msg); + self.send_ack_message(&hdr, res)?; + } ++ MasterReq::GET_SHARED_MEMORY_REGIONS => { ++ let regions = self.backend.get_shared_memory_regions()?; ++ let mut buf = Vec::new(); ++ let msg = VhostUserU64::new(regions.len() as u64); ++ for r in regions { ++ buf.extend_from_slice(r.as_slice()) ++ } ++ self.send_reply_with_payload(&hdr, &msg, buf.as_slice())?; ++ } + _ => { + return Err(Error::InvalidMessage); + } +@@ -622,7 +639,7 @@ impl<S: VhostUserSlaveReqHandler> SlaveReqHandler<S> { + fn set_slave_req_fd(&mut self, files: Option<Vec<File>>) -> Result<()> { + let file = take_single_file(files).ok_or(Error::InvalidMessage)?; + let sock = unsafe { UnixStream::from_raw_fd(file.into_raw_fd()) }; +- let vu_req = SlaveFsCacheReq::from_stream(sock); ++ let vu_req = Slave::from_stream(sock); + self.backend.set_slave_req_fd(vu_req); + Ok(()) + } +-- +2.37.1 + diff --git a/pkgs/applications/virtualization/cloud-hypervisor/vhost/0002-devices-vhost-user-add-protocol-flag-for-shmem.patch b/pkgs/applications/virtualization/cloud-hypervisor/vhost/0002-devices-vhost-user-add-protocol-flag-for-shmem.patch new file mode 100644 index 00000000000..87e29939715 --- /dev/null +++ 
b/pkgs/applications/virtualization/cloud-hypervisor/vhost/0002-devices-vhost-user-add-protocol-flag-for-shmem.patch @@ -0,0 +1,55 @@ +From ee17b58f30e65a37a0526a4df60f9810fa19b138 Mon Sep 17 00:00:00 2001 +From: David Stevens <stevensd@chromium.org> +Date: Thu, 13 Oct 2022 10:37:47 +0900 +Subject: [PATCH 2/2] devices: vhost-user: add protocol flag for shmem + +Add a vhost protocol feature flag for shared memory region support. This +is necessary to avoid sending the GET_SHARED_MEMORY_REGIONS message to +backends which don't support it. + +BUG=b:252901073 +TEST=crosvm device wl + +Change-Id: I044926e982526c3c76063b5386cab0db72524707 +Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/3951472 +Reviewed-by: Daniel Verkamp <dverkamp@chromium.org> +Commit-Queue: David Stevens <stevensd@chromium.org> +(cherry-picked from commit 60aa43629ae9be2cc3df37c648ab7e0e5ff2172c) +Signed-off-by: Alyssa Ross <hi@alyssa.is> +--- + src/vhost_user/master.rs | 5 +++++ + src/vhost_user/message.rs | 2 ++ + 2 files changed, 7 insertions(+) + +diff --git a/src/vhost_user/master.rs b/src/vhost_user/master.rs +index deab6a7..2bbf8d6 100644 +--- a/src/vhost_user/master.rs ++++ b/src/vhost_user/master.rs +@@ -357,6 +357,11 @@ impl VhostUserMaster for Master { + fn set_protocol_features(&mut self, features: VhostUserProtocolFeatures) -> Result<()> { + let mut node = self.node(); + node.check_feature(VhostUserVirtioFeatures::PROTOCOL_FEATURES)?; ++ if features.contains(VhostUserProtocolFeatures::SHARED_MEMORY_REGIONS) ++ && !features.contains(VhostUserProtocolFeatures::SLAVE_REQ) ++ { ++ return error_code(VhostUserError::FeatureMismatch); ++ } + let val = VhostUserU64::new(features.bits()); + let hdr = node.send_request_with_body(MasterReq::SET_PROTOCOL_FEATURES, &val, None)?; + // Don't wait for ACK here because the protocol feature negotiation process hasn't been +diff --git a/src/vhost_user/message.rs b/src/vhost_user/message.rs +index adb485b..09362fb 100644 +--- 
a/src/vhost_user/message.rs ++++ b/src/vhost_user/message.rs +@@ -417,6 +417,8 @@ bitflags! { + const CONFIGURE_MEM_SLOTS = 0x0000_8000; + /// Support reporting status. + const STATUS = 0x0001_0000; ++ /// Support shared memory regions. ++ const SHARED_MEMORY_REGIONS = 0x0002_0000; + } + } + +-- +2.37.1 + |