diff options
-rw-r--r-- | devices/src/pci/vfio_pci.rs | 106 | ||||
-rw-r--r-- | devices/src/vfio.rs | 131 | ||||
-rw-r--r-- | src/linux.rs | 10 | ||||
-rw-r--r-- | vfio_sys/src/vfio.rs | 6 | ||||
-rw-r--r-- | vm_control/src/lib.rs | 22 |
5 files changed, 270 insertions, 5 deletions
diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs index fd1b3ca..07c7b60 100644 --- a/devices/src/pci/vfio_pci.rs +++ b/devices/src/pci/vfio_pci.rs @@ -9,10 +9,13 @@ use std::u32; use kvm::Datamatch; use msg_socket::{MsgReceiver, MsgSender}; use resources::{Alloc, MmioType, SystemAllocator}; -use sys_util::{error, EventFd}; +use sys_util::{error, EventFd, MemoryMapping}; use vfio_sys::*; -use vm_control::{MaybeOwnedFd, VmIrqRequest, VmIrqRequestSocket, VmIrqResponse}; +use vm_control::{ + MaybeOwnedFd, VmIrqRequest, VmIrqRequestSocket, VmIrqResponse, VmMemoryControlRequestSocket, + VmMemoryRequest, VmMemoryResponse, +}; use crate::pci::pci_device::{Error as PciDeviceError, PciDevice}; use crate::pci::PciInterruptPin; @@ -306,11 +309,19 @@ pub struct VfioPciDevice { io_regions: Vec<IoInfo>, msi_cap: Option<VfioMsiCap>, irq_type: Option<VfioIrqType>, + vm_socket_mem: VmMemoryControlRequestSocket, + + // Scratch MemoryMappings kept here so they are not unmapped before vm exit + mem: Vec<MemoryMapping>, } impl VfioPciDevice { /// Constructs a new Vfio Pci device for the give Vfio device - pub fn new(device: VfioDevice, vfio_device_socket_irq: VmIrqRequestSocket) -> Self { + pub fn new( + device: VfioDevice, + vfio_device_socket_irq: VmIrqRequestSocket, + vfio_device_socket_mem: VmMemoryControlRequestSocket, + ) -> Self { let dev = Arc::new(device); let config = VfioPciConfig::new(Arc::clone(&dev)); let msi_cap = VfioMsiCap::new(&config, vfio_device_socket_irq); @@ -325,6 +336,8 @@ impl VfioPciDevice { io_regions: Vec::new(), msi_cap, irq_type: None, + vm_socket_mem: vfio_device_socket_mem, + mem: Vec::new(), } } @@ -421,8 +434,85 @@ impl VfioPciDevice { self.enable_intx(); } + + fn add_bar_mmap(&self, index: u32, bar_addr: u64) -> Vec<MemoryMapping> { + let mut mem_map: Vec<MemoryMapping> = Vec::new(); + if self.device.get_region_flags(index) & VFIO_REGION_INFO_FLAG_MMAP != 0 { + let mmaps = self.device.get_region_mmap(index); + if mmaps.is_empty() { + return mem_map; 
+ } + + for mmap in mmaps.iter() { + let mmap_offset = mmap.offset; + let mmap_size = mmap.size; + let guest_map_start = bar_addr + mmap_offset; + let region_offset = self.device.get_region_offset(index); + let offset: usize = (region_offset + mmap_offset) as usize; + if self + .vm_socket_mem + .send(&VmMemoryRequest::RegisterMmapMemory { + fd: MaybeOwnedFd::Borrowed(self.device.as_raw_fd()), + size: mmap_size as usize, + offset, + gpa: guest_map_start, + }) + .is_err() + { + break; + } + + let response = match self.vm_socket_mem.recv() { + Ok(res) => res, + Err(_) => break, + }; + match response { + VmMemoryResponse::Ok => { + // The vm main process has already mapped this region, but that mapping is + // not present in the device process. vfio_dma_map() needs the mapping in + // the device process, so map the region again here. + let mmap = match MemoryMapping::from_fd_offset( + self.device.as_ref(), + mmap_size as usize, + offset, + ) { + Ok(v) => v, + Err(_e) => break, + }; + let host = (&mmap).as_ptr() as u64; + // Safe because the given guest_map_start is a valid guest bar address, and + // the host pointer is correct and valid, as guaranteed by the MemoryMapping interface. 
+ match unsafe { self.device.vfio_dma_map(guest_map_start, mmap_size, host) } + { + Ok(_) => mem_map.push(mmap), + Err(e) => { + error!( + "{}, index: {}, bar_addr:0x{:x}, host:0x{:x}", + e, index, bar_addr, host + ); + break; + } + } + } + _ => break, + } + } + } + + mem_map + } + + fn enable_bars_mmap(&mut self) { + for mmio_info in self.mmio_regions.iter() { + let mut mem_map = self.add_bar_mmap(mmio_info.bar_index, mmio_info.start); + self.mem.append(&mut mem_map); + } + } } +const PCI_COMMAND: u8 = 0x4; +const PCI_COMMAND_MEMORY: u8 = 0x2; + impl PciDevice for VfioPciDevice { fn debug_label(&self) -> String { "vfio pci device".to_string() @@ -443,6 +533,7 @@ impl PciDevice for VfioPciDevice { if let Some(msi_cap) = &self.msi_cap { fds.push(msi_cap.get_vm_socket()); } + fds.push(self.vm_socket_mem.as_raw_fd()); fds } @@ -601,6 +692,15 @@ impl PciDevice for VfioPciDevice { None => (), } + // if guest enable memory access, then enable bar mappable once + if start == PCI_COMMAND as u64 + && data.len() == 2 + && data[0] & PCI_COMMAND_MEMORY == PCI_COMMAND_MEMORY + && self.mem.is_empty() + { + self.enable_bars_mmap(); + } + self.device .region_write(VFIO_PCI_CONFIG_REGION_INDEX, data, start); } diff --git a/devices/src/vfio.rs b/devices/src/vfio.rs index d376cf9..c42c622 100644 --- a/devices/src/vfio.rs +++ b/devices/src/vfio.rs @@ -281,15 +281,20 @@ pub enum VfioIrqType { } struct VfioRegion { + // flags for this region: read/write/mmap flags: u32, size: u64, + // region offset used to read/write with vfio device fd offset: u64, + // vectors for mmap offset and size + mmaps: Vec<vfio_region_sparse_mmap_area>, } /// Vfio device for exposing regions which could be read/write to kernel vfio device. 
pub struct VfioDevice { dev: File, group: VfioGroup, + // vec for vfio device's regions regions: Vec<VfioRegion>, guest_mem: GuestMemory, } @@ -453,6 +458,7 @@ impl VfioDevice { } } + #[allow(clippy::cast_ptr_alignment)] fn get_regions(dev: &File) -> Result<Vec<VfioRegion>, VfioError> { let mut regions: Vec<VfioRegion> = Vec::new(); let mut dev_info = vfio_device_info { @@ -473,8 +479,9 @@ impl VfioDevice { } for i in VFIO_PCI_BAR0_REGION_INDEX..dev_info.num_regions { + let argsz = mem::size_of::<vfio_region_info>() as u32; let mut reg_info = vfio_region_info { - argsz: mem::size_of::<vfio_region_info>() as u32, + argsz, flags: 0, index: i, cap_offset: 0, @@ -488,10 +495,93 @@ impl VfioDevice { continue; } + let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new(); + if reg_info.argsz > argsz { + let cap_len: usize = (reg_info.argsz - argsz) as usize; + let mut region_with_cap = + vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len); + region_with_cap[0].region_info.argsz = reg_info.argsz; + region_with_cap[0].region_info.flags = 0; + region_with_cap[0].region_info.index = i; + region_with_cap[0].region_info.cap_offset = 0; + region_with_cap[0].region_info.size = 0; + region_with_cap[0].region_info.offset = 0; + // Safe as we are the owner of dev and region_info which are valid value, + // and we verify the return value. 
+ ret = unsafe { + ioctl_with_mut_ref( + dev, + VFIO_DEVICE_GET_REGION_INFO(), + &mut (region_with_cap[0].region_info), + ) + }; + if ret < 0 { + return Err(VfioError::VfioDeviceGetRegionInfo(get_error())); + } + + if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 { + continue; + } + + let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32; + let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32; + let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32; + let region_info_sz = reg_info.argsz; + + // region_with_cap[0].cap_info may contain many structures, like + // vfio_region_info_cap_sparse_mmap struct or vfio_region_info_cap_type struct. + // Both of them begin with vfio_info_cap_header, so we will get individual cap from + // vfio_info_cap_header. + // Go through all the cap structs. + let info_ptr = region_with_cap.as_ptr() as *mut u8; + let mut offset = region_with_cap[0].region_info.cap_offset; + while offset != 0 { + if offset + cap_header_sz >= region_info_sz { + break; + } + // Safe, as cap_header struct is in this function allocated region_with_cap + // vec. + let cap_ptr = unsafe { info_ptr.offset(offset as isize) }; + let cap_header = + unsafe { &*(cap_ptr as *mut u8 as *const vfio_info_cap_header) }; + if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP { + if offset + mmap_cap_sz >= region_info_sz { + break; + } + // cap_ptr is vfio_region_info_cap_sparse_mmap here + // Safe, this vfio_region_info_cap_sparse_mmap is in this function allocated + // region_with_cap vec. + let sparse_mmap = unsafe { + &*(cap_ptr as *mut u8 as *const vfio_region_info_cap_sparse_mmap) + }; + + let area_num = sparse_mmap.nr_areas; + if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz { + break; + } + // Safe, these vfio_region_sparse_mmap_area are in this function allocated + // region_with_cap vec. 
+ let areas = + unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) }; + for area in areas.iter() { + mmaps.push(area.clone()); + } + } + + offset = cap_header.next; + } + } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 { + mmaps.push(vfio_region_sparse_mmap_area { + offset: 0, + size: reg_info.size, + }); + } + let region = VfioRegion { flags: reg_info.flags, size: reg_info.size, offset: reg_info.offset, + mmaps, }; regions.push(region); } @@ -499,6 +589,45 @@ impl VfioDevice { Ok(regions) } + /// get a region's flags + /// the returned value may contain: + /// VFIO_REGION_INFO_FLAG_READ: region supports read + /// VFIO_REGION_INFO_FLAG_WRITE: region supports write + /// VFIO_REGION_INFO_FLAG_MMAP: region supports mmap + /// VFIO_REGION_INFO_FLAG_CAPS: region's info supports caps + pub fn get_region_flags(&self, index: u32) -> u32 { + match self.regions.get(index as usize) { + Some(v) => v.flags, + None => { + warn!("get_region_flags() with invalid index: {}", index); + 0 + } + } + } + + /// get a region's offset + /// return: Region offset from the start of vfio device fd + pub fn get_region_offset(&self, index: u32) -> u64 { + match self.regions.get(index as usize) { + Some(v) => v.offset, + None => { + warn!("get_region_offset with invalid index: {}", index); + 0 + } + } + } + + /// get a region's mmap info vector + pub fn get_region_mmap(&self, index: u32) -> Vec<vfio_region_sparse_mmap_area> { + match self.regions.get(index as usize) { + Some(v) => v.mmaps.clone(), + None => { + warn!("get_region_mmap with invalid index: {}", index); + Vec::new() + } + } + } + /// Read region's data from VFIO device into buf /// index: region num /// buf: data destination and buf length is read size diff --git a/src/linux.rs b/src/linux.rs index 9b984d1..7257520 100644 --- a/src/linux.rs +++ b/src/linux.rs @@ -1084,10 +1084,18 @@ fn create_devices( msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?; 
control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_irq)); + let (vfio_host_socket_mem, vfio_device_socket_mem) = + msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?; + control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem)); + let vfio_path = cfg.vfio.as_ref().unwrap().as_path(); let vfiodevice = VfioDevice::new(vfio_path, vm, mem.clone()).map_err(Error::CreateVfioDevice)?; - let vfiopcidevice = Box::new(VfioPciDevice::new(vfiodevice, vfio_device_socket_irq)); + let vfiopcidevice = Box::new(VfioPciDevice::new( + vfiodevice, + vfio_device_socket_irq, + vfio_device_socket_mem, + )); pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device.policy")?)); } diff --git a/vfio_sys/src/vfio.rs b/vfio_sys/src/vfio.rs index 3b88848..622b5db 100644 --- a/vfio_sys/src/vfio.rs +++ b/vfio_sys/src/vfio.rs @@ -225,6 +225,12 @@ pub struct vfio_region_info { pub offset: __u64, } #[repr(C)] +#[derive(Debug, Default)] +pub struct vfio_region_info_with_cap { + pub region_info: vfio_region_info, + pub cap_info: __IncompleteArrayField<u8>, +} +#[repr(C)] #[derive(Debug, Default, Copy, Clone)] pub struct vfio_region_sparse_mmap_area { pub offset: __u64, diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs index eccee10..7e5faf5 100644 --- a/vm_control/src/lib.rs +++ b/vm_control/src/lib.rs @@ -201,6 +201,13 @@ pub enum VmMemoryRequest { height: u32, format: u32, }, + /// Register mmaped memory into kvm's EPT. 
+ RegisterMmapMemory { + fd: MaybeOwnedFd, + size: usize, + offset: usize, + gpa: u64, + }, } impl VmMemoryRequest { @@ -260,6 +267,21 @@ impl VmMemoryRequest { Err(e) => VmMemoryResponse::Err(e), } } + RegisterMmapMemory { + ref fd, + size, + offset, + gpa, + } => { + let mmap = match MemoryMapping::from_fd_offset(fd, size, offset) { + Ok(v) => v, + Err(_e) => return VmMemoryResponse::Err(SysError::new(EINVAL)), + }; + match vm.add_mmio_memory(GuestAddress(gpa), mmap, false, false) { + Ok(_) => VmMemoryResponse::Ok, + Err(e) => VmMemoryResponse::Err(e), + } + } } } } |