diff options
author | Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com> | 2019-04-23 17:15:00 +0800 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2019-10-17 00:17:07 +0000 |
commit | bed8b0017d2cb283c20dc50241adb4f5b2668489 (patch) | |
tree | 7a76e936a9e1aede56c08c112ca7d436ab8d9e98 | |
parent | 04a82c7be173b2068c4254ed4a129e24e9e3a2e4 (diff) | |
download | crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.gz crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.bz2 crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.lz crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.xz crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.zst crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.zip |
vfio: Add msi support
crosvm doesn't support MSI/MSI-X, but the kvmgt vGPU supports MSI only through the config-space MSI capability. This is a simple MSI implementation: it detects the MSI capability, tracks the MSI control, data and address info, and then calls the VFIO kernel driver to enable / disable the MSI interrupt. Currently it supports one vector per MSI. It could be extended to multiple vectors and MSI-X. BUG=chromium:992270 TEST=none Change-Id: I04fc95f23a07f9698237c014d9f909d011f447ef Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com> Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1581142 Reviewed-by: Daniel Verkamp <dverkamp@chromium.org> Tested-by: kokoro <noreply+kokoro@google.com> Commit-Queue: Daniel Verkamp <dverkamp@chromium.org>
-rw-r--r-- | devices/src/pci/vfio_pci.rs | 172 | ||||
-rw-r--r-- | devices/src/vfio.rs | 52 | ||||
-rw-r--r-- | kvm/src/lib.rs | 35 | ||||
-rw-r--r-- | sys_util/src/struct_util.rs | 41 |
4 files changed, 252 insertions, 48 deletions
diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs index 5b0474a..645ee19 100644 --- a/devices/src/pci/vfio_pci.rs +++ b/devices/src/pci/vfio_pci.rs @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -use std::os::unix::io::RawFd; +use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::Arc; use std::u32; @@ -77,6 +77,134 @@ impl VfioPciConfig { } } +const PCI_CAPABILITY_LIST: u32 = 0x34; +const PCI_CAP_ID_MSI: u8 = 0x05; + +// MSI registers +const PCI_MSI_NEXT_POINTER: u32 = 0x1; // Next cap pointer +const PCI_MSI_FLAGS: u32 = 0x2; // Message Control +const PCI_MSI_FLAGS_ENABLE: u16 = 0x0001; // MSI feature enabled +const PCI_MSI_FLAGS_64BIT: u16 = 0x0080; // 64-bit addresses allowed +const PCI_MSI_FLAGS_MASKBIT: u16 = 0x0100; // Per-vector masking capable +const PCI_MSI_ADDRESS_LO: u32 = 0x4; // MSI address lower 32 bits +const PCI_MSI_ADDRESS_HI: u32 = 0x8; // MSI address upper 32 bits (if 64 bit allowed) +const PCI_MSI_DATA_32: u32 = 0x8; // 16 bits of data for 32-bit message address +const PCI_MSI_DATA_64: u32 = 0xC; // 16 bits of date for 64-bit message address + +// MSI length +const MSI_LENGTH_32BIT: u32 = 0xA; +const MSI_LENGTH_64BIT_WITHOUT_MASK: u32 = 0xE; +const MSI_LENGTH_64BIT_WITH_MASK: u32 = 0x18; + +enum VfioMsiChange { + Disable, + Enable, +} + +struct VfioMsiCap { + offset: u32, + size: u32, + ctl: u16, + address: u64, + data: u16, +} + +impl VfioMsiCap { + fn new(config: &VfioPciConfig) -> Option<Self> { + // msi minimum size is 0xa + let mut msi_len: u32 = MSI_LENGTH_32BIT; + let mut cap_next: u32 = config.read_config_byte(PCI_CAPABILITY_LIST).into(); + while cap_next != 0 { + let cap_id = config.read_config_byte(cap_next); + // find msi cap + if cap_id == PCI_CAP_ID_MSI { + let msi_ctl = config.read_config_word(cap_next + PCI_MSI_FLAGS); + if msi_ctl & PCI_MSI_FLAGS_64BIT != 0 { + msi_len = MSI_LENGTH_64BIT_WITHOUT_MASK; + } + if msi_ctl & 
PCI_MSI_FLAGS_MASKBIT != 0 { + msi_len = MSI_LENGTH_64BIT_WITH_MASK; + } + return Some(VfioMsiCap { + offset: cap_next, + size: msi_len, + ctl: 0, + address: 0, + data: 0, + }); + } + let offset = cap_next + PCI_MSI_NEXT_POINTER; + cap_next = config.read_config_byte(offset).into(); + } + + None + } + + fn is_msi_reg(&self, index: u64, len: usize) -> bool { + if index >= self.offset as u64 + && index + len as u64 <= (self.offset + self.size) as u64 + && len as u32 <= self.size + { + true + } else { + false + } + } + + fn write_msi_reg(&mut self, index: u64, data: &[u8]) -> Option<VfioMsiChange> { + let len = data.len(); + let offset = index as u32 - self.offset; + let mut ret: Option<VfioMsiChange> = None; + + // write msi ctl + if len == 2 && offset == PCI_MSI_FLAGS { + let was_enabled = self.is_msi_enabled(); + let value: [u8; 2] = [data[0], data[1]]; + self.ctl = u16::from_le_bytes(value); + let is_enabled = self.is_msi_enabled(); + if !was_enabled && is_enabled { + ret = Some(VfioMsiChange::Enable); + } else if was_enabled && !is_enabled { + ret = Some(VfioMsiChange::Disable) + } + } else if len == 4 && offset == PCI_MSI_ADDRESS_LO && self.size == MSI_LENGTH_32BIT { + //write 32 bit message address + let value: [u8; 8] = [data[0], data[1], data[2], data[3], 0, 0, 0, 0]; + self.address = u64::from_le_bytes(value); + } else if len == 4 && offset == PCI_MSI_ADDRESS_LO && self.size != MSI_LENGTH_32BIT { + // write 64 bit message address low part + let value: [u8; 8] = [data[0], data[1], data[2], data[3], 0, 0, 0, 0]; + self.address &= !0xffffffff; + self.address |= u64::from_le_bytes(value); + } else if len == 4 && offset == PCI_MSI_ADDRESS_HI && self.size != MSI_LENGTH_32BIT { + //write 64 bit message address high part + let value: [u8; 8] = [0, 0, 0, 0, data[0], data[1], data[2], data[3]]; + self.address &= 0xffffffff; + self.address |= u64::from_le_bytes(value); + } else if len == 8 && offset == PCI_MSI_ADDRESS_LO && self.size != MSI_LENGTH_32BIT { + // write 64 
bit message address + let value: [u8; 8] = [ + data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7], + ]; + self.address = u64::from_le_bytes(value); + } else if len == 2 + && ((offset == PCI_MSI_DATA_32 && self.size == MSI_LENGTH_32BIT) + || (offset == PCI_MSI_DATA_64 && self.size == MSI_LENGTH_64BIT_WITH_MASK) + || (offset == PCI_MSI_DATA_64 && self.size == MSI_LENGTH_64BIT_WITHOUT_MASK)) + { + // write message data + let value: [u8; 2] = [data[0], data[1]]; + self.data = u16::from_le_bytes(value); + } + + ret + } + + fn is_msi_enabled(&self) -> bool { + self.ctl & PCI_MSI_FLAGS_ENABLE == PCI_MSI_FLAGS_ENABLE + } +} + struct MmioInfo { bar_index: u32, start: u64, @@ -96,6 +224,7 @@ pub struct VfioPciDevice { interrupt_resample_evt: Option<EventFd>, mmio_regions: Vec<MmioInfo>, io_regions: Vec<IoInfo>, + msi_cap: Option<VfioMsiCap>, } impl VfioPciDevice { @@ -103,6 +232,8 @@ impl VfioPciDevice { pub fn new(device: VfioDevice) -> Self { let dev = Arc::new(device); let config = VfioPciConfig::new(Arc::clone(&dev)); + let msi_cap = VfioMsiCap::new(&config); + VfioPciDevice { device: dev, config, @@ -111,6 +242,7 @@ impl VfioPciDevice { interrupt_resample_evt: None, mmio_regions: Vec::new(), io_regions: Vec::new(), + msi_cap, } } @@ -139,7 +271,14 @@ impl PciDevice for VfioPciDevice { } fn keep_fds(&self) -> Vec<RawFd> { - self.device.keep_fds() + let mut fds = self.device.keep_fds(); + if let Some(ref interrupt_evt) = self.interrupt_evt { + fds.push(interrupt_evt.as_raw_fd()); + } + if let Some(ref interrupt_resample_evt) = self.interrupt_resample_evt { + fds.push(interrupt_resample_evt.as_raw_fd()); + } + fds } fn assign_irq( @@ -274,11 +413,30 @@ impl PciDevice for VfioPciDevice { } fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) { - self.device.region_write( - VFIO_PCI_CONFIG_REGION_INDEX, - data, - (reg_idx * 4) as u64 + offset, - ) + let start = (reg_idx * 4) as u64 + offset; + + if let Some(msi_cap) = 
self.msi_cap.as_mut() { + if msi_cap.is_msi_reg(start, data.len()) { + if let Some(ref interrupt_evt) = self.interrupt_evt { + match msi_cap.write_msi_reg(start, data) { + Some(VfioMsiChange::Enable) => { + if let Err(e) = self.device.msi_enable(interrupt_evt) { + error!("{}", e); + } + } + Some(VfioMsiChange::Disable) => { + if let Err(e) = self.device.msi_disable() { + error!("{}", e); + } + } + None => (), + } + } + } + } + + self.device + .region_write(VFIO_PCI_CONFIG_REGION_INDEX, data, start); } fn read_bar(&mut self, addr: u64, data: &mut [u8]) { diff --git a/devices/src/vfio.rs b/devices/src/vfio.rs index 966085f..cdaadca 100644 --- a/devices/src/vfio.rs +++ b/devices/src/vfio.rs @@ -14,8 +14,8 @@ use std::u32; use kvm::Vm; use sys_util::{ - ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, warn, Error, - GuestMemory, + ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, + vec_with_array_field, warn, Error, EventFd, GuestMemory, }; use vfio_sys::*; @@ -38,6 +38,8 @@ pub enum VfioError { InvalidPath, IommuDmaMap(Error), IommuDmaUnmap(Error), + VfioMsiEnable(Error), + VfioMsiDisable(Error), } impl fmt::Display for VfioError { @@ -59,6 +61,8 @@ impl fmt::Display for VfioError { VfioError::InvalidPath => write!(f,"invalid file path"), VfioError::IommuDmaMap(e) => write!(f, "failed to add guest memory map into iommu table: {}", e), VfioError::IommuDmaUnmap(e) => write!(f, "failed to remove guest memory map from iommu table: {}", e), + VfioError::VfioMsiEnable(e) => write!(f, "failed to enable vfio deviece's MSI: {}", e), + VfioError::VfioMsiDisable(e) => write!(f, "failed to disable vfio deviece's MSI: {}", e), } } } @@ -306,6 +310,50 @@ impl VfioDevice { }) } + /// enable vfio device's MSI and associate EventFd with this MSI + pub fn msi_enable(&self, fd: &EventFd) -> Result<(), VfioError> { + let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1); + irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + 
mem::size_of::<u32>()) as u32; + irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set[0].index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set[0].start = 0; + irq_set[0].count = 1; + + { + // irq_set.data could be none, bool or fd according to flags, so irq_set.data + // is u8 default, here irq_set.data is fd as u32, so 4 default u8 are combined + // together as u32. It is safe as enough space is reserved through + // vec_with_array_field(u32)<1>. + let fds = unsafe { irq_set[0].data.as_mut_slice(4) }; + fds.copy_from_slice(&fd.as_raw_fd().to_le_bytes()[..]); + } + + // Safe as we are the owner of self and irq_set which are valid value + let ret = unsafe { ioctl_with_ref(self, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) }; + if ret < 0 { + Err(VfioError::VfioMsiEnable(get_error())) + } else { + Ok(()) + } + } + + pub fn msi_disable(&self) -> Result<(), VfioError> { + let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0); + irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32; + irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set[0].index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set[0].start = 0; + irq_set[0].count = 0; + + // Safe as we are the owner of self and irq_set which are valid value + let ret = unsafe { ioctl_with_ref(self, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) }; + if ret < 0 { + Err(VfioError::VfioMsiDisable(get_error())) + } else { + Ok(()) + } + } + fn get_regions(dev: &File) -> Result<Vec<VfioRegion>, VfioError> { let mut regions: Vec<VfioRegion> = Vec::new(); let mut dev_info = vfio_device_info { diff --git a/kvm/src/lib.rs b/kvm/src/lib.rs index bafc79d..b8475d3 100644 --- a/kvm/src/lib.rs +++ b/kvm/src/lib.rs @@ -23,8 +23,8 @@ use msg_socket::MsgOnSocket; #[allow(unused_imports)] use sys_util::{ ioctl, ioctl_with_mut_ptr, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, - pagesize, signal, warn, Error, EventFd, GuestAddress, GuestMemory, MemoryMapping, - MemoryMappingArena, Result, 
+ pagesize, signal, vec_with_array_field, warn, Error, EventFd, GuestAddress, GuestMemory, + MemoryMapping, MemoryMappingArena, Result, }; pub use crate::cap::*; @@ -33,37 +33,6 @@ fn errno_result<T>() -> Result<T> { Err(Error::last()) } -// Returns a `Vec<T>` with a size in ytes at least as large as `size_in_bytes`. -fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> { - let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>(); - let mut v = Vec::with_capacity(rounded_size); - for _ in 0..rounded_size { - v.push(T::default()) - } - v -} - -// The kvm API has many structs that resemble the following `Foo` structure: -// -// ``` -// #[repr(C)] -// struct Foo { -// some_data: u32 -// entries: __IncompleteArrayField<__u32>, -// } -// ``` -// -// In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not -// include any space for `entries`. To make the allocation large enough while still being aligned -// for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used -// as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous -// with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries. -fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> { - let element_space = count * size_of::<F>(); - let vec_size_bytes = size_of::<T>() + element_space; - vec_with_size_in_bytes(vec_size_bytes) -} - unsafe fn set_user_memory_region<F: AsRawFd>( fd: &F, slot: u32, diff --git a/sys_util/src/struct_util.rs b/sys_util/src/struct_util.rs index 3d8def2..22411a1 100644 --- a/sys_util/src/struct_util.rs +++ b/sys_util/src/struct_util.rs @@ -4,7 +4,38 @@ use std; use std::io::Read; -use std::mem; +use std::mem::size_of; + +// Returns a `Vec<T>` with a size in ytes at least as large as `size_in_bytes`. 
+fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> { + let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>(); + let mut v = Vec::with_capacity(rounded_size); + for _ in 0..rounded_size { + v.push(T::default()) + } + v +} + +/// The kvm API has many structs that resemble the following `Foo` structure: +/// +/// ``` +/// #[repr(C)] +/// struct Foo { +/// some_data: u32 +/// entries: __IncompleteArrayField<__u32>, +/// } +/// ``` +/// +/// In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not +/// include any space for `entries`. To make the allocation large enough while still being aligned +/// for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used +/// as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous +/// with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries. +pub fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> { + let element_space = count * size_of::<F>(); + let vec_size_bytes = size_of::<T>() + element_space; + vec_with_size_in_bytes(vec_size_bytes) +} #[derive(Debug)] pub enum Error { @@ -21,7 +52,7 @@ pub type Result<T> = std::result::Result<T, Error>; /// * `f` - The input to read from. Often this is a file. /// * `out` - The struct to fill with data read from `f`. 
pub unsafe fn read_struct<T: Copy, F: Read>(f: &mut F, out: &mut T) -> Result<()> { - let out_slice = std::slice::from_raw_parts_mut(out as *mut T as *mut u8, mem::size_of::<T>()); + let out_slice = std::slice::from_raw_parts_mut(out as *mut T as *mut u8, size_of::<T>()); f.read_exact(out_slice).map_err(|_| Error::ReadStruct)?; Ok(()) } @@ -38,10 +69,8 @@ pub unsafe fn read_struct<T: Copy, F: Read>(f: &mut F, out: &mut T) -> Result<() pub unsafe fn read_struct_slice<T: Copy, F: Read>(f: &mut F, len: usize) -> Result<Vec<T>> { let mut out: Vec<T> = Vec::with_capacity(len); out.set_len(len); - let out_slice = std::slice::from_raw_parts_mut( - out.as_ptr() as *mut T as *mut u8, - mem::size_of::<T>() * len, - ); + let out_slice = + std::slice::from_raw_parts_mut(out.as_ptr() as *mut T as *mut u8, size_of::<T>() * len); f.read_exact(out_slice).map_err(|_| Error::ReadStruct)?; Ok(out) } |