diff options
author | Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com> | 2019-11-08 18:36:55 +0800 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2020-02-20 06:42:13 +0000 |
commit | 521646a401f8ce66cf26ed21abe5e18ac929fe33 (patch) | |
tree | b128b3b97907acca67ff7bcbc12a4b286aadf6ec | |
parent | e73414db487afd4bebd2fb60ca80693ee6349cf5 (diff) | |
download | crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.gz crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.bz2 crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.lz crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.xz crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.zst crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.zip |
Vfio: Emulate MSI-x
When VFIO device have msix capability, vfio kernel doesn't emulate msix, so all the msix emulation are handled by crosvm. This include msix capability register read/write, msix table read/write, msix pba table read/write. BUG=chromium:992270 TEST=passthrough a device with msix capabilty to guest, and check device msix function in guest Change-Id: Ic39737662a5051ac6b9e29aad227d3d4946190a8 Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com> Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1987814 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
-rw-r--r-- | devices/src/pci/msix.rs | 17 | ||||
-rw-r--r-- | devices/src/pci/vfio_pci.rs | 194 |
2 files changed, 197 insertions, 14 deletions
diff --git a/devices/src/pci/msix.rs b/devices/src/pci/msix.rs index 0b9f391..b1dc672 100644 --- a/devices/src/pci/msix.rs +++ b/devices/src/pci/msix.rs @@ -13,9 +13,9 @@ use vm_control::{MaybeOwnedFd, VmIrqRequest, VmIrqRequestSocket, VmIrqResponse}; use data_model::DataInit; const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; -const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; -const MSIX_PBA_ENTRIES_MODULO: u64 = 8; -const BITS_PER_PBA_ENTRY: usize = 64; +pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; +pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8; +pub const BITS_PER_PBA_ENTRY: usize = 64; const FUNCTION_MASK_BIT: u16 = 0x4000; const MSIX_ENABLE_BIT: u16 = 0x8000; @@ -433,6 +433,17 @@ impl MsixConfig { pub fn get_msi_socket(&self) -> RawFd { self.msi_device_socket.as_ref().as_raw_fd() } + + /// Return irqfd of MSI-X Table entry + /// + /// # Arguments + /// * 'vector' - the index to the MSI-X table entry + pub fn get_irqfd(&self, vector: usize) -> Option<&EventFd> { + match self.irq_vec.get(vector) { + Some(irq) => Some(&irq.irqfd), + None => None, + } + } } // It is safe to implement DataInit; all members are simple numbers and any value is valid. diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs index 766cbcf..178d4ae 100644 --- a/devices/src/pci/vfio_pci.rs +++ b/devices/src/pci/vfio_pci.rs @@ -17,7 +17,9 @@ use vm_control::{ VmMemoryRequest, VmMemoryResponse, }; -use crate::pci::msix::MsixConfig; +use crate::pci::msix::{ + MsixConfig, BITS_PER_PBA_ENTRY, MSIX_PBA_ENTRIES_MODULO, MSIX_TABLE_ENTRIES_MODULO, +}; use crate::pci::pci_device::{Error as PciDeviceError, PciDevice}; use crate::pci::{PciClassCode, PciInterruptPin}; @@ -299,7 +301,6 @@ const PCI_MSIX_PBA: u32 = 0x08; // Pending bit Array offset const PCI_MSIX_PBA_BIR: u32 = 0x07; // BAR index const PCI_MSIX_PBA_OFFSET: u32 = 0xFFFFFFF8; // Offset into specified BAR -#[allow(dead_code)] struct VfioMsixCap { config: MsixConfig, offset: u32, @@ -335,6 +336,99 @@ impl VfioMsixCap { pba_offset, } } + + // only msix control register is writable and need special handle in pci r/w + fn is_msix_control_reg(&self, offset: u32, size: u32) -> bool { + let control_start = self.offset + PCI_MSIX_FLAGS; + let control_end = control_start + 2; + + if offset < control_end && offset + size > control_start { + true + } else { + false + } + } + + fn read_msix_control(&self, data: &mut u32) { + *data = self.config.read_msix_capability(*data); + } + + fn write_msix_control(&mut self, data: &[u8]) -> Option<VfioMsiChange> { + let old_enabled = self.config.enabled(); + + self.config + .write_msix_capability(PCI_MSIX_FLAGS.into(), data); + + let new_enabled = self.config.enabled(); + if !old_enabled && new_enabled { + Some(VfioMsiChange::Enable) + } else if old_enabled && !new_enabled { + Some(VfioMsiChange::Disable) + } else { + None + } + } + + fn is_msix_table(&self, bar_index: u32, offset: u64) -> bool { + let table_size: u64 = (self.table_size * (MSIX_TABLE_ENTRIES_MODULO as u16)).into(); + if bar_index != self.table_pci_bar + || offset < self.table_offset + || offset >= self.table_offset + table_size + { + false + } else { + true + } + } + + fn read_table(&self, offset: u64, data: &mut [u8]) { + let offset = offset - self.table_offset; + self.config.read_msix_table(offset, data); + } + + fn write_table(&mut self, offset: u64, data: &[u8]) { + let offset = offset - self.table_offset; + self.config.write_msix_table(offset, data); + } + + fn is_msix_pba(&self, bar_index: u32, offset: u64) -> bool { + let pba_size: u64 = (((self.table_size + BITS_PER_PBA_ENTRY as u16 - 1) + / BITS_PER_PBA_ENTRY as u16) + * MSIX_PBA_ENTRIES_MODULO as u16) as u64; + if bar_index != self.pba_pci_bar + || offset < self.pba_offset + || offset >= self.pba_offset + pba_size + { + false + } else { + true + } + } + + fn read_pba(&self, offset: u64, data: &mut [u8]) { + let offset = offset - self.pba_offset; + self.config.read_pba_entries(offset, data); + } + + fn write_pba(&mut self, offset: u64, data: &[u8]) { + let offset = offset - self.pba_offset; + self.config.write_pba_entries(offset, data); + } + + fn get_msix_irqfds(&self) -> Option<Vec<&EventFd>> { + let mut irqfds = Vec::new(); + + for i in 0..self.table_size { + let irqfd = self.config.get_irqfd(i as usize); + if let Some(fd) = irqfd { + irqfds.push(fd); + } else { + return None; + } + } + + Some(irqfds) + } } struct MmioInfo { @@ -352,7 +446,6 @@ enum DeviceData { } /// Implements the Vfio Pci device, then a pci device is added into vm -#[allow(dead_code)] pub struct VfioPciDevice { device: Arc<VfioDevice>, config: VfioPciConfig, @@ -503,14 +596,24 @@ impl VfioPciDevice { self.irq_type = None; } - fn enable_msi(&mut self) { - if let Some(irq_type) = &self.irq_type { - match irq_type { - VfioIrqType::Intx => self.disable_intx(), - _ => return, - } + fn disable_irqs(&mut self) { + match self.irq_type { + Some(VfioIrqType::Msi) => self.disable_msi(), + Some(VfioIrqType::Msix) => self.disable_msix(), + _ => (), } + // Above disable_msi() or disable_msix() will enable intx again. + // so disable_intx here again. + match self.irq_type { + Some(VfioIrqType::Intx) => self.disable_intx(), + _ => (), + } + } + + fn enable_msi(&mut self) { + self.disable_irqs(); + let irqfd = match &self.msi_cap { Some(cap) => { if let Some(fd) = cap.get_msi_irqfd() { @@ -546,6 +649,37 @@ impl VfioPciDevice { self.enable_intx(); } + fn enable_msix(&mut self) { + self.disable_irqs(); + + let irqfds = match &self.msix_cap { + Some(cap) => cap.get_msix_irqfds(), + None => return, + }; + + if let Some(fds) = irqfds { + if let Err(e) = self.device.irq_enable(fds, VfioIrqType::Msix) { + error!("failed to enable msix: {}", e); + self.enable_intx(); + return; + } + } else { + self.enable_intx(); + return; + } + + self.irq_type = Some(VfioIrqType::Msix); + } + + fn disable_msix(&mut self) { + if let Err(e) = self.device.irq_disable(VfioIrqType::Msix) { + error!("failed to disable msix: {}", e); + return; + } + + self.enable_intx(); + } + fn add_bar_mmap(&self, index: u32, bar_addr: u64) -> Vec<MemoryMapping> { let mut mem_map: Vec<MemoryMapping> = Vec::new(); if self.device.get_region_flags(index) & VFIO_REGION_INFO_FLAG_MMAP != 0 { @@ -829,6 +963,10 @@ impl PciDevice for VfioPciDevice { // Clear multifunction flags as pci_root doesn't // support multifunction. config &= !PCI_MULTI_FLAG; + } else if let Some(msix_cap) = &self.msix_cap { + if msix_cap.is_msix_control_reg(reg, 4) { + msix_cap.read_msix_control(&mut config); + } } // Quirk for intel graphic, set stolen memory size to 0 in pci_cfg[0x51] @@ -855,6 +993,18 @@ impl PciDevice for VfioPciDevice { None => (), } + msi_change = None; + if let Some(msix_cap) = self.msix_cap.as_mut() { + if msix_cap.is_msix_control_reg(start as u32, data.len() as u32) { + msi_change = msix_cap.write_msix_control(data); + } + } + match msi_change { + Some(VfioMsiChange::Enable) => self.enable_msix(), + Some(VfioMsiChange::Disable) => self.disable_msix(), + None => (), + } + // if guest enable memory access, then enable bar mappable once if start == PCI_COMMAND as u64 && data.len() == 2 @@ -871,7 +1021,17 @@ impl PciDevice for VfioPciDevice { fn read_bar(&mut self, addr: u64, data: &mut [u8]) { if let Some(mmio_info) = self.find_region(addr) { let offset = addr - mmio_info.start; - self.device.region_read(mmio_info.bar_index, data, offset); + let bar_index = mmio_info.bar_index; + if let Some(msix_cap) = &self.msix_cap { + if msix_cap.is_msix_table(bar_index, offset) { + msix_cap.read_table(offset, data); + return; + } else if msix_cap.is_msix_pba(bar_index, offset) { + msix_cap.read_pba(offset, data); + return; + } + } + self.device.region_read(bar_index, data, offset); } } @@ -889,7 +1049,19 @@ impl PciDevice for VfioPciDevice { } let offset = addr - mmio_info.start; - self.device.region_write(mmio_info.bar_index, data, offset); + let bar_index = mmio_info.bar_index; + + if let Some(msix_cap) = self.msix_cap.as_mut() { + if msix_cap.is_msix_table(bar_index, offset) { + msix_cap.write_table(offset, data); + return; + } else if msix_cap.is_msix_pba(bar_index, offset) { + msix_cap.write_pba(offset, data); + return; + } + } + + self.device.region_write(bar_index, data, offset); } } } |