summary refs log tree commit diff
diff options
context:
space:
mode:
authorXiong Zhang <xiong.y.zhang@intel.corp-partner.google.com>2019-11-08 18:36:55 +0800
committerCommit Bot <commit-bot@chromium.org>2020-02-20 06:42:13 +0000
commit521646a401f8ce66cf26ed21abe5e18ac929fe33 (patch)
treeb128b3b97907acca67ff7bcbc12a4b286aadf6ec
parente73414db487afd4bebd2fb60ca80693ee6349cf5 (diff)
downloadcrosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar
crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.gz
crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.bz2
crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.lz
crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.xz
crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.tar.zst
crosvm-521646a401f8ce66cf26ed21abe5e18ac929fe33.zip
Vfio: Emulate MSI-x
When a VFIO device has the MSI-X capability, the vfio kernel driver doesn't
emulate MSI-X, so all of the MSI-X emulation is handled by crosvm. This includes
MSI-X capability register read/write, MSI-X table read/write, and MSI-X PBA
table read/write.

BUG=chromium:992270
TEST=pass through a device with msix capability to the guest, and check the
device's msix function in the guest

Change-Id: Ic39737662a5051ac6b9e29aad227d3d4946190a8
Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1987814
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
-rw-r--r--devices/src/pci/msix.rs17
-rw-r--r--devices/src/pci/vfio_pci.rs194
2 files changed, 197 insertions, 14 deletions
diff --git a/devices/src/pci/msix.rs b/devices/src/pci/msix.rs
index 0b9f391..b1dc672 100644
--- a/devices/src/pci/msix.rs
+++ b/devices/src/pci/msix.rs
@@ -13,9 +13,9 @@ use vm_control::{MaybeOwnedFd, VmIrqRequest, VmIrqRequestSocket, VmIrqResponse};
 use data_model::DataInit;
 
 const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
-const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
-const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
-const BITS_PER_PBA_ENTRY: usize = 64;
+pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
+pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
+pub const BITS_PER_PBA_ENTRY: usize = 64;
 const FUNCTION_MASK_BIT: u16 = 0x4000;
 const MSIX_ENABLE_BIT: u16 = 0x8000;
 
@@ -433,6 +433,17 @@ impl MsixConfig {
     pub fn get_msi_socket(&self) -> RawFd {
         self.msi_device_socket.as_ref().as_raw_fd()
     }
+
+    /// Returns the irqfd of an MSI-X table entry, if one exists for `vector`.
+    ///
+    /// # Arguments
+    /// * `vector` - the index of the MSI-X table entry
+    pub fn get_irqfd(&self, vector: usize) -> Option<&EventFd> {
+        match self.irq_vec.get(vector) {
+            Some(irq) => Some(&irq.irqfd),
+            None => None,
+        }
+    }
 }
 
 // It is safe to implement DataInit; all members are simple numbers and any value is valid.
diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs
index 766cbcf..178d4ae 100644
--- a/devices/src/pci/vfio_pci.rs
+++ b/devices/src/pci/vfio_pci.rs
@@ -17,7 +17,9 @@ use vm_control::{
     VmMemoryRequest, VmMemoryResponse,
 };
 
-use crate::pci::msix::MsixConfig;
+use crate::pci::msix::{
+    MsixConfig, BITS_PER_PBA_ENTRY, MSIX_PBA_ENTRIES_MODULO, MSIX_TABLE_ENTRIES_MODULO,
+};
 
 use crate::pci::pci_device::{Error as PciDeviceError, PciDevice};
 use crate::pci::{PciClassCode, PciInterruptPin};
@@ -299,7 +301,6 @@ const PCI_MSIX_PBA: u32 = 0x08; // Pending bit Array offset
 const PCI_MSIX_PBA_BIR: u32 = 0x07; // BAR index
 const PCI_MSIX_PBA_OFFSET: u32 = 0xFFFFFFF8; // Offset into specified BAR
 
-#[allow(dead_code)]
 struct VfioMsixCap {
     config: MsixConfig,
     offset: u32,
@@ -335,6 +336,99 @@ impl VfioMsixCap {
             pba_offset,
         }
     }
+
+    // Only the MSI-X control register is writable, so it needs special handling in PCI r/w.
+    fn is_msix_control_reg(&self, offset: u32, size: u32) -> bool {
+        let control_start = self.offset + PCI_MSIX_FLAGS;
+        let control_end = control_start + 2;
+
+        if offset < control_end && offset + size > control_start {
+            true
+        } else {
+            false
+        }
+    }
+
+    fn read_msix_control(&self, data: &mut u32) {
+        *data = self.config.read_msix_capability(*data);
+    }
+
+    fn write_msix_control(&mut self, data: &[u8]) -> Option<VfioMsiChange> {
+        let old_enabled = self.config.enabled();
+
+        self.config
+            .write_msix_capability(PCI_MSIX_FLAGS.into(), data);
+
+        let new_enabled = self.config.enabled();
+        if !old_enabled && new_enabled {
+            Some(VfioMsiChange::Enable)
+        } else if old_enabled && !new_enabled {
+            Some(VfioMsiChange::Disable)
+        } else {
+            None
+        }
+    }
+
+    fn is_msix_table(&self, bar_index: u32, offset: u64) -> bool {
+        let table_size: u64 = (self.table_size * (MSIX_TABLE_ENTRIES_MODULO as u16)).into();
+        if bar_index != self.table_pci_bar
+            || offset < self.table_offset
+            || offset >= self.table_offset + table_size
+        {
+            false
+        } else {
+            true
+        }
+    }
+
+    fn read_table(&self, offset: u64, data: &mut [u8]) {
+        let offset = offset - self.table_offset;
+        self.config.read_msix_table(offset, data);
+    }
+
+    fn write_table(&mut self, offset: u64, data: &[u8]) {
+        let offset = offset - self.table_offset;
+        self.config.write_msix_table(offset, data);
+    }
+
+    fn is_msix_pba(&self, bar_index: u32, offset: u64) -> bool {
+        let pba_size: u64 = (((self.table_size + BITS_PER_PBA_ENTRY as u16 - 1)
+            / BITS_PER_PBA_ENTRY as u16)
+            * MSIX_PBA_ENTRIES_MODULO as u16) as u64;
+        if bar_index != self.pba_pci_bar
+            || offset < self.pba_offset
+            || offset >= self.pba_offset + pba_size
+        {
+            false
+        } else {
+            true
+        }
+    }
+
+    fn read_pba(&self, offset: u64, data: &mut [u8]) {
+        let offset = offset - self.pba_offset;
+        self.config.read_pba_entries(offset, data);
+    }
+
+    fn write_pba(&mut self, offset: u64, data: &[u8]) {
+        let offset = offset - self.pba_offset;
+        self.config.write_pba_entries(offset, data);
+    }
+
+    fn get_msix_irqfds(&self) -> Option<Vec<&EventFd>> {
+        let mut irqfds = Vec::new();
+
+        for i in 0..self.table_size {
+            let irqfd = self.config.get_irqfd(i as usize);
+            if let Some(fd) = irqfd {
+                irqfds.push(fd);
+            } else {
+                return None;
+            }
+        }
+
+        Some(irqfds)
+    }
 }
 
 struct MmioInfo {
@@ -352,7 +446,6 @@ enum DeviceData {
 }
 
 /// Implements the Vfio Pci device, then a pci device is added into vm
-#[allow(dead_code)]
 pub struct VfioPciDevice {
     device: Arc<VfioDevice>,
     config: VfioPciConfig,
@@ -503,14 +596,24 @@ impl VfioPciDevice {
         self.irq_type = None;
     }
 
-    fn enable_msi(&mut self) {
-        if let Some(irq_type) = &self.irq_type {
-            match irq_type {
-                VfioIrqType::Intx => self.disable_intx(),
-                _ => return,
-            }
+    fn disable_irqs(&mut self) {
+        match self.irq_type {
+            Some(VfioIrqType::Msi) => self.disable_msi(),
+            Some(VfioIrqType::Msix) => self.disable_msix(),
+            _ => (),
         }
 
+        // The disable_msi() or disable_msix() call above enables INTx again,
+        // so disable INTx here as well.
+        match self.irq_type {
+            Some(VfioIrqType::Intx) => self.disable_intx(),
+            _ => (),
+        }
+    }
+
+    fn enable_msi(&mut self) {
+        self.disable_irqs();
+
         let irqfd = match &self.msi_cap {
             Some(cap) => {
                 if let Some(fd) = cap.get_msi_irqfd() {
@@ -546,6 +649,37 @@ impl VfioPciDevice {
         self.enable_intx();
     }
 
+    fn enable_msix(&mut self) {
+        self.disable_irqs();
+
+        let irqfds = match &self.msix_cap {
+            Some(cap) => cap.get_msix_irqfds(),
+            None => return,
+        };
+
+        if let Some(fds) = irqfds {
+            if let Err(e) = self.device.irq_enable(fds, VfioIrqType::Msix) {
+                error!("failed to enable msix: {}", e);
+                self.enable_intx();
+                return;
+            }
+        } else {
+            self.enable_intx();
+            return;
+        }
+
+        self.irq_type = Some(VfioIrqType::Msix);
+    }
+
+    fn disable_msix(&mut self) {
+        if let Err(e) = self.device.irq_disable(VfioIrqType::Msix) {
+            error!("failed to disable msix: {}", e);
+            return;
+        }
+
+        self.enable_intx();
+    }
+
     fn add_bar_mmap(&self, index: u32, bar_addr: u64) -> Vec<MemoryMapping> {
         let mut mem_map: Vec<MemoryMapping> = Vec::new();
         if self.device.get_region_flags(index) & VFIO_REGION_INFO_FLAG_MMAP != 0 {
@@ -829,6 +963,10 @@ impl PciDevice for VfioPciDevice {
             // Clear multifunction flags as pci_root doesn't
             // support multifunction.
             config &= !PCI_MULTI_FLAG;
+        } else if let Some(msix_cap) = &self.msix_cap {
+            if msix_cap.is_msix_control_reg(reg, 4) {
+                msix_cap.read_msix_control(&mut config);
+            }
         }
 
         // Quirk for intel graphic, set stolen memory size to 0 in pci_cfg[0x51]
@@ -855,6 +993,18 @@ impl PciDevice for VfioPciDevice {
             None => (),
         }
 
+        msi_change = None;
+        if let Some(msix_cap) = self.msix_cap.as_mut() {
+            if msix_cap.is_msix_control_reg(start as u32, data.len() as u32) {
+                msi_change = msix_cap.write_msix_control(data);
+            }
+        }
+        match msi_change {
+            Some(VfioMsiChange::Enable) => self.enable_msix(),
+            Some(VfioMsiChange::Disable) => self.disable_msix(),
+            None => (),
+        }
+
         // if guest enable memory access, then enable bar mappable once
         if start == PCI_COMMAND as u64
             && data.len() == 2
@@ -871,7 +1021,17 @@ impl PciDevice for VfioPciDevice {
     fn read_bar(&mut self, addr: u64, data: &mut [u8]) {
         if let Some(mmio_info) = self.find_region(addr) {
             let offset = addr - mmio_info.start;
-            self.device.region_read(mmio_info.bar_index, data, offset);
+            let bar_index = mmio_info.bar_index;
+            if let Some(msix_cap) = &self.msix_cap {
+                if msix_cap.is_msix_table(bar_index, offset) {
+                    msix_cap.read_table(offset, data);
+                    return;
+                } else if msix_cap.is_msix_pba(bar_index, offset) {
+                    msix_cap.read_pba(offset, data);
+                    return;
+                }
+            }
+            self.device.region_read(bar_index, data, offset);
         }
     }
 
@@ -889,7 +1049,19 @@ impl PciDevice for VfioPciDevice {
             }
 
             let offset = addr - mmio_info.start;
-            self.device.region_write(mmio_info.bar_index, data, offset);
+            let bar_index = mmio_info.bar_index;
+
+            if let Some(msix_cap) = self.msix_cap.as_mut() {
+                if msix_cap.is_msix_table(bar_index, offset) {
+                    msix_cap.write_table(offset, data);
+                    return;
+                } else if msix_cap.is_msix_pba(bar_index, offset) {
+                    msix_cap.write_pba(offset, data);
+                    return;
+                }
+            }
+
+            self.device.region_write(bar_index, data, offset);
         }
     }
 }