summary refs log tree commit diff
diff options
context:
space:
mode:
author Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com> 2019-04-23 17:15:00 +0800
committer Commit Bot <commit-bot@chromium.org> 2019-10-17 00:17:07 +0000
commit bed8b0017d2cb283c20dc50241adb4f5b2668489 (patch)
tree 7a76e936a9e1aede56c08c112ca7d436ab8d9e98
parent 04a82c7be173b2068c4254ed4a129e24e9e3a2e4 (diff)
download crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar
crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.gz
crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.bz2
crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.lz
crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.xz
crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.tar.zst
crosvm-bed8b0017d2cb283c20dc50241adb4f5b2668489.zip
vfio: Add msi support
crosvm doesn't support MSI/MSI-X, but the kvmgt vGPU supports MSI only
through the config-space MSI capability. This is a simple MSI implementation: it
detects the MSI capability, tracks the MSI control, data and address info, and then
calls the vfio kernel driver to enable / disable the MSI interrupt.

Currently it supports one vector per MSI. It could be extended to multiple vectors and
MSI-X.

BUG=chromium:992270
TEST=none

Change-Id: I04fc95f23a07f9698237c014d9f909d011f447ef
Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1581142
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Daniel Verkamp <dverkamp@chromium.org>
-rw-r--r-- devices/src/pci/vfio_pci.rs 172
-rw-r--r-- devices/src/vfio.rs 52
-rw-r--r-- kvm/src/lib.rs 35
-rw-r--r-- sys_util/src/struct_util.rs 41
4 files changed, 252 insertions, 48 deletions
diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs
index 5b0474a..645ee19 100644
--- a/devices/src/pci/vfio_pci.rs
+++ b/devices/src/pci/vfio_pci.rs
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-use std::os::unix::io::RawFd;
+use std::os::unix::io::{AsRawFd, RawFd};
 use std::sync::Arc;
 use std::u32;
 
@@ -77,6 +77,134 @@ impl VfioPciConfig {
     }
 }
 
+const PCI_CAPABILITY_LIST: u32 = 0x34;
+const PCI_CAP_ID_MSI: u8 = 0x05;
+
+// MSI registers
+const PCI_MSI_NEXT_POINTER: u32 = 0x1; // Next cap pointer
+const PCI_MSI_FLAGS: u32 = 0x2; // Message Control
+const PCI_MSI_FLAGS_ENABLE: u16 = 0x0001; // MSI feature enabled
+const PCI_MSI_FLAGS_64BIT: u16 = 0x0080; // 64-bit addresses allowed
+const PCI_MSI_FLAGS_MASKBIT: u16 = 0x0100; // Per-vector masking capable
+const PCI_MSI_ADDRESS_LO: u32 = 0x4; // MSI address lower 32 bits
+const PCI_MSI_ADDRESS_HI: u32 = 0x8; // MSI address upper 32 bits (if 64 bit allowed)
+const PCI_MSI_DATA_32: u32 = 0x8; // 16 bits of data for 32-bit message address
+const PCI_MSI_DATA_64: u32 = 0xC; // 16 bits of date for 64-bit message address
+
+// MSI length
+const MSI_LENGTH_32BIT: u32 = 0xA;
+const MSI_LENGTH_64BIT_WITHOUT_MASK: u32 = 0xE;
+const MSI_LENGTH_64BIT_WITH_MASK: u32 = 0x18;
+
+enum VfioMsiChange {
+    Disable,
+    Enable,
+}
+
+struct VfioMsiCap {
+    offset: u32,
+    size: u32,
+    ctl: u16,
+    address: u64,
+    data: u16,
+}
+
+impl VfioMsiCap {
+    fn new(config: &VfioPciConfig) -> Option<Self> {
+        // msi minimum size is 0xa
+        let mut msi_len: u32 = MSI_LENGTH_32BIT;
+        let mut cap_next: u32 = config.read_config_byte(PCI_CAPABILITY_LIST).into();
+        while cap_next != 0 {
+            let cap_id = config.read_config_byte(cap_next);
+            // find msi cap
+            if cap_id == PCI_CAP_ID_MSI {
+                let msi_ctl = config.read_config_word(cap_next + PCI_MSI_FLAGS);
+                if msi_ctl & PCI_MSI_FLAGS_64BIT != 0 {
+                    msi_len = MSI_LENGTH_64BIT_WITHOUT_MASK;
+                }
+                if msi_ctl & PCI_MSI_FLAGS_MASKBIT != 0 {
+                    msi_len = MSI_LENGTH_64BIT_WITH_MASK;
+                }
+                return Some(VfioMsiCap {
+                    offset: cap_next,
+                    size: msi_len,
+                    ctl: 0,
+                    address: 0,
+                    data: 0,
+                });
+            }
+            let offset = cap_next + PCI_MSI_NEXT_POINTER;
+            cap_next = config.read_config_byte(offset).into();
+        }
+
+        None
+    }
+
+    fn is_msi_reg(&self, index: u64, len: usize) -> bool {
+        if index >= self.offset as u64
+            && index + len as u64 <= (self.offset + self.size) as u64
+            && len as u32 <= self.size
+        {
+            true
+        } else {
+            false
+        }
+    }
+
+    fn write_msi_reg(&mut self, index: u64, data: &[u8]) -> Option<VfioMsiChange> {
+        let len = data.len();
+        let offset = index as u32 - self.offset;
+        let mut ret: Option<VfioMsiChange> = None;
+
+        // write msi ctl
+        if len == 2 && offset == PCI_MSI_FLAGS {
+            let was_enabled = self.is_msi_enabled();
+            let value: [u8; 2] = [data[0], data[1]];
+            self.ctl = u16::from_le_bytes(value);
+            let is_enabled = self.is_msi_enabled();
+            if !was_enabled && is_enabled {
+                ret = Some(VfioMsiChange::Enable);
+            } else if was_enabled && !is_enabled {
+                ret = Some(VfioMsiChange::Disable)
+            }
+        } else if len == 4 && offset == PCI_MSI_ADDRESS_LO && self.size == MSI_LENGTH_32BIT {
+            //write 32 bit message address
+            let value: [u8; 8] = [data[0], data[1], data[2], data[3], 0, 0, 0, 0];
+            self.address = u64::from_le_bytes(value);
+        } else if len == 4 && offset == PCI_MSI_ADDRESS_LO && self.size != MSI_LENGTH_32BIT {
+            // write 64 bit message address low part
+            let value: [u8; 8] = [data[0], data[1], data[2], data[3], 0, 0, 0, 0];
+            self.address &= !0xffffffff;
+            self.address |= u64::from_le_bytes(value);
+        } else if len == 4 && offset == PCI_MSI_ADDRESS_HI && self.size != MSI_LENGTH_32BIT {
+            //write 64 bit message address high part
+            let value: [u8; 8] = [0, 0, 0, 0, data[0], data[1], data[2], data[3]];
+            self.address &= 0xffffffff;
+            self.address |= u64::from_le_bytes(value);
+        } else if len == 8 && offset == PCI_MSI_ADDRESS_LO && self.size != MSI_LENGTH_32BIT {
+            // write 64 bit message address
+            let value: [u8; 8] = [
+                data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
+            ];
+            self.address = u64::from_le_bytes(value);
+        } else if len == 2
+            && ((offset == PCI_MSI_DATA_32 && self.size == MSI_LENGTH_32BIT)
+                || (offset == PCI_MSI_DATA_64 && self.size == MSI_LENGTH_64BIT_WITH_MASK)
+                || (offset == PCI_MSI_DATA_64 && self.size == MSI_LENGTH_64BIT_WITHOUT_MASK))
+        {
+            // write message data
+            let value: [u8; 2] = [data[0], data[1]];
+            self.data = u16::from_le_bytes(value);
+        }
+
+        ret
+    }
+
+    fn is_msi_enabled(&self) -> bool {
+        self.ctl & PCI_MSI_FLAGS_ENABLE == PCI_MSI_FLAGS_ENABLE
+    }
+}
+
 struct MmioInfo {
     bar_index: u32,
     start: u64,
@@ -96,6 +224,7 @@ pub struct VfioPciDevice {
     interrupt_resample_evt: Option<EventFd>,
     mmio_regions: Vec<MmioInfo>,
     io_regions: Vec<IoInfo>,
+    msi_cap: Option<VfioMsiCap>,
 }
 
 impl VfioPciDevice {
@@ -103,6 +232,8 @@ impl VfioPciDevice {
     pub fn new(device: VfioDevice) -> Self {
         let dev = Arc::new(device);
         let config = VfioPciConfig::new(Arc::clone(&dev));
+        let msi_cap = VfioMsiCap::new(&config);
+
         VfioPciDevice {
             device: dev,
             config,
@@ -111,6 +242,7 @@ impl VfioPciDevice {
             interrupt_resample_evt: None,
             mmio_regions: Vec::new(),
             io_regions: Vec::new(),
+            msi_cap,
         }
     }
 
@@ -139,7 +271,14 @@ impl PciDevice for VfioPciDevice {
     }
 
     fn keep_fds(&self) -> Vec<RawFd> {
-        self.device.keep_fds()
+        let mut fds = self.device.keep_fds();
+        if let Some(ref interrupt_evt) = self.interrupt_evt {
+            fds.push(interrupt_evt.as_raw_fd());
+        }
+        if let Some(ref interrupt_resample_evt) = self.interrupt_resample_evt {
+            fds.push(interrupt_resample_evt.as_raw_fd());
+        }
+        fds
     }
 
     fn assign_irq(
@@ -274,11 +413,30 @@ impl PciDevice for VfioPciDevice {
     }
 
     fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
-        self.device.region_write(
-            VFIO_PCI_CONFIG_REGION_INDEX,
-            data,
-            (reg_idx * 4) as u64 + offset,
-        )
+        let start = (reg_idx * 4) as u64 + offset;
+
+        if let Some(msi_cap) = self.msi_cap.as_mut() {
+            if msi_cap.is_msi_reg(start, data.len()) {
+                if let Some(ref interrupt_evt) = self.interrupt_evt {
+                    match msi_cap.write_msi_reg(start, data) {
+                        Some(VfioMsiChange::Enable) => {
+                            if let Err(e) = self.device.msi_enable(interrupt_evt) {
+                                error!("{}", e);
+                            }
+                        }
+                        Some(VfioMsiChange::Disable) => {
+                            if let Err(e) = self.device.msi_disable() {
+                                error!("{}", e);
+                            }
+                        }
+                        None => (),
+                    }
+                }
+            }
+        }
+
+        self.device
+            .region_write(VFIO_PCI_CONFIG_REGION_INDEX, data, start);
     }
 
     fn read_bar(&mut self, addr: u64, data: &mut [u8]) {
diff --git a/devices/src/vfio.rs b/devices/src/vfio.rs
index 966085f..cdaadca 100644
--- a/devices/src/vfio.rs
+++ b/devices/src/vfio.rs
@@ -14,8 +14,8 @@ use std::u32;
 
 use kvm::Vm;
 use sys_util::{
-    ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, warn, Error,
-    GuestMemory,
+    ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val,
+    vec_with_array_field, warn, Error, EventFd, GuestMemory,
 };
 
 use vfio_sys::*;
@@ -38,6 +38,8 @@ pub enum VfioError {
     InvalidPath,
     IommuDmaMap(Error),
     IommuDmaUnmap(Error),
+    VfioMsiEnable(Error),
+    VfioMsiDisable(Error),
 }
 
 impl fmt::Display for VfioError {
@@ -59,6 +61,8 @@ impl fmt::Display for VfioError {
             VfioError::InvalidPath => write!(f,"invalid file path"),
             VfioError::IommuDmaMap(e) => write!(f, "failed to add guest memory map into iommu table: {}", e),
             VfioError::IommuDmaUnmap(e) => write!(f, "failed to remove guest memory map from iommu table: {}", e),
+            VfioError::VfioMsiEnable(e) => write!(f, "failed to enable vfio deviece's MSI: {}", e),
+            VfioError::VfioMsiDisable(e) => write!(f, "failed to disable vfio deviece's MSI: {}", e),
         }
     }
 }
@@ -306,6 +310,50 @@ impl VfioDevice {
         })
     }
 
+    /// enable vfio device's MSI and associate EventFd with this MSI
+    pub fn msi_enable(&self, fd: &EventFd) -> Result<(), VfioError> {
+        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
+        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
+        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+        irq_set[0].index = VFIO_PCI_MSI_IRQ_INDEX;
+        irq_set[0].start = 0;
+        irq_set[0].count = 1;
+
+        {
+            // irq_set.data could be none, bool or fd according to flags, so irq_set.data
+            // is u8 default, here irq_set.data is fd as u32, so 4 default u8 are combined
+            // together as u32. It is safe as enough space is reserved through
+            // vec_with_array_field(u32)<1>.
+            let fds = unsafe { irq_set[0].data.as_mut_slice(4) };
+            fds.copy_from_slice(&fd.as_raw_fd().to_le_bytes()[..]);
+        }
+
+        // Safe as we are the owner of self and irq_set which are valid value
+        let ret = unsafe { ioctl_with_ref(self, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
+        if ret < 0 {
+            Err(VfioError::VfioMsiEnable(get_error()))
+        } else {
+            Ok(())
+        }
+    }
+
+    pub fn msi_disable(&self) -> Result<(), VfioError> {
+        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
+        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
+        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+        irq_set[0].index = VFIO_PCI_MSI_IRQ_INDEX;
+        irq_set[0].start = 0;
+        irq_set[0].count = 0;
+
+        // Safe as we are the owner of self and irq_set which are valid value
+        let ret = unsafe { ioctl_with_ref(self, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) };
+        if ret < 0 {
+            Err(VfioError::VfioMsiDisable(get_error()))
+        } else {
+            Ok(())
+        }
+    }
+
     fn get_regions(dev: &File) -> Result<Vec<VfioRegion>, VfioError> {
         let mut regions: Vec<VfioRegion> = Vec::new();
         let mut dev_info = vfio_device_info {
diff --git a/kvm/src/lib.rs b/kvm/src/lib.rs
index bafc79d..b8475d3 100644
--- a/kvm/src/lib.rs
+++ b/kvm/src/lib.rs
@@ -23,8 +23,8 @@ use msg_socket::MsgOnSocket;
 #[allow(unused_imports)]
 use sys_util::{
     ioctl, ioctl_with_mut_ptr, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val,
-    pagesize, signal, warn, Error, EventFd, GuestAddress, GuestMemory, MemoryMapping,
-    MemoryMappingArena, Result,
+    pagesize, signal, vec_with_array_field, warn, Error, EventFd, GuestAddress, GuestMemory,
+    MemoryMapping, MemoryMappingArena, Result,
 };
 
 pub use crate::cap::*;
@@ -33,37 +33,6 @@ fn errno_result<T>() -> Result<T> {
     Err(Error::last())
 }
 
-// Returns a `Vec<T>` with a size in ytes at least as large as `size_in_bytes`.
-fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> {
-    let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>();
-    let mut v = Vec::with_capacity(rounded_size);
-    for _ in 0..rounded_size {
-        v.push(T::default())
-    }
-    v
-}
-
-// The kvm API has many structs that resemble the following `Foo` structure:
-//
-// ```
-// #[repr(C)]
-// struct Foo {
-//    some_data: u32
-//    entries: __IncompleteArrayField<__u32>,
-// }
-// ```
-//
-// In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not
-// include any space for `entries`. To make the allocation large enough while still being aligned
-// for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used
-// as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous
-// with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries.
-fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> {
-    let element_space = count * size_of::<F>();
-    let vec_size_bytes = size_of::<T>() + element_space;
-    vec_with_size_in_bytes(vec_size_bytes)
-}
-
 unsafe fn set_user_memory_region<F: AsRawFd>(
     fd: &F,
     slot: u32,
diff --git a/sys_util/src/struct_util.rs b/sys_util/src/struct_util.rs
index 3d8def2..22411a1 100644
--- a/sys_util/src/struct_util.rs
+++ b/sys_util/src/struct_util.rs
@@ -4,7 +4,38 @@
 
 use std;
 use std::io::Read;
-use std::mem;
+use std::mem::size_of;
+
+// Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`.
+fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> {
+    let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>();
+    let mut v = Vec::with_capacity(rounded_size);
+    for _ in 0..rounded_size {
+        v.push(T::default())
+    }
+    v
+}
+
+/// The kvm API has many structs that resemble the following `Foo` structure:
+///
+/// ```
+/// #[repr(C)]
+/// struct Foo {
+///    some_data: u32
+///    entries: __IncompleteArrayField<__u32>,
+/// }
+/// ```
+///
+/// In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not
+/// include any space for `entries`. To make the allocation large enough while still being aligned
+/// for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used
+/// as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous
+/// with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries.
+pub fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> {
+    let element_space = count * size_of::<F>();
+    let vec_size_bytes = size_of::<T>() + element_space;
+    vec_with_size_in_bytes(vec_size_bytes)
+}
 
 #[derive(Debug)]
 pub enum Error {
@@ -21,7 +52,7 @@ pub type Result<T> = std::result::Result<T, Error>;
 /// * `f` - The input to read from.  Often this is a file.
 /// * `out` - The struct to fill with data read from `f`.
 pub unsafe fn read_struct<T: Copy, F: Read>(f: &mut F, out: &mut T) -> Result<()> {
-    let out_slice = std::slice::from_raw_parts_mut(out as *mut T as *mut u8, mem::size_of::<T>());
+    let out_slice = std::slice::from_raw_parts_mut(out as *mut T as *mut u8, size_of::<T>());
     f.read_exact(out_slice).map_err(|_| Error::ReadStruct)?;
     Ok(())
 }
@@ -38,10 +69,8 @@ pub unsafe fn read_struct<T: Copy, F: Read>(f: &mut F, out: &mut T) -> Result<()
 pub unsafe fn read_struct_slice<T: Copy, F: Read>(f: &mut F, len: usize) -> Result<Vec<T>> {
     let mut out: Vec<T> = Vec::with_capacity(len);
     out.set_len(len);
-    let out_slice = std::slice::from_raw_parts_mut(
-        out.as_ptr() as *mut T as *mut u8,
-        mem::size_of::<T>() * len,
-    );
+    let out_slice =
+        std::slice::from_raw_parts_mut(out.as_ptr() as *mut T as *mut u8, size_of::<T>() * len);
     f.read_exact(out_slice).map_err(|_| Error::ReadStruct)?;
     Ok(out)
 }