summary refs log tree commit diff
diff options
context:
space:
mode:
authorChuanxiao Dong <chuanxiao.dong@intel.com>2020-03-21 09:15:16 +0800
committerCommit Bot <commit-bot@chromium.org>2020-03-25 17:41:42 +0000
commit45a94bedaea17d8ae95e860b8a4beb2e465c187e (patch)
tree4662c616cbd5231504bb960fde8c66d5cc4082f0
parent57b8201eeb75d9b2cbf424fa7a91d6738fe8610c (diff)
downloadcrosvm-45a94bedaea17d8ae95e860b8a4beb2e465c187e.tar
crosvm-45a94bedaea17d8ae95e860b8a4beb2e465c187e.tar.gz
crosvm-45a94bedaea17d8ae95e860b8a4beb2e465c187e.tar.bz2
crosvm-45a94bedaea17d8ae95e860b8a4beb2e465c187e.tar.lz
crosvm-45a94bedaea17d8ae95e860b8a4beb2e465c187e.tar.xz
crosvm-45a94bedaea17d8ae95e860b8a4beb2e465c187e.tar.zst
crosvm-45a94bedaea17d8ae95e860b8a4beb2e465c187e.zip
vhost-net: implement direct msix irq fd
The current vhost-net msix irq injection flow is from vhost-kernel
to crosvm vhost-net, then to the KVM for irq injection. It still need
crosvm vhost-net to trigger irq, which is because the set_vring_call
is not directly using the msix irq fd.

To optimize this flow to be from vhost-kernel to KVM directly, need:
1. if the msix is enabled and unmasked, use the misx irq fd for the
vring_call directly so that all the misx injection can directly to
KVM from vhost-kernel.
2. if the msix is disabled or masked, use the indirect vhost_interrupt
fd to let the crosvm to control the irq injection.

BUG=None
TEST=cargo test -p devices
TEST=start crosvm with vhost-net, and run the iperf3 on the network
without any issue

Change-Id: Idb3427f69f23b728305ed63d88973156a03e7c6b
Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.corp-partner.google.com>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2046452
Reviewed-by: Stephen Barber <smbarber@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
-rw-r--r--devices/src/pci/mod.rs2
-rw-r--r--devices/src/pci/msix.rs42
-rw-r--r--devices/src/virtio/interrupt.rs2
-rw-r--r--devices/src/virtio/vhost/net.rs44
-rw-r--r--devices/src/virtio/vhost/worker.rs133
-rw-r--r--devices/src/virtio/virtio_device.rs4
-rw-r--r--devices/src/virtio/virtio_pci_device.rs7
7 files changed, 212 insertions, 22 deletions
diff --git a/devices/src/pci/mod.rs b/devices/src/pci/mod.rs
index 8c5380e..b9ebc19 100644
--- a/devices/src/pci/mod.rs
+++ b/devices/src/pci/mod.rs
@@ -15,7 +15,7 @@ mod pci_root;
 mod vfio_pci;
 
 pub use self::ac97::{Ac97Backend, Ac97Dev, Ac97Parameters};
-pub use self::msix::{MsixCap, MsixConfig};
+pub use self::msix::{MsixCap, MsixConfig, MsixStatus};
 pub use self::pci_configuration::{
     PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityID,
     PciClassCode, PciConfiguration, PciHeaderType, PciProgrammingInterface, PciSerialBusSubClass,
diff --git a/devices/src/pci/msix.rs b/devices/src/pci/msix.rs
index 6c79b4a..b69114b 100644
--- a/devices/src/pci/msix.rs
+++ b/devices/src/pci/msix.rs
@@ -89,6 +89,12 @@ impl Display for MsixError {
 
 type MsixResult<T> = std::result::Result<T, MsixError>;
 
+pub enum MsixStatus {
+    Changed,
+    EntryChanged(usize),
+    NothingToDo,
+}
+
 impl MsixConfig {
     pub fn new(msix_vectors: u16, vm_socket: Arc<VmIrqRequestSocket>) -> Self {
         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
@@ -123,6 +129,18 @@ impl MsixConfig {
         self.masked
     }
 
+    /// Check whether the Function Mask bit in MSIX table Message Control
+    /// word in set or not.
+    /// If true, the vector is masked.
+    /// If false, the vector is unmasked.
+    pub fn table_masked(&self, index: usize) -> bool {
+        if index >= self.table_entries.len() {
+            true
+        } else {
+            self.table_entries[index].masked()
+        }
+    }
+
     /// Check whether the MSI-X Enable bit in Message Control word in set or not.
     /// if 1, the function is permitted to use MSI-X to request service.
     pub fn enabled(&self) -> bool {
@@ -147,7 +165,7 @@ impl MsixConfig {
 
     /// Write to the MSI-X Capability Structure.
     /// Only the top 2 bits in Message Control Word are writable.
-    pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) {
+    pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
         if offset == 2 && data.len() == 2 {
             let reg = u16::from_le_bytes([data[0], data[1]]);
             let old_masked = self.masked;
@@ -173,6 +191,9 @@ impl MsixConfig {
                         self.inject_msix_and_clear_pba(index);
                     }
                 }
+                return MsixStatus::Changed;
+            } else if !old_masked && self.masked {
+                return MsixStatus::Changed;
             }
         } else {
             error!(
@@ -180,6 +201,7 @@ impl MsixConfig {
                 offset
             );
         }
+        MsixStatus::NothingToDo
     }
 
     fn add_msi_route(&self, index: u16, gsi: u32) -> MsixResult<()> {
@@ -304,7 +326,7 @@ impl MsixConfig {
     /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit
     ///     is set, the function is prohibited from sending a message using
     ///     this MSI-X Table entry.
-    pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) {
+    pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
 
@@ -360,13 +382,17 @@ impl MsixConfig {
         // device.
 
         // Check if bit has been flipped
-        if !self.masked()
-            && old_entry.masked()
-            && !self.table_entries[index].masked()
-            && self.get_pba_bit(index as u16) == 1
-        {
-            self.inject_msix_and_clear_pba(index);
+        if !self.masked() {
+            if old_entry.masked() && !self.table_entries[index].masked() {
+                if self.get_pba_bit(index as u16) == 1 {
+                    self.inject_msix_and_clear_pba(index);
+                }
+                return MsixStatus::EntryChanged(index);
+            } else if !old_entry.masked() && self.table_entries[index].masked() {
+                return MsixStatus::EntryChanged(index);
+            }
         }
+        MsixStatus::NothingToDo
     }
 
     /// Read PBA Entries
diff --git a/devices/src/virtio/interrupt.rs b/devices/src/virtio/interrupt.rs
index f808d84..91a3942 100644
--- a/devices/src/virtio/interrupt.rs
+++ b/devices/src/virtio/interrupt.rs
@@ -13,7 +13,7 @@ pub struct Interrupt {
     interrupt_status: Arc<AtomicUsize>,
     interrupt_evt: EventFd,
     interrupt_resample_evt: EventFd,
-    msix_config: Option<Arc<Mutex<MsixConfig>>>,
+    pub msix_config: Option<Arc<Mutex<MsixConfig>>>,
     config_msix_vector: u16,
 }
 
diff --git a/devices/src/virtio/vhost/net.rs b/devices/src/virtio/vhost/net.rs
index cc9521a..542423a 100644
--- a/devices/src/virtio/vhost/net.rs
+++ b/devices/src/virtio/vhost/net.rs
@@ -17,7 +17,9 @@ use virtio_sys::virtio_net;
 use super::control_socket::*;
 use super::worker::Worker;
 use super::{Error, Result};
+use crate::pci::MsixStatus;
 use crate::virtio::{Interrupt, Queue, VirtioDevice, TYPE_NET};
+use msg_socket::{MsgReceiver, MsgSender};
 
 const QUEUE_SIZE: u16 = 256;
 const NUM_QUEUES: usize = 2;
@@ -272,6 +274,48 @@ where
         }
     }
 
+    fn control_notify(&self, behavior: MsixStatus) {
+        if self.worker_thread.is_none() || self.request_socket.is_none() {
+            return;
+        }
+        if let Some(socket) = &self.request_socket {
+            match behavior {
+                MsixStatus::EntryChanged(index) => {
+                    if let Err(e) = socket.send(&VhostDevRequest::MsixEntryChanged(index)) {
+                        error!(
+                            "{} failed to send VhostMsixEntryChanged request for entry {}: {:?}",
+                            self.debug_label(),
+                            index,
+                            e
+                        );
+                        return;
+                    }
+                    if let Err(e) = socket.recv() {
+                        error!("{} failed to receive VhostMsixEntryChanged response for entry {}: {:?}", self.debug_label(), index, e);
+                    }
+                }
+                MsixStatus::Changed => {
+                    if let Err(e) = socket.send(&VhostDevRequest::MsixChanged) {
+                        error!(
+                            "{} failed to send VhostMsixChanged request: {:?}",
+                            self.debug_label(),
+                            e
+                        );
+                        return;
+                    }
+                    if let Err(e) = socket.recv() {
+                        error!(
+                            "{} failed to receive VhostMsixChanged response {:?}",
+                            self.debug_label(),
+                            e
+                        );
+                    }
+                }
+                _ => {}
+            }
+        }
+    }
+
     fn reset(&mut self) -> bool {
         // Only kill the child if it claimed its eventfd.
         if self.workers_kill_evt.is_none() && self.kill_evt.write(1).is_err() {
diff --git a/devices/src/virtio/vhost/worker.rs b/devices/src/virtio/vhost/worker.rs
index 03c1066..ca02a63 100644
--- a/devices/src/virtio/vhost/worker.rs
+++ b/devices/src/virtio/vhost/worker.rs
@@ -4,17 +4,16 @@
 
 use std::os::raw::c_ulonglong;
 
-use sys_util::{EventFd, PollContext, PollToken};
+use sys_util::{error, Error as SysError, EventFd, PollContext, PollToken};
 use vhost::Vhost;
 
-use super::control_socket::VhostDevResponseSocket;
+use super::control_socket::{VhostDevRequest, VhostDevResponse, VhostDevResponseSocket};
 use super::{Error, Result};
 use crate::virtio::{Interrupt, Queue};
+use libc::EIO;
+use msg_socket::{MsgReceiver, MsgSender};
 
-/// Worker that takes care of running the vhost device.  This mainly involves forwarding interrupts
-/// from the vhost driver to the guest VM because crosvm only supports the virtio-mmio transport,
-/// which requires a bit to be set in the interrupt status register before triggering the interrupt
-/// and the vhost driver doesn't do this for us.
+/// Worker that takes care of running the vhost device.
 pub struct Worker<T: Vhost> {
     interrupt: Interrupt,
     queues: Vec<Queue>,
@@ -91,9 +90,7 @@ impl<T: Vhost> Worker<T> {
             self.vhost_handle
                 .set_vring_base(queue_index, 0)
                 .map_err(Error::VhostSetVringBase)?;
-            self.vhost_handle
-                .set_vring_call(queue_index, &self.vhost_interrupt[queue_index])
-                .map_err(Error::VhostSetVringCall)?;
+            self.set_vring_call_for_entry(queue_index, queue.vector as usize)?;
             self.vhost_handle
                 .set_vring_kick(queue_index, &queue_evts[queue_index])
                 .map_err(Error::VhostSetVringKick)?;
@@ -106,6 +103,7 @@ impl<T: Vhost> Worker<T> {
             VhostIrqi { index: usize },
             InterruptResample,
             Kill,
+            ControlNotify,
         }
 
         let poll_ctx: PollContext<Token> = PollContext::build_with(&[
@@ -119,6 +117,11 @@ impl<T: Vhost> Worker<T> {
                 .add(vhost_int, Token::VhostIrqi { index })
                 .map_err(Error::CreatePollContext)?;
         }
+        if let Some(socket) = &self.response_socket {
+            poll_ctx
+                .add(socket, Token::ControlNotify)
+                .map_err(Error::CreatePollContext)?;
+        }
 
         'poll: loop {
             let events = poll_ctx.wait().map_err(Error::PollError)?;
@@ -138,10 +141,122 @@ impl<T: Vhost> Worker<T> {
                         let _ = self.kill_evt.read();
                         break 'poll;
                     }
+                    Token::ControlNotify => {
+                        if let Some(socket) = &self.response_socket {
+                            match socket.recv() {
+                                Ok(VhostDevRequest::MsixEntryChanged(index)) => {
+                                    let mut qindex = 0;
+                                    for (queue_index, queue) in self.queues.iter().enumerate() {
+                                        if queue.vector == index as u16 {
+                                            qindex = queue_index;
+                                            break;
+                                        }
+                                    }
+                                    let response =
+                                        match self.set_vring_call_for_entry(qindex, index) {
+                                            Ok(()) => VhostDevResponse::Ok,
+                                            Err(e) => {
+                                                error!(
+                                                "Set vring call failed for masked entry {}: {:?}",
+                                                index, e
+                                            );
+                                                VhostDevResponse::Err(SysError::new(EIO))
+                                            }
+                                        };
+                                    if let Err(e) = socket.send(&response) {
+                                        error!("Vhost failed to send VhostMsixEntryMasked Response for entry {}: {:?}", index, e);
+                                    }
+                                }
+                                Ok(VhostDevRequest::MsixChanged) => {
+                                    let response = match self.set_vring_calls() {
+                                        Ok(()) => VhostDevResponse::Ok,
+                                        Err(e) => {
+                                            error!("Set vring calls failed: {:?}", e);
+                                            VhostDevResponse::Err(SysError::new(EIO))
+                                        }
+                                    };
+                                    if let Err(e) = socket.send(&response) {
+                                        error!(
+                                            "Vhost failed to send VhostMsixMasked Response: {:?}",
+                                            e
+                                        );
+                                    }
+                                }
+                                Err(e) => {
+                                    error!("Vhost failed to receive Control request: {:?}", e);
+                                }
+                            }
+                        }
+                    }
                 }
             }
         }
         cleanup_vqs(&self.vhost_handle)?;
         Ok(())
     }
+
+    fn set_vring_call_for_entry(&self, queue_index: usize, vector: usize) -> Result<()> {
+        // No response_socket means it doesn't have any control related
+        // with the msix. Due to this, cannot use the direct irq fd but
+        // should fall back to indirect irq fd.
+        if self.response_socket.is_some() {
+            if let Some(msix_config) = &self.interrupt.msix_config {
+                let msix_config = msix_config.lock();
+                let msix_masked = msix_config.masked();
+                if msix_masked {
+                    return Ok(());
+                }
+                if !msix_config.table_masked(vector) {
+                    if let Some(irqfd) = msix_config.get_irqfd(vector) {
+                        self.vhost_handle
+                            .set_vring_call(queue_index, irqfd)
+                            .map_err(Error::VhostSetVringCall)?;
+                    } else {
+                        self.vhost_handle
+                            .set_vring_call(queue_index, &self.vhost_interrupt[queue_index])
+                            .map_err(Error::VhostSetVringCall)?;
+                    }
+                    return Ok(());
+                }
+            }
+        }
+
+        self.vhost_handle
+            .set_vring_call(queue_index, &self.vhost_interrupt[queue_index])
+            .map_err(Error::VhostSetVringCall)?;
+        Ok(())
+    }
+
+    fn set_vring_calls(&self) -> Result<()> {
+        if let Some(msix_config) = &self.interrupt.msix_config {
+            let msix_config = msix_config.lock();
+            if msix_config.masked() {
+                for (queue_index, _) in self.queues.iter().enumerate() {
+                    self.vhost_handle
+                        .set_vring_call(queue_index, &self.vhost_interrupt[queue_index])
+                        .map_err(Error::VhostSetVringCall)?;
+                }
+            } else {
+                for (queue_index, queue) in self.queues.iter().enumerate() {
+                    let vector = queue.vector as usize;
+                    if !msix_config.table_masked(vector) {
+                        if let Some(irqfd) = msix_config.get_irqfd(vector) {
+                            self.vhost_handle
+                                .set_vring_call(queue_index, irqfd)
+                                .map_err(Error::VhostSetVringCall)?;
+                        } else {
+                            self.vhost_handle
+                                .set_vring_call(queue_index, &self.vhost_interrupt[queue_index])
+                                .map_err(Error::VhostSetVringCall)?;
+                        }
+                    } else {
+                        self.vhost_handle
+                            .set_vring_call(queue_index, &self.vhost_interrupt[queue_index])
+                            .map_err(Error::VhostSetVringCall)?;
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
 }
diff --git a/devices/src/virtio/virtio_device.rs b/devices/src/virtio/virtio_device.rs
index 806a98f..6eb5548 100644
--- a/devices/src/virtio/virtio_device.rs
+++ b/devices/src/virtio/virtio_device.rs
@@ -7,7 +7,7 @@ use std::os::unix::io::RawFd;
 use sys_util::{EventFd, GuestMemory};
 
 use super::*;
-use crate::pci::{PciBarConfiguration, PciCapability};
+use crate::pci::{MsixStatus, PciBarConfiguration, PciCapability};
 
 /// Trait for virtio devices to be driven by a virtio transport.
 ///
@@ -86,4 +86,6 @@ pub trait VirtioDevice: Send {
 
     /// Invoked when the device is sandboxed.
     fn on_device_sandboxed(&mut self) {}
+
+    fn control_notify(&self, _behavior: MsixStatus) {}
 }
diff --git a/devices/src/virtio/virtio_pci_device.rs b/devices/src/virtio/virtio_pci_device.rs
index c6d6786..e63abe9 100644
--- a/devices/src/virtio/virtio_pci_device.rs
+++ b/devices/src/virtio/virtio_pci_device.rs
@@ -535,7 +535,8 @@ impl PciDevice for VirtioPciDevice {
     fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
         if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx {
             if msix_cap_reg_idx == reg_idx {
-                self.msix_config.lock().write_msix_capability(offset, data);
+                let behavior = self.msix_config.lock().write_msix_capability(offset, data);
+                self.device.control_notify(behavior);
             }
         }
 
@@ -626,9 +627,11 @@ impl PciDevice for VirtioPciDevice {
                 // Handled with ioeventfds.
             }
             o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => {
-                self.msix_config
+                let behavior = self
+                    .msix_config
                     .lock()
                     .write_msix_table(o - MSIX_TABLE_BAR_OFFSET, data);
+                self.device.control_notify(behavior);
             }
             o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => {
                 self.msix_config