From 45a94bedaea17d8ae95e860b8a4beb2e465c187e Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Sat, 21 Mar 2020 09:15:16 +0800 Subject: vhost-net: implement direct msix irq fd The current vhost-net msix irq injection flow is from vhost-kernel to crosvm vhost-net, then to the KVM for irq injection. It still needs crosvm vhost-net to trigger irq, which is because the set_vring_call is not directly using the msix irq fd. To optimize this flow to be from vhost-kernel to KVM directly, need: 1. if the msix is enabled and unmasked, use the msix irq fd for the vring_call directly so that all the msix injections can go directly to KVM from vhost-kernel. 2. if the msix is disabled or masked, use the indirect vhost_interrupt fd to let crosvm control the irq injection. BUG=None TEST=cargo test -p devices TEST=start crosvm with vhost-net, and run the iperf3 on the network without any issue Change-Id: Idb3427f69f23b728305ed63d88973156a03e7c6b Signed-off-by: Chuanxiao Dong Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2046452 Reviewed-by: Stephen Barber Tested-by: kokoro --- devices/src/pci/mod.rs | 2 +- devices/src/pci/msix.rs | 42 ++++++++-- devices/src/virtio/interrupt.rs | 2 +- devices/src/virtio/vhost/net.rs | 44 +++++++++++ devices/src/virtio/vhost/worker.rs | 133 +++++++++++++++++++++++++++++--- devices/src/virtio/virtio_device.rs | 4 +- devices/src/virtio/virtio_pci_device.rs | 7 +- 7 files changed, 212 insertions(+), 22 deletions(-) diff --git a/devices/src/pci/mod.rs b/devices/src/pci/mod.rs index 8c5380e..b9ebc19 100644 --- a/devices/src/pci/mod.rs +++ b/devices/src/pci/mod.rs @@ -15,7 +15,7 @@ mod pci_root; mod vfio_pci; pub use self::ac97::{Ac97Backend, Ac97Dev, Ac97Parameters}; -pub use self::msix::{MsixCap, MsixConfig}; +pub use self::msix::{MsixCap, MsixConfig, MsixStatus}; pub use self::pci_configuration::{ PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityID, PciClassCode, 
PciConfiguration, PciHeaderType, PciProgrammingInterface, PciSerialBusSubClass, diff --git a/devices/src/pci/msix.rs b/devices/src/pci/msix.rs index 6c79b4a..b69114b 100644 --- a/devices/src/pci/msix.rs +++ b/devices/src/pci/msix.rs @@ -89,6 +89,12 @@ impl Display for MsixError { type MsixResult = std::result::Result; +pub enum MsixStatus { + Changed, + EntryChanged(usize), + NothingToDo, +} + impl MsixConfig { pub fn new(msix_vectors: u16, vm_socket: Arc) -> Self { assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); @@ -123,6 +129,18 @@ impl MsixConfig { self.masked } + /// Check whether the Function Mask bit in MSIX table Message Control + /// word in set or not. + /// If true, the vector is masked. + /// If false, the vector is unmasked. + pub fn table_masked(&self, index: usize) -> bool { + if index >= self.table_entries.len() { + true + } else { + self.table_entries[index].masked() + } + } + /// Check whether the MSI-X Enable bit in Message Control word in set or not. /// if 1, the function is permitted to use MSI-X to request service. pub fn enabled(&self) -> bool { @@ -147,7 +165,7 @@ impl MsixConfig { /// Write to the MSI-X Capability Structure. /// Only the top 2 bits in Message Control Word are writable. 
- pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) { + pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus { if offset == 2 && data.len() == 2 { let reg = u16::from_le_bytes([data[0], data[1]]); let old_masked = self.masked; @@ -173,6 +191,9 @@ impl MsixConfig { self.inject_msix_and_clear_pba(index); } } + return MsixStatus::Changed; + } else if !old_masked && self.masked { + return MsixStatus::Changed; } } else { error!( @@ -180,6 +201,7 @@ impl MsixConfig { offset ); } + MsixStatus::NothingToDo } fn add_msi_route(&self, index: u16, gsi: u32) -> MsixResult<()> { @@ -304,7 +326,7 @@ impl MsixConfig { /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit /// is set, the function is prohibited from sending a message using /// this MSI-X Table entry. - pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) { + pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus { let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; @@ -360,13 +382,17 @@ impl MsixConfig { // device. 
// Check if bit has been flipped - if !self.masked() - && old_entry.masked() - && !self.table_entries[index].masked() - && self.get_pba_bit(index as u16) == 1 - { - self.inject_msix_and_clear_pba(index); + if !self.masked() { + if old_entry.masked() && !self.table_entries[index].masked() { + if self.get_pba_bit(index as u16) == 1 { + self.inject_msix_and_clear_pba(index); + } + return MsixStatus::EntryChanged(index); + } else if !old_entry.masked() && self.table_entries[index].masked() { + return MsixStatus::EntryChanged(index); + } } + MsixStatus::NothingToDo } /// Read PBA Entries diff --git a/devices/src/virtio/interrupt.rs b/devices/src/virtio/interrupt.rs index f808d84..91a3942 100644 --- a/devices/src/virtio/interrupt.rs +++ b/devices/src/virtio/interrupt.rs @@ -13,7 +13,7 @@ pub struct Interrupt { interrupt_status: Arc, interrupt_evt: EventFd, interrupt_resample_evt: EventFd, - msix_config: Option>>, + pub msix_config: Option>>, config_msix_vector: u16, } diff --git a/devices/src/virtio/vhost/net.rs b/devices/src/virtio/vhost/net.rs index cc9521a..542423a 100644 --- a/devices/src/virtio/vhost/net.rs +++ b/devices/src/virtio/vhost/net.rs @@ -17,7 +17,9 @@ use virtio_sys::virtio_net; use super::control_socket::*; use super::worker::Worker; use super::{Error, Result}; +use crate::pci::MsixStatus; use crate::virtio::{Interrupt, Queue, VirtioDevice, TYPE_NET}; +use msg_socket::{MsgReceiver, MsgSender}; const QUEUE_SIZE: u16 = 256; const NUM_QUEUES: usize = 2; @@ -272,6 +274,48 @@ where } } + fn control_notify(&self, behavior: MsixStatus) { + if self.worker_thread.is_none() || self.request_socket.is_none() { + return; + } + if let Some(socket) = &self.request_socket { + match behavior { + MsixStatus::EntryChanged(index) => { + if let Err(e) = socket.send(&VhostDevRequest::MsixEntryChanged(index)) { + error!( + "{} failed to send VhostMsixEntryChanged request for entry {}: {:?}", + self.debug_label(), + index, + e + ); + return; + } + if let Err(e) = socket.recv() 
{ + error!("{} failed to receive VhostMsixEntryChanged response for entry {}: {:?}", self.debug_label(), index, e); + } + } + MsixStatus::Changed => { + if let Err(e) = socket.send(&VhostDevRequest::MsixChanged) { + error!( + "{} failed to send VhostMsixChanged request: {:?}", + self.debug_label(), + e + ); + return; + } + if let Err(e) = socket.recv() { + error!( + "{} failed to receive VhostMsixChanged response {:?}", + self.debug_label(), + e + ); + } + } + _ => {} + } + } + } + fn reset(&mut self) -> bool { // Only kill the child if it claimed its eventfd. if self.workers_kill_evt.is_none() && self.kill_evt.write(1).is_err() { diff --git a/devices/src/virtio/vhost/worker.rs b/devices/src/virtio/vhost/worker.rs index 03c1066..ca02a63 100644 --- a/devices/src/virtio/vhost/worker.rs +++ b/devices/src/virtio/vhost/worker.rs @@ -4,17 +4,16 @@ use std::os::raw::c_ulonglong; -use sys_util::{EventFd, PollContext, PollToken}; +use sys_util::{error, Error as SysError, EventFd, PollContext, PollToken}; use vhost::Vhost; -use super::control_socket::VhostDevResponseSocket; +use super::control_socket::{VhostDevRequest, VhostDevResponse, VhostDevResponseSocket}; use super::{Error, Result}; use crate::virtio::{Interrupt, Queue}; +use libc::EIO; +use msg_socket::{MsgReceiver, MsgSender}; -/// Worker that takes care of running the vhost device. This mainly involves forwarding interrupts -/// from the vhost driver to the guest VM because crosvm only supports the virtio-mmio transport, -/// which requires a bit to be set in the interrupt status register before triggering the interrupt -/// and the vhost driver doesn't do this for us. +/// Worker that takes care of running the vhost device. 
pub struct Worker { interrupt: Interrupt, queues: Vec, @@ -91,9 +90,7 @@ impl Worker { self.vhost_handle .set_vring_base(queue_index, 0) .map_err(Error::VhostSetVringBase)?; - self.vhost_handle - .set_vring_call(queue_index, &self.vhost_interrupt[queue_index]) - .map_err(Error::VhostSetVringCall)?; + self.set_vring_call_for_entry(queue_index, queue.vector as usize)?; self.vhost_handle .set_vring_kick(queue_index, &queue_evts[queue_index]) .map_err(Error::VhostSetVringKick)?; @@ -106,6 +103,7 @@ impl Worker { VhostIrqi { index: usize }, InterruptResample, Kill, + ControlNotify, } let poll_ctx: PollContext = PollContext::build_with(&[ @@ -119,6 +117,11 @@ impl Worker { .add(vhost_int, Token::VhostIrqi { index }) .map_err(Error::CreatePollContext)?; } + if let Some(socket) = &self.response_socket { + poll_ctx + .add(socket, Token::ControlNotify) + .map_err(Error::CreatePollContext)?; + } 'poll: loop { let events = poll_ctx.wait().map_err(Error::PollError)?; @@ -138,10 +141,122 @@ impl Worker { let _ = self.kill_evt.read(); break 'poll; } + Token::ControlNotify => { + if let Some(socket) = &self.response_socket { + match socket.recv() { + Ok(VhostDevRequest::MsixEntryChanged(index)) => { + let mut qindex = 0; + for (queue_index, queue) in self.queues.iter().enumerate() { + if queue.vector == index as u16 { + qindex = queue_index; + break; + } + } + let response = + match self.set_vring_call_for_entry(qindex, index) { + Ok(()) => VhostDevResponse::Ok, + Err(e) => { + error!( + "Set vring call failed for masked entry {}: {:?}", + index, e + ); + VhostDevResponse::Err(SysError::new(EIO)) + } + }; + if let Err(e) = socket.send(&response) { + error!("Vhost failed to send VhostMsixEntryMasked Response for entry {}: {:?}", index, e); + } + } + Ok(VhostDevRequest::MsixChanged) => { + let response = match self.set_vring_calls() { + Ok(()) => VhostDevResponse::Ok, + Err(e) => { + error!("Set vring calls failed: {:?}", e); + VhostDevResponse::Err(SysError::new(EIO)) + } + }; + if 
let Err(e) = socket.send(&response) { + error!( + "Vhost failed to send VhostMsixMasked Response: {:?}", + e + ); + } + } + Err(e) => { + error!("Vhost failed to receive Control request: {:?}", e); + } + } + } + } } } } cleanup_vqs(&self.vhost_handle)?; Ok(()) } + + fn set_vring_call_for_entry(&self, queue_index: usize, vector: usize) -> Result<()> { + // No response_socket means it doesn't have any control related + // with the msix. Due to this, cannot use the direct irq fd but + // should fall back to indirect irq fd. + if self.response_socket.is_some() { + if let Some(msix_config) = &self.interrupt.msix_config { + let msix_config = msix_config.lock(); + let msix_masked = msix_config.masked(); + if msix_masked { + return Ok(()); + } + if !msix_config.table_masked(vector) { + if let Some(irqfd) = msix_config.get_irqfd(vector) { + self.vhost_handle + .set_vring_call(queue_index, irqfd) + .map_err(Error::VhostSetVringCall)?; + } else { + self.vhost_handle + .set_vring_call(queue_index, &self.vhost_interrupt[queue_index]) + .map_err(Error::VhostSetVringCall)?; + } + return Ok(()); + } + } + } + + self.vhost_handle + .set_vring_call(queue_index, &self.vhost_interrupt[queue_index]) + .map_err(Error::VhostSetVringCall)?; + Ok(()) + } + + fn set_vring_calls(&self) -> Result<()> { + if let Some(msix_config) = &self.interrupt.msix_config { + let msix_config = msix_config.lock(); + if msix_config.masked() { + for (queue_index, _) in self.queues.iter().enumerate() { + self.vhost_handle + .set_vring_call(queue_index, &self.vhost_interrupt[queue_index]) + .map_err(Error::VhostSetVringCall)?; + } + } else { + for (queue_index, queue) in self.queues.iter().enumerate() { + let vector = queue.vector as usize; + if !msix_config.table_masked(vector) { + if let Some(irqfd) = msix_config.get_irqfd(vector) { + self.vhost_handle + .set_vring_call(queue_index, irqfd) + .map_err(Error::VhostSetVringCall)?; + } else { + self.vhost_handle + .set_vring_call(queue_index, 
&self.vhost_interrupt[queue_index]) + .map_err(Error::VhostSetVringCall)?; + } + } else { + self.vhost_handle + .set_vring_call(queue_index, &self.vhost_interrupt[queue_index]) + .map_err(Error::VhostSetVringCall)?; + } + } + } + } + Ok(()) + } } diff --git a/devices/src/virtio/virtio_device.rs b/devices/src/virtio/virtio_device.rs index 806a98f..6eb5548 100644 --- a/devices/src/virtio/virtio_device.rs +++ b/devices/src/virtio/virtio_device.rs @@ -7,7 +7,7 @@ use std::os::unix::io::RawFd; use sys_util::{EventFd, GuestMemory}; use super::*; -use crate::pci::{PciBarConfiguration, PciCapability}; +use crate::pci::{MsixStatus, PciBarConfiguration, PciCapability}; /// Trait for virtio devices to be driven by a virtio transport. /// @@ -86,4 +86,6 @@ pub trait VirtioDevice: Send { /// Invoked when the device is sandboxed. fn on_device_sandboxed(&mut self) {} + + fn control_notify(&self, _behavior: MsixStatus) {} } diff --git a/devices/src/virtio/virtio_pci_device.rs b/devices/src/virtio/virtio_pci_device.rs index c6d6786..e63abe9 100644 --- a/devices/src/virtio/virtio_pci_device.rs +++ b/devices/src/virtio/virtio_pci_device.rs @@ -535,7 +535,8 @@ impl PciDevice for VirtioPciDevice { fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) { if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx { if msix_cap_reg_idx == reg_idx { - self.msix_config.lock().write_msix_capability(offset, data); + let behavior = self.msix_config.lock().write_msix_capability(offset, data); + self.device.control_notify(behavior); } } @@ -626,9 +627,11 @@ impl PciDevice for VirtioPciDevice { // Handled with ioeventfds. } o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => { - self.msix_config + let behavior = self + .msix_config .lock() .write_msix_table(o - MSIX_TABLE_BAR_OFFSET, data); + self.device.control_notify(behavior); } o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => { self.msix_config -- cgit 1.4.1