summary refs log tree commit diff
diff options
context:
space:
mode:
authorZhuocheng Ding <zhuocheng.ding@intel.corp-partner.google.com>2019-12-02 15:50:28 +0800
committerCommit Bot <commit-bot@chromium.org>2020-03-05 13:12:23 +0000
commitb9f4c9bca30e65eacfb055951fa994ad5127a8f0 (patch)
tree8c8c886824f819620cf6d5c8b39ee4571ed7fb9b
parent2f7dabbd6a0d8620e4b19b92cdae24c08e4c7ccc (diff)
downloadcrosvm-b9f4c9bca30e65eacfb055951fa994ad5127a8f0.tar
crosvm-b9f4c9bca30e65eacfb055951fa994ad5127a8f0.tar.gz
crosvm-b9f4c9bca30e65eacfb055951fa994ad5127a8f0.tar.bz2
crosvm-b9f4c9bca30e65eacfb055951fa994ad5127a8f0.tar.lz
crosvm-b9f4c9bca30e65eacfb055951fa994ad5127a8f0.tar.xz
crosvm-b9f4c9bca30e65eacfb055951fa994ad5127a8f0.tar.zst
crosvm-b9f4c9bca30e65eacfb055951fa994ad5127a8f0.zip
crosvm: Add plumbing for split-irqchip interrupts
Devices use irqfd to inject interrupts; the main thread listens for them
and activates the userspace pic/ioapic accordingly.

BUG=chromium:908689
TEST=launch linux guest with `--split-irqchip` flag

Change-Id: If30d17ce7ec9e26dba782c89cc1b9b2ff897a70d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1945798
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Stephen Barber <smbarber@chromium.org>
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Commit-Queue: Zhuocheng Ding <zhuocheng.ding@intel.corp-partner.google.com>
-rw-r--r--aarch64/src/lib.rs12
-rw-r--r--arch/src/lib.rs24
-rw-r--r--devices/src/ioapic.rs13
-rw-r--r--devices/src/pic.rs37
-rw-r--r--devices/src/split_irqchip_common.rs34
-rw-r--r--src/linux.rs74
-rw-r--r--x86_64/src/lib.rs73
7 files changed, 219 insertions, 48 deletions
diff --git a/aarch64/src/lib.rs b/aarch64/src/lib.rs
index d4c4a50..f8a36b9 100644
--- a/aarch64/src/lib.rs
+++ b/aarch64/src/lib.rs
@@ -245,9 +245,14 @@ impl arch::LinuxArch for AArch64 {
 
         let pci_devices = create_devices(&mem, &mut vm, &mut resources, &exit_evt)
             .map_err(|e| Error::CreateDevices(Box::new(e)))?;
-        let (pci, pci_irqs, pid_debug_label_map) =
-            arch::generate_pci_root(pci_devices, &mut mmio_bus, &mut resources, &mut vm)
-                .map_err(Error::CreatePciRoot)?;
+        let (pci, pci_irqs, pid_debug_label_map) = arch::generate_pci_root(
+            pci_devices,
+            &mut None,
+            &mut mmio_bus,
+            &mut resources,
+            &mut vm,
+        )
+        .map_err(Error::CreatePciRoot)?;
         let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci)));
 
         // ARM doesn't really use the io bus like x86, so just create an empty bus.
@@ -317,6 +322,7 @@ impl arch::LinuxArch for AArch64 {
             vcpu_affinity,
             irq_chip,
             split_irqchip: None,
+            gsi_relay: None,
             io_bus,
             mmio_bus,
             pid_debug_label_map,
diff --git a/arch/src/lib.rs b/arch/src/lib.rs
index 5112798..ab08c21 100644
--- a/arch/src/lib.rs
+++ b/arch/src/lib.rs
@@ -15,6 +15,7 @@ use std::os::unix::io::AsRawFd;
 use std::path::PathBuf;
 use std::sync::Arc;
 
+use devices::split_irqchip_common::GsiRelay;
 use devices::virtio::VirtioDevice;
 use devices::{
     Bus, BusDevice, BusError, PciDevice, PciDeviceError, PciInterruptPin, PciRoot, ProxyDevice,
@@ -62,6 +63,7 @@ pub struct RunnableLinuxVm {
     pub vcpu_affinity: Vec<usize>,
     pub irq_chip: Option<File>,
     pub split_irqchip: Option<(Arc<Mutex<devices::Pic>>, Arc<Mutex<devices::Ioapic>>)>,
+    pub gsi_relay: Option<Arc<GsiRelay>>,
     pub io_bus: Bus,
     pub mmio_bus: Bus,
     pub pid_debug_label_map: BTreeMap<u32, String>,
@@ -118,6 +120,8 @@ pub enum DeviceRegistrationError {
     CreatePipe(sys_util::Error),
     // Unable to create serial device from serial parameters
     CreateSerialDevice(devices::SerialError),
+    /// Could not clone an event fd.
+    EventFdClone(sys_util::Error),
     /// Could not create an event fd.
     EventFdCreate(sys_util::Error),
     /// Could not add a device to the mmio bus.
@@ -149,6 +153,7 @@ impl Display for DeviceRegistrationError {
             CreatePipe(e) => write!(f, "failed to create pipe: {}", e),
             CreateSerialDevice(e) => write!(f, "failed to create serial device: {}", e),
             Cmdline(e) => write!(f, "unable to add device to kernel command line: {}", e),
+            EventFdClone(e) => write!(f, "failed to clone eventfd: {}", e),
             EventFdCreate(e) => write!(f, "failed to create eventfd: {}", e),
             MmioInsert(e) => write!(f, "failed to add to mmio bus: {}", e),
             RegisterIoevent(e) => write!(f, "failed to register ioevent to VM: {}", e),
@@ -166,6 +171,7 @@ impl Display for DeviceRegistrationError {
 /// Creates a root PCI device for use by this Vm.
 pub fn generate_pci_root(
     devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
+    gsi_relay: &mut Option<GsiRelay>,
     mmio_bus: &mut Bus,
     resources: &mut SystemAllocator,
     vm: &mut Vm,
@@ -191,10 +197,22 @@ pub fn generate_pci_root(
             1 => PciInterruptPin::IntB,
             2 => PciInterruptPin::IntC,
             3 => PciInterruptPin::IntD,
-            _ => panic!(""), // Obviously not possible, but the compiler is not smart enough.
+            _ => unreachable!(), // Obviously not possible, but the compiler is not smart enough.
         };
-        vm.register_irqfd_resample(&irqfd, &irq_resample_fd, irq_num)
-            .map_err(DeviceRegistrationError::RegisterIrqfd)?;
+        if let Some(relay) = gsi_relay {
+            relay.register_irqfd_resample(
+                irqfd
+                    .try_clone()
+                    .map_err(DeviceRegistrationError::EventFdClone)?,
+                irq_resample_fd
+                    .try_clone()
+                    .map_err(DeviceRegistrationError::EventFdClone)?,
+                irq_num as usize,
+            );
+        } else {
+            vm.register_irqfd_resample(&irqfd, &irq_resample_fd, irq_num)
+                .map_err(DeviceRegistrationError::RegisterIrqfd)?;
+        }
         keep_fds.push(irqfd.as_raw_fd());
         keep_fds.push(irq_resample_fd.as_raw_fd());
         device.assign_irq(irqfd, irq_resample_fd, irq_num, pci_irq_pin);
diff --git a/devices/src/ioapic.rs b/devices/src/ioapic.rs
index e1fbfb8..09ccb89 100644
--- a/devices/src/ioapic.rs
+++ b/devices/src/ioapic.rs
@@ -10,6 +10,7 @@ use crate::BusDevice;
 use bit_field::*;
 use kvm::Vm;
 use msg_socket::{MsgReceiver, MsgSender};
+use std::sync::Arc;
 use sys_util::{error, warn, EventFd, Result};
 use vm_control::{VmIrqRequest, VmIrqRequestSocket, VmIrqResponse};
 
@@ -89,6 +90,7 @@ pub struct Ioapic {
     redirect_table: [RedirectionTableEntry; kvm::NUM_IOAPIC_PINS],
     // IOREGSEL is technically 32 bits, but only bottom 8 are writable: all others are fixed to 0.
     ioregsel: u8,
+    relay: Arc<GsiRelay>,
     irqfd: Vec<EventFd>,
     socket: VmIrqRequestSocket,
 }
@@ -166,11 +168,16 @@ impl Ioapic {
             current_interrupt_level_bitmap: 0,
             redirect_table: entries,
             ioregsel: 0,
+            relay: Default::default(),
             irqfd,
             socket,
         })
     }
 
+    pub fn register_relay(&mut self, relay: Arc<GsiRelay>) {
+        self.relay = relay;
+    }
+
     // The ioapic must be informed about EOIs in order to avoid sending multiple interrupts of the
     // same type at the same time.
     pub fn end_of_interrupt(&mut self, vector: u8) {
@@ -183,6 +190,12 @@ impl Ioapic {
             if self.redirect_table[i].get_vector() == vector
                 && self.redirect_table[i].get_trigger_mode() == TriggerMode::Level
             {
+                if self.relay.irqfd_resample[i].is_some() {
+                    self.service_irq(i, false);
+                }
+                if let Some(resample_evt) = &self.relay.irqfd_resample[i] {
+                    resample_evt.write(1).unwrap();
+                }
                 self.redirect_table[i].set_remote_irr(false);
             }
             // There is an inherent race condition in hardware if the OS is finished processing an
diff --git a/devices/src/pic.rs b/devices/src/pic.rs
index c18abef..f562be6 100644
--- a/devices/src/pic.rs
+++ b/devices/src/pic.rs
@@ -12,7 +12,9 @@
 // For the purposes of both using more descriptive terms and avoiding terms with lots of charged
 // emotional context, this file refers to them instead as "primary" and "secondary" PICs.
 
+use crate::split_irqchip_common::GsiRelay;
 use crate::BusDevice;
+use std::sync::Arc;
 use sys_util::{debug, warn};
 
 #[repr(usize)]
@@ -30,7 +32,7 @@ enum PicInitState {
     Icw4 = 3,
 }
 
-#[derive(Debug, Default, Clone, Copy, PartialEq)]
+#[derive(Default)]
 struct PicState {
     last_irr: u8,     // Edge detection.
     irr: u8,          // Interrupt Request Register.
@@ -53,6 +55,8 @@ struct PicState {
     elcr: u8,
     elcr_mask: u8,
     init_state: Option<PicInitState>,
+    is_primary: bool,
+    relay: Arc<GsiRelay>,
 }
 
 pub struct Pic {
@@ -176,12 +180,18 @@ impl Pic {
         // that should be masked here. In this case, bits 8 - 8 = 0 and 13 - 8 = 5.
         secondary_pic.elcr_mask = !((1 << 0) | (1 << 5));
 
+        primary_pic.is_primary = true;
         Pic {
             interrupt_request: false,
             pics: [primary_pic, secondary_pic],
         }
     }
 
+    pub fn register_relay(&mut self, relay: Arc<GsiRelay>) {
+        self.pics[0].relay = relay.clone();
+        self.pics[1].relay = relay;
+    }
+
     pub fn service_irq(&mut self, irq: u8, level: bool) -> bool {
         assert!(irq <= 15, "Unexpectedly high value irq: {} vs 15", irq);
 
@@ -391,6 +401,11 @@ impl Pic {
     fn clear_isr(pic: &mut PicState, irq: u8) {
         assert!(irq <= 7, "Unexpectedly high value for irq: {} vs 7", irq);
         pic.isr &= !(1 << irq);
+        Pic::set_irq_internal(pic, irq, false);
+        let irq = if pic.is_primary { irq } else { irq + 8 };
+        if let Some(resample_evt) = &pic.relay.irqfd_resample[irq as usize] {
+            resample_evt.write(1).unwrap();
+        }
     }
 
     fn update_irq(&mut self) -> bool {
@@ -1088,26 +1103,6 @@ mod tests {
         assert_eq!(data.pic.pics[PicSelect::Primary as usize].priority_add, 6);
     }
 
-    /// Verify that no-op doesn't change state.
-    #[test]
-    fn no_op_ocw2() {
-        let mut data = set_up();
-        icw_init_both_with_icw4(&mut data.pic, FULLY_NESTED_NO_AUTO_EOI);
-
-        // TODO(mutexlox): Verify APIC interaction when it is implemented.
-        data.pic.service_irq(/*irq=*/ 5, /*level=*/ true);
-        assert_eq!(data.pic.get_external_interrupt(), Some(0x08 + 5));
-        data.pic.service_irq(/*irq=*/ 5, /*level=*/ false);
-
-        let orig = data.pic.pics[PicSelect::Primary as usize].clone();
-
-        // Run a no-op.
-        data.pic.write(PIC_PRIMARY_COMMAND, &[0x40]);
-
-        // Nothing should have changed.
-        assert_eq!(orig, data.pic.pics[PicSelect::Primary as usize]);
-    }
-
     /// Tests cascade IRQ that happens on secondary PIC.
     #[test]
     fn cascade_irq() {
diff --git a/devices/src/split_irqchip_common.rs b/devices/src/split_irqchip_common.rs
index b54c35a..1e513f2 100644
--- a/devices/src/split_irqchip_common.rs
+++ b/devices/src/split_irqchip_common.rs
@@ -5,6 +5,7 @@
 // Common constants and types used for Split IRQ chip devices (e.g. PIC, PIT, IOAPIC).
 
 use bit_field::*;
+use sys_util::EventFd;
 
 #[bitfield]
 #[derive(Clone, Copy, Debug, PartialEq)]
@@ -58,3 +59,36 @@ pub struct MsiDataMessage {
     trigger: TriggerMode,
     reserved2: BitField16,
 }
+
+/// Acts as a relay of interrupt signals between devices and IRQ chips.
+#[derive(Default)]
+pub struct GsiRelay {
+    pub irqfd: [Option<EventFd>; kvm::NUM_IOAPIC_PINS],
+    pub irqfd_resample: [Option<EventFd>; kvm::NUM_IOAPIC_PINS],
+}
+
+impl GsiRelay {
+    pub fn new() -> GsiRelay {
+        GsiRelay {
+            irqfd: Default::default(),
+            irqfd_resample: Default::default(),
+        }
+    }
+
+    pub fn register_irqfd(&mut self, evt: EventFd, gsi: usize) {
+        if gsi >= kvm::NUM_IOAPIC_PINS {
+            // Invalid gsi; ignore.
+            return;
+        }
+        self.irqfd[gsi] = Some(evt);
+    }
+
+    pub fn register_irqfd_resample(&mut self, evt: EventFd, resample_evt: EventFd, gsi: usize) {
+        if gsi >= kvm::NUM_IOAPIC_PINS {
+            // Invalid gsi; ignore.
+            return;
+        }
+        self.irqfd[gsi] = Some(evt);
+        self.irqfd_resample[gsi] = Some(resample_evt);
+    }
+}
diff --git a/src/linux.rs b/src/linux.rs
index a181e52..97f691f 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -1699,6 +1699,7 @@ fn run_control(
         Suspend,
         ChildSignal,
         CheckAvailableMemory,
+        IrqFd { gsi: usize },
         LowMemory,
         LowmemTimer,
         VmControlServer,
@@ -1749,6 +1750,16 @@ fn run_control(
         .add(&freemem_timer, Token::CheckAvailableMemory)
         .map_err(Error::PollContextAdd)?;
 
+    if let Some(gsi_relay) = &linux.gsi_relay {
+        for (gsi, evt) in gsi_relay.irqfd.into_iter().enumerate() {
+            if let Some(evt) = evt {
+                poll_ctx
+                    .add(evt, Token::IrqFd { gsi })
+                    .map_err(Error::PollContextAdd)?;
+            }
+        }
+    }
+
     // Used to add jitter to timer values so that we don't have a thundering herd problem when
     // multiple VMs are running.
     let mut simple_rng = SimpleRng::new(
@@ -1787,6 +1798,7 @@ fn run_control(
     }
     vcpu_thread_barrier.wait();
 
+    let mut ioapic_delayed = Vec::<usize>::default();
     'poll: loop {
         let events = {
             match poll_ctx.wait() {
@@ -1798,6 +1810,26 @@ fn run_control(
             }
         };
 
+        ioapic_delayed.retain(|&gsi| {
+            if let Some((_, ioapic)) = &linux.split_irqchip {
+                if let Ok(mut ioapic) = ioapic.try_lock() {
+                    // The unwrap will never fail because gsi_relay is Some iff split_irqchip is
+                    // Some.
+                    if linux.gsi_relay.as_ref().unwrap().irqfd_resample[gsi].is_some() {
+                        ioapic.service_irq(gsi, true);
+                    } else {
+                        ioapic.service_irq(gsi, true);
+                        ioapic.service_irq(gsi, false);
+                    }
+                    false
+                } else {
+                    true
+                }
+            } else {
+                true
+            }
+        });
+
         let mut vm_control_indices_to_remove = Vec::new();
         for event in events.iter_readable() {
             match event.token() {
@@ -1861,6 +1893,47 @@ fn run_control(
                         }
                     }
                 }
+                Token::IrqFd { gsi } => {
+                    if let Some((pic, ioapic)) = &linux.split_irqchip {
+                        // This will never fail because gsi_relay is Some iff split_irqchip is
+                        // Some.
+                        let gsi_relay = linux.gsi_relay.as_ref().unwrap();
+                        if let Some(eventfd) = &gsi_relay.irqfd[gsi] {
+                            eventfd.read().unwrap();
+                        } else {
+                            warn!(
+                                "irqfd {} not found in GSI relay, should be impossible.",
+                                gsi
+                            );
+                        }
+
+                        let mut pic = pic.lock();
+                        if gsi_relay.irqfd_resample[gsi].is_some() {
+                            pic.service_irq(gsi as u8, true);
+                        } else {
+                            pic.service_irq(gsi as u8, true);
+                            pic.service_irq(gsi as u8, false);
+                        }
+                        if let Err(e) = vcpu_handles[0].kill(SIGRTMIN() + 0) {
+                            warn!("PIC: failed to kick vCPU0: {}", e);
+                        }
+
+                        // When IOAPIC is configuring its redirection table, we should first
+                        // process its AddMsiRoute request, otherwise we would deadlock.
+                        if let Ok(mut ioapic) = ioapic.try_lock() {
+                            if gsi_relay.irqfd_resample[gsi].is_some() {
+                                ioapic.service_irq(gsi, true);
+                            } else {
+                                ioapic.service_irq(gsi, true);
+                                ioapic.service_irq(gsi, false);
+                            }
+                        } else {
+                            ioapic_delayed.push(gsi);
+                        }
+                    } else {
+                        panic!("split irqchip not found, should be impossible.");
+                    }
+                }
                 Token::LowMemory => {
                     if let Some(low_mem) = &low_mem {
                         let old_balloon_memory = current_balloon_memory;
@@ -2020,6 +2093,7 @@ fn run_control(
                 Token::Suspend => {}
                 Token::ChildSignal => {}
                 Token::CheckAvailableMemory => {}
+                Token::IrqFd { gsi: _ } => {}
                 Token::LowMemory => {}
                 Token::LowmemTimer => {}
                 Token::VmControlServer => {}
diff --git a/x86_64/src/lib.rs b/x86_64/src/lib.rs
index d9fe8a9..a912edd 100644
--- a/x86_64/src/lib.rs
+++ b/x86_64/src/lib.rs
@@ -55,6 +55,7 @@ use std::sync::Arc;
 
 use crate::bootparam::boot_params;
 use arch::{RunnableLinuxVm, VmComponents, VmImage};
+use devices::split_irqchip_common::GsiRelay;
 use devices::{
     get_serial_tty_string, Ioapic, PciConfigIo, PciDevice, PciInterruptPin, Pic, SerialParameters,
     IOAPIC_BASE_ADDRESS, IOAPIC_MEM_LENGTH_BYTES,
@@ -88,6 +89,7 @@ pub enum Error {
     CreateVcpu(sys_util::Error),
     CreateVm(sys_util::Error),
     E820Configuration,
+    EnableSplitIrqchip(sys_util::Error),
     KernelOffsetPastEnd,
     LoadBios(io::Error),
     LoadBzImage(bzimage::Error),
@@ -136,6 +138,7 @@ impl Display for Error {
             CreateVcpu(e) => write!(f, "failed to create VCPU: {}", e),
             CreateVm(e) => write!(f, "failed to create VM: {}", e),
             E820Configuration => write!(f, "invalid e820 setup params"),
+            EnableSplitIrqchip(e) => write!(f, "failed to enable split irqchip: {}", e),
             KernelOffsetPastEnd => write!(f, "the kernel extends past the end of RAM"),
             LoadBios(e) => write!(f, "error loading bios: {}", e),
             LoadBzImage(e) => write!(f, "error loading kernel bzImage: {}", e),
@@ -369,7 +372,8 @@ impl arch::LinuxArch for X8664arch {
 
         let exit_evt = EventFd::new().map_err(Error::CreateEventFd)?;
 
-        let split_irqchip = if split_irqchip {
+        let (split_irqchip, mut gsi_relay) = if split_irqchip {
+            let gsi_relay = GsiRelay::new();
             let pic = Arc::new(Mutex::new(Pic::new()));
             let ioapic = Arc::new(Mutex::new(
                 Ioapic::new(&mut vm, ioapic_device_socket).map_err(Error::CreateIoapicDevice)?,
@@ -382,15 +386,20 @@ impl arch::LinuxArch for X8664arch {
                     false,
                 )
                 .unwrap();
-            Some((pic, ioapic))
+            (Some((pic, ioapic)), Some(gsi_relay))
         } else {
-            None
+            (None, None)
         };
         let pci_devices = create_devices(&mem, &mut vm, &mut resources, &exit_evt)
             .map_err(|e| Error::CreateDevices(Box::new(e)))?;
-        let (pci, pci_irqs, pid_debug_label_map) =
-            arch::generate_pci_root(pci_devices, &mut mmio_bus, &mut resources, &mut vm)
-                .map_err(Error::CreatePciRoot)?;
+        let (pci, pci_irqs, pid_debug_label_map) = arch::generate_pci_root(
+            pci_devices,
+            &mut gsi_relay,
+            &mut mmio_bus,
+            &mut resources,
+            &mut vm,
+        )
+        .map_err(Error::CreatePciRoot)?;
         let pci_bus = Arc::new(Mutex::new(PciConfigIo::new(pci)));
 
         // Event used to notify crosvm that guest OS is trying to suspend.
@@ -400,15 +409,20 @@ impl arch::LinuxArch for X8664arch {
 
         let mut io_bus = Self::setup_io_bus(
             &mut vm,
-            split_irqchip.is_some(),
+            &mut gsi_relay,
             exit_evt.try_clone().map_err(Error::CloneEventFd)?,
             Some(pci_bus.clone()),
             components.memory_size,
             suspend_evt.try_clone().map_err(Error::CloneEventFd)?,
         )?;
 
-        let stdio_serial_num =
-            Self::setup_serial_devices(&mut vm, &mut io_bus, serial_parameters, serial_jail)?;
+        let stdio_serial_num = Self::setup_serial_devices(
+            &mut vm,
+            &mut io_bus,
+            &mut gsi_relay,
+            serial_parameters,
+            serial_jail,
+        )?;
 
         let ramoops_region = match components.pstore {
             Some(pstore) => Some(
@@ -418,7 +432,7 @@ impl arch::LinuxArch for X8664arch {
             None => None,
         };
 
-        if let Some((pic, _)) = &split_irqchip {
+        let gsi_relay = if let Some((pic, ioapic)) = &split_irqchip {
             io_bus.insert(pic.clone(), 0x20, 0x2, true).unwrap();
             io_bus.insert(pic.clone(), 0xa0, 0x2, true).unwrap();
             io_bus.insert(pic.clone(), 0x4d0, 0x2, true).unwrap();
@@ -427,7 +441,15 @@ impl arch::LinuxArch for X8664arch {
             while irq_num < kvm::NUM_IOAPIC_PINS as u32 {
                 irq_num = resources.allocate_irq().unwrap();
             }
-        }
+
+            // This will never fail because gsi_relay is Some iff split_irqchip is Some.
+            let gsi_relay = Arc::new(gsi_relay.unwrap());
+            pic.lock().register_relay(gsi_relay.clone());
+            ioapic.lock().register_relay(gsi_relay.clone());
+            Some(gsi_relay)
+        } else {
+            None
+        };
 
         match components.vm_image {
             VmImage::Bios(ref mut bios) => Self::load_bios(&mem, bios)?,
@@ -483,6 +505,7 @@ impl arch::LinuxArch for X8664arch {
             vcpu_affinity,
             irq_chip,
             split_irqchip,
+            gsi_relay,
             io_bus,
             mmio_bus,
             pid_debug_label_map,
@@ -638,6 +661,8 @@ impl X8664arch {
             vm.create_pit().map_err(Error::CreatePit)?;
             vm.create_irq_chip().map_err(Error::CreateIrqChip)?;
         } else {
+            vm.enable_split_irqchip()
+                .map_err(Error::EnableSplitIrqchip)?;
             for i in 0..kvm::NUM_IOAPIC_PINS {
                 // Add dummy MSI routes to replace the default IRQChip routes.
                 let route = IrqRoute {
@@ -719,13 +744,13 @@ impl X8664arch {
     /// # Arguments
     ///
     /// * - `vm` the vm object
-    /// * - `split_irqchip`: whether to use a split IRQ chip (i.e. userspace PIT/PIC/IOAPIC)
+    /// * - `gsi_relay`: only valid for split IRQ chip (i.e. userspace PIT/PIC/IOAPIC)
     /// * - `exit_evt` - the event fd object which should receive exit events
     /// * - `mem_size` - the size in bytes of physical ram for the guest
     /// * - `suspend_evt` - the event fd object which used to suspend the vm
     fn setup_io_bus(
-        vm: &mut Vm,
-        split_irqchip: bool,
+        _vm: &mut Vm,
+        gsi_relay: &mut Option<GsiRelay>,
         exit_evt: EventFd,
         pci: Option<Arc<Mutex<devices::PciConfigIo>>>,
         mem_size: u64,
@@ -758,7 +783,7 @@ impl X8664arch {
             exit_evt.try_clone().map_err(Error::CloneEventFd)?,
         )));
 
-        if split_irqchip {
+        if let Some(gsi_relay) = gsi_relay {
             let pit_evt = EventFd::new().map_err(Error::CreateEventFd)?;
             let pit = Arc::new(Mutex::new(
                 devices::Pit::new(
@@ -770,8 +795,7 @@ impl X8664arch {
             io_bus.insert(pit.clone(), 0x040, 0x8, true).unwrap();
             io_bus.insert(pit.clone(), 0x061, 0x1, true).unwrap();
             io_bus.insert(i8042, 0x062, 0x3, true).unwrap();
-            vm.register_irqfd(&pit_evt, 0)
-                .map_err(Error::RegisterIrqfd)?;
+            gsi_relay.register_irqfd(pit_evt, 0);
         } else {
             io_bus
                 .insert(nul_device.clone(), 0x040, 0x8, false)
@@ -816,10 +840,12 @@ impl X8664arch {
     ///
     /// * - `vm` the vm object
     /// * - `io_bus` the I/O bus to add the devices to
+    /// * - `gsi_relay`: only valid for split IRQ chip (i.e. userspace PIT/PIC/IOAPIC)
     /// * - `serial_parmaters` - definitions for how the serial devices should be configured
     fn setup_serial_devices(
         vm: &mut Vm,
         io_bus: &mut devices::Bus,
+        gsi_relay: &mut Option<GsiRelay>,
         serial_parameters: &BTreeMap<u8, SerialParameters>,
         serial_jail: Option<Minijail>,
     ) -> Result<Option<u8>> {
@@ -835,10 +861,15 @@ impl X8664arch {
         )
         .map_err(Error::CreateSerialDevices)?;
 
-        vm.register_irqfd(&com_evt_1_3, X86_64_SERIAL_1_3_IRQ)
-            .map_err(Error::RegisterIrqfd)?;
-        vm.register_irqfd(&com_evt_2_4, X86_64_SERIAL_2_4_IRQ)
-            .map_err(Error::RegisterIrqfd)?;
+        if let Some(gsi_relay) = gsi_relay {
+            gsi_relay.register_irqfd(com_evt_1_3, X86_64_SERIAL_1_3_IRQ as usize);
+            gsi_relay.register_irqfd(com_evt_2_4, X86_64_SERIAL_2_4_IRQ as usize);
+        } else {
+            vm.register_irqfd(&com_evt_1_3, X86_64_SERIAL_1_3_IRQ)
+                .map_err(Error::RegisterIrqfd)?;
+            vm.register_irqfd(&com_evt_2_4, X86_64_SERIAL_2_4_IRQ)
+                .map_err(Error::RegisterIrqfd)?;
+        }
 
         Ok(stdio_serial_num)
     }