summary refs log tree commit diff
diff options
context:
space:
mode:
authorXiong Zhang <xiong.y.zhang@intel.corp-partner.google.com>2019-04-23 17:14:43 +0800
committerCommit Bot <commit-bot@chromium.org>2019-09-18 17:23:59 +0000
commit9628f61e7125bb6ee2ae921756af185d83a9c2aa (patch)
treeb1c859fe8c635dd75ca78ed883da5c9dc9d6a79a
parent36cc5092fc6237ddbd1e63b8d2bc7c1460ea3041 (diff)
downloadcrosvm-9628f61e7125bb6ee2ae921756af185d83a9c2aa.tar
crosvm-9628f61e7125bb6ee2ae921756af185d83a9c2aa.tar.gz
crosvm-9628f61e7125bb6ee2ae921756af185d83a9c2aa.tar.bz2
crosvm-9628f61e7125bb6ee2ae921756af185d83a9c2aa.tar.lz
crosvm-9628f61e7125bb6ee2ae921756af185d83a9c2aa.tar.xz
crosvm-9628f61e7125bb6ee2ae921756af185d83a9c2aa.tar.zst
crosvm-9628f61e7125bb6ee2ae921756af185d83a9c2aa.zip
vfio: Implement PciDevice Trait for vfio device
According to the kernel's Documentation/vfio.txt and
Documentation/vfio-mediated-device.txt, the user passes a host-assigned
device or mdev to crosvm through the --vfio parameter; the vfio module
opens this device and gets this device's information.

Implement the PciDevice trait on this device so that the vfio_pci
module can trap guest PCI config reads/writes and MMIO reads/writes,
and forward these operations to the kernel vfio driver.

Currently the vfio container:group:device relationship is
1:1:1; in the future it could be extended to 1:m:n.

BUG=chromium:992270
TEST=none

Change-Id: I8006ef65022d56197eaeb464811a59db2ce54b9a
Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1580458
Reviewed-by: Zach Reizner <zachr@chromium.org>
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Daniel Verkamp <dverkamp@chromium.org>
-rw-r--r--Cargo.lock9
-rw-r--r--devices/Cargo.toml2
-rw-r--r--devices/src/lib.rs3
-rw-r--r--devices/src/pci/mod.rs2
-rw-r--r--devices/src/pci/vfio_pci.rs290
-rw-r--r--devices/src/vfio.rs377
6 files changed, 683 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
index b4c5c6a..968416a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -145,6 +145,7 @@ dependencies = [
  "gpu_renderer 0.1.0",
  "io_jail 0.1.0",
  "kvm 0.1.0",
+ "kvm_sys 0.1.0",
  "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg_on_socket_derive 0.1.0",
  "msg_socket 0.1.0",
@@ -159,6 +160,7 @@ dependencies = [
  "tempfile 3.0.7",
  "tpm2 0.1.0",
  "usb_util 0.1.0",
+ "vfio_sys 0.1.0",
  "vhost 0.1.0",
  "virtio_sys 0.1.0",
  "vm_control 0.1.0",
@@ -528,6 +530,13 @@ dependencies = [
 ]
 
 [[package]]
+name = "vfio_sys"
+version = "0.1.0"
+dependencies = [
+ "sys_util 0.1.0",
+]
+
+[[package]]
 name = "vhost"
 version = "0.1.0"
 dependencies = [
diff --git a/devices/Cargo.toml b/devices/Cargo.toml
index 8a91a4e..2ddfcd0 100644
--- a/devices/Cargo.toml
+++ b/devices/Cargo.toml
@@ -22,6 +22,7 @@ gpu_display = { path = "../gpu_display", optional = true }
 gpu_renderer = { path = "../gpu_renderer", optional = true }
 io_jail = { path = "../io_jail" }
 kvm = { path = "../kvm" }
+kvm_sys = { path = "../kvm_sys" }
 libc = "*"
 msg_on_socket_derive = { path = "../msg_socket/msg_on_socket_derive" }
 msg_socket = { path = "../msg_socket" }
@@ -35,6 +36,7 @@ sync = { path = "../sync" }
 sys_util = { path = "../sys_util" }
 tpm2 = { path = "../tpm2", optional = true }
 usb_util = { path = "../usb_util" }
+vfio_sys = { path = "../vfio_sys" }
 vhost = { path = "../vhost" }
 virtio_sys = { path = "../virtio_sys" }
 vm_control = { path = "../vm_control" }
diff --git a/devices/src/lib.rs b/devices/src/lib.rs
index bc9c8c1..512d08b 100644
--- a/devices/src/lib.rs
+++ b/devices/src/lib.rs
@@ -19,6 +19,7 @@ mod serial;
 pub mod split_irqchip_common;
 pub mod usb;
 mod utils;
+pub mod vfio;
 pub mod virtio;
 
 pub use self::bus::Error as BusError;
@@ -28,6 +29,7 @@ pub use self::i8042::I8042Device;
 pub use self::ioapic::Ioapic;
 pub use self::pci::{
     Ac97Dev, PciConfigIo, PciConfigMmio, PciDevice, PciDeviceError, PciInterruptPin, PciRoot,
+    VfioPciDevice,
 };
 pub use self::pic::Pic;
 pub use self::pit::{Pit, PitError};
@@ -40,4 +42,5 @@ pub use self::serial::{
 };
 pub use self::usb::host_backend::host_backend_device_provider::HostBackendDeviceProvider;
 pub use self::usb::xhci::xhci_controller::XhciController;
+pub use self::vfio::VfioDevice;
 pub use self::virtio::VirtioPciDevice;
diff --git a/devices/src/pci/mod.rs b/devices/src/pci/mod.rs
index 791161a..5d44dfc 100644
--- a/devices/src/pci/mod.rs
+++ b/devices/src/pci/mod.rs
@@ -11,6 +11,7 @@ mod ac97_regs;
 mod pci_configuration;
 mod pci_device;
 mod pci_root;
+mod vfio_pci;
 
 pub use self::ac97::Ac97Dev;
 pub use self::pci_configuration::{
@@ -21,6 +22,7 @@ pub use self::pci_configuration::{
 pub use self::pci_device::Error as PciDeviceError;
 pub use self::pci_device::PciDevice;
 pub use self::pci_root::{PciConfigIo, PciConfigMmio, PciRoot};
+pub use self::vfio_pci::VfioPciDevice;
 
 /// PCI has four interrupt pins A->D.
 #[derive(Copy, Clone)]
diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs
new file mode 100644
index 0000000..b5c5152
--- /dev/null
+++ b/devices/src/pci/vfio_pci.rs
@@ -0,0 +1,290 @@
+// Copyright 2019 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::os::unix::io::RawFd;
+use std::sync::Arc;
+use std::u32;
+
+use kvm::Datamatch;
+use resources::{Alloc, SystemAllocator};
+use sys_util::EventFd;
+
+use vfio_sys::*;
+
+use crate::pci::pci_device::{Error as PciDeviceError, PciDevice};
+use crate::pci::PciInterruptPin;
+
+use crate::vfio::VfioDevice;
+
+/// Accessor for the PCI configuration region (`VFIO_PCI_CONFIG_REGION_INDEX`)
+/// of a vfio device.
+struct VfioPciConfig {
+    // Shared handle to the vfio device whose config region is read and written.
+    device: Arc<VfioDevice>,
+}
+
+impl VfioPciConfig {
+    /// Wraps `device` so its PCI configuration region can be accessed.
+    fn new(device: Arc<VfioDevice>) -> Self {
+        Self { device }
+    }
+
+    /// Reads the byte at `offset` of the configuration region.
+    ///
+    /// The buffer starts zeroed; `region_read` only logs a warning on failure.
+    #[allow(dead_code)]
+    fn read_config_byte(&self, offset: u32) -> u8 {
+        let mut bytes = [0u8; 1];
+        self.device
+            .region_read(VFIO_PCI_CONFIG_REGION_INDEX, &mut bytes, u64::from(offset));
+        bytes[0]
+    }
+
+    /// Reads the little-endian 16-bit word at `offset` of the configuration region.
+    #[allow(dead_code)]
+    fn read_config_word(&self, offset: u32) -> u16 {
+        let mut bytes = [0u8; 2];
+        self.device
+            .region_read(VFIO_PCI_CONFIG_REGION_INDEX, &mut bytes, u64::from(offset));
+        u16::from_le_bytes(bytes)
+    }
+
+    /// Reads the little-endian 32-bit dword at `offset` of the configuration region.
+    #[allow(dead_code)]
+    fn read_config_dword(&self, offset: u32) -> u32 {
+        let mut bytes = [0u8; 4];
+        self.device
+            .region_read(VFIO_PCI_CONFIG_REGION_INDEX, &mut bytes, u64::from(offset));
+        u32::from_le_bytes(bytes)
+    }
+
+    /// Writes the single byte `buf` at `offset` of the configuration region.
+    #[allow(dead_code)]
+    fn write_config_byte(&self, buf: u8, offset: u32) {
+        let bytes = [buf];
+        self.device
+            .region_write(VFIO_PCI_CONFIG_REGION_INDEX, &bytes, u64::from(offset))
+    }
+
+    /// Writes `buf` as a little-endian 16-bit word at `offset` of the configuration region.
+    #[allow(dead_code)]
+    fn write_config_word(&self, buf: u16, offset: u32) {
+        let bytes = buf.to_le_bytes();
+        self.device
+            .region_write(VFIO_PCI_CONFIG_REGION_INDEX, &bytes, u64::from(offset))
+    }
+
+    /// Writes `buf` as a little-endian 32-bit dword at `offset` of the configuration region.
+    #[allow(dead_code)]
+    fn write_config_dword(&self, buf: u32, offset: u32) {
+        let bytes = buf.to_le_bytes();
+        self.device
+            .region_write(VFIO_PCI_CONFIG_REGION_INDEX, &bytes, u64::from(offset))
+    }
+}
+
+/// A memory BAR that has been given a guest address range by `allocate_io_bars`.
+struct MmioInfo {
+    // vfio region index of the BAR (also used as the BAR number).
+    bar_index: u32,
+    // First guest address of the allocated range.
+    start: u64,
+    // Size of the allocated range in bytes.
+    length: u64,
+}
+
+/// An I/O space BAR found while probing; such BARs are not allocated and read
+/// back as zero from `read_config_register`.
+struct IoInfo {
+    // vfio region index of the BAR (also used as the BAR number).
+    bar_index: u32,
+}
+
+/// Implements the Vfio Pci device, then a pci device is added into vm
+pub struct VfioPciDevice {
+    // Underlying vfio device used for BAR and config region accesses.
+    device: Arc<VfioDevice>,
+    // Typed accessor for the device's PCI configuration region.
+    config: VfioPciConfig,
+    // (bus, device) numbers set by `assign_bus_dev`; `None` until then.
+    pci_bus_dev: Option<(u8, u8)>,
+    // Interrupt event handed over by `assign_irq`.
+    interrupt_evt: Option<EventFd>,
+    // Interrupt resample event handed over by `assign_irq`.
+    interrupt_resample_evt: Option<EventFd>,
+    // Memory BARs with their allocated guest address ranges.
+    mmio_regions: Vec<MmioInfo>,
+    // I/O space BARs discovered during BAR probing.
+    io_regions: Vec<IoInfo>,
+}
+
+impl VfioPciDevice {
+    /// Constructs a new Vfio Pci device for the given Vfio device
+    pub fn new(device: Box<VfioDevice>) -> Self {
+        // The device is shared between this struct and its config accessor.
+        let shared = Arc::new(*device);
+        VfioPciDevice {
+            config: VfioPciConfig::new(Arc::clone(&shared)),
+            device: shared,
+            pci_bus_dev: None,
+            interrupt_evt: None,
+            interrupt_resample_evt: None,
+            mmio_regions: Vec::new(),
+            io_regions: Vec::new(),
+        }
+    }
+
+    /// Returns a copy of the first allocated MMIO region containing `addr`,
+    /// or `None` when no region covers it.
+    fn find_region(&self, addr: u64) -> Option<MmioInfo> {
+        self.mmio_regions
+            .iter()
+            .find(|region| addr >= region.start && addr < region.start + region.length)
+            .map(|region| MmioInfo {
+                bar_index: region.bar_index,
+                start: region.start,
+                length: region.length,
+            })
+    }
+}
+
+impl PciDevice for VfioPciDevice {
+    /// Returns the fixed label used to identify this device in debug output.
+    fn debug_label(&self) -> String {
+        "vfio pci device".to_string()
+    }
+
+    /// Records the PCI bus and device numbers; `allocate_io_bars` requires
+    /// this to have been called first.
+    fn assign_bus_dev(&mut self, bus: u8, device: u8) {
+        self.pci_bus_dev = Some((bus, device));
+    }
+
+    /// Returns the file descriptors to keep open for this device; currently
+    /// always an empty list.
+    fn keep_fds(&self) -> Vec<RawFd> {
+        Vec::new()
+    }
+
+    /// Writes the assigned interrupt line and pin into the device's config
+    /// space and stores the interrupt event fds.
+    fn assign_irq(
+        &mut self,
+        irq_evt: EventFd,
+        irq_resample_evt: EventFd,
+        irq_num: u32,
+        irq_pin: PciInterruptPin,
+    ) {
+        // 0x3C / 0x3D are the Interrupt Line / Interrupt Pin registers of the
+        // PCI configuration header.
+        self.config.write_config_byte(irq_num as u8, 0x3C);
+        // NOTE(review): the `+ 1` presumably maps a zero-based PciInterruptPin
+        // to the register encoding where 1 means INTA -- confirm against the
+        // enum definition.
+        self.config.write_config_byte(irq_pin as u8 + 1, 0x3D);
+        self.interrupt_evt = Some(irq_evt);
+        self.interrupt_resample_evt = Some(irq_resample_evt);
+    }
+
+    /// Probes BAR0..BAR5 and the expansion ROM BAR of the vfio device and
+    /// allocates guest MMIO space for every memory BAR that is implemented.
+    ///
+    /// Each allocated BAR is programmed with its guest address and recorded in
+    /// `mmio_regions`; I/O space BARs are only recorded in `io_regions`.
+    /// Returns the `(address, size)` pairs of the allocated ranges.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `assign_bus_dev` has not been called.
+    fn allocate_io_bars(
+        &mut self,
+        resources: &mut SystemAllocator,
+    ) -> Result<Vec<(u64, u64)>, PciDeviceError> {
+        let mut ranges = Vec::new();
+        let mut i = VFIO_PCI_BAR0_REGION_INDEX;
+        let (bus, dev) = self
+            .pci_bus_dev
+            .expect("assign_bus_dev must be called prior to allocate_io_bars");
+
+        while i <= VFIO_PCI_ROM_REGION_INDEX {
+            let is_rom = i == VFIO_PCI_ROM_REGION_INDEX;
+            // The expansion ROM BAR lives at 0x30; BAR0..BAR5 are consecutive
+            // dwords starting at 0x10.
+            let offset: u32 = if is_rom { 0x30 } else { 0x10 + i * 4 };
+
+            // BAR sizing: write all ones, then read back which bits stick.
+            self.config.write_config_dword(0xffff_ffff, offset);
+            let mut low = self.config.read_config_dword(offset);
+
+            let low_flag = low & 0xf;
+            // Bit 0 marks an I/O space BAR, except in the ROM BAR where it is
+            // the enable bit.
+            let is_io = !is_rom && low_flag & 0x1 == 0x1;
+            // Bit 2 selects 64-bit decoding only in a memory BAR. In an I/O BAR
+            // it is an address bit, and the ROM BAR has no 64-bit form, so
+            // neither may be treated as 64-bit (that would skip the next BAR
+            // or write past the register).
+            let is_64bit = !is_rom && !is_io && low_flag & 0x4 == 0x4;
+            if !is_io && low != 0 {
+                // For a 32-bit BAR the upper half stays all ones so the
+                // two's-complement below yields the 32-bit size.
+                let mut upper: u32 = 0xffffffff;
+                if is_64bit {
+                    self.config.write_config_dword(upper, offset + 4);
+                    upper = self.config.read_config_dword(offset + 4);
+                }
+
+                low &= 0xffff_fff0;
+                let mut size: u64 = u64::from(upper);
+                size <<= 32;
+                size |= u64::from(low);
+                size = !size + 1;
+                let bar_addr = resources
+                    .mmio_allocator()
+                    .allocate_with_align(
+                        size,
+                        Alloc::PciBar {
+                            bus,
+                            dev,
+                            bar: i as u8,
+                        },
+                        "vfio_bar".to_string(),
+                        size,
+                    )
+                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))?;
+                ranges.push((bar_addr, size));
+                self.mmio_regions.push(MmioInfo {
+                    bar_index: i,
+                    start: bar_addr,
+                    length: size,
+                });
+
+                // Program the BAR with the guest address, keeping the flag bits
+                // the device reported.
+                low = bar_addr as u32;
+                low |= low_flag;
+                self.config.write_config_dword(low, offset);
+                if is_64bit {
+                    upper = (bar_addr >> 32) as u32;
+                    self.config.write_config_dword(upper, offset + 4);
+                }
+            } else if is_io {
+                self.io_regions.push(IoInfo { bar_index: i });
+            }
+
+            // A 64-bit BAR occupies two consecutive BAR slots.
+            if is_64bit {
+                i += 2;
+            } else {
+                i += 1;
+            }
+        }
+        Ok(ranges)
+    }
+
+    /// Allocates no device-specific BARs; always returns an empty list.
+    fn allocate_device_bars(
+        &mut self,
+        _resources: &mut SystemAllocator,
+    ) -> Result<Vec<(u64, u64)>, PciDeviceError> {
+        Ok(Vec::new())
+    }
+
+    /// Registers no capabilities; always succeeds.
+    fn register_device_capabilities(&mut self) -> Result<(), PciDeviceError> {
+        Ok(())
+    }
+
+    /// This device exposes no ioeventfds; always returns an empty list.
+    fn ioeventfds(&self) -> Vec<(&EventFd, u64, Datamatch)> {
+        Vec::new()
+    }
+
+    /// Reads the config dword with index `reg_idx` from the vfio device.
+    ///
+    /// Registers that belong to an I/O space BAR recorded in `io_regions`
+    /// read as 0.
+    fn read_config_register(&self, reg_idx: usize) -> u32 {
+        let reg = (reg_idx * 4) as u32;
+        let value = self.config.read_config_dword(reg);
+
+        // Ignore IO bar: only the BAR0..BAR5 register window can match one.
+        let in_bar_window = reg >= 0x10 && reg <= 0x24;
+        let is_io_bar = in_bar_window
+            && self
+                .io_regions
+                .iter()
+                .any(|io_info| io_info.bar_index * 4 + 0x10 == reg);
+
+        if is_io_bar {
+            0
+        } else {
+            value
+        }
+    }
+
+    /// Writes `data` into the device's config region, starting `offset` bytes
+    /// into the dword with index `reg_idx`.
+    fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
+        let start = (reg_idx * 4) as u64 + offset;
+        self.device
+            .region_write(VFIO_PCI_CONFIG_REGION_INDEX, data, start);
+    }
+
+    /// Reads from the BAR that contains guest address `addr` into `data`.
+    /// Addresses outside every allocated BAR are ignored.
+    fn read_bar(&mut self, addr: u64, data: &mut [u8]) {
+        let region = match self.find_region(addr) {
+            Some(region) => region,
+            None => return,
+        };
+        self.device
+            .region_read(region.bar_index, data, addr - region.start);
+    }
+
+    /// Writes `data` to the BAR that contains guest address `addr`.
+    /// Addresses outside every allocated BAR are ignored.
+    fn write_bar(&mut self, addr: u64, data: &[u8]) {
+        let region = match self.find_region(addr) {
+            Some(region) => region,
+            None => return,
+        };
+        self.device
+            .region_write(region.bar_index, data, addr - region.start);
+    }
+}
diff --git a/devices/src/vfio.rs b/devices/src/vfio.rs
new file mode 100644
index 0000000..b08414a
--- /dev/null
+++ b/devices/src/vfio.rs
@@ -0,0 +1,377 @@
+// Copyright 2019 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::ffi::CString;
+use std::fmt;
+use std::fs::{File, OpenOptions};
+use std::io;
+use std::mem;
+use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
+use std::os::unix::prelude::FileExt;
+use std::path::{Path, PathBuf};
+use std::u32;
+
+use kvm::Vm;
+use sys_util::{
+    ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, warn, Error,
+};
+
+use vfio_sys::*;
+
+/// Errors that can occur while setting up a vfio device.
+#[derive(Debug)]
+pub enum VfioError {
+    /// Opening the /dev/vfio/vfio container failed.
+    OpenContainer(io::Error),
+    /// Opening the /dev/vfio/<group> file failed.
+    OpenGroup(io::Error),
+    /// The VFIO_GROUP_GET_STATUS ioctl failed.
+    GetGroupStatus(Error),
+    /// The group is not viable.
+    GroupViable,
+    /// The container reported an API version other than `VFIO_API_VERSION`.
+    VfioApiVersion,
+    /// The container does not support the VFIO_TYPE1v2_IOMMU extension.
+    VfioType1V2,
+    /// The VFIO_GROUP_SET_CONTAINER ioctl failed.
+    GroupSetContainer(Error),
+    /// The VFIO_SET_IOMMU ioctl failed.
+    ContainerSetIOMMU(Error),
+    /// The VFIO_GROUP_GET_DEVICE_FD ioctl failed.
+    GroupGetDeviceFD(Error),
+    /// Creating the KVM vfio device failed.
+    CreateVfioKvmDevice(Error),
+    /// The KVM_SET_DEVICE_ATTR ioctl on the KVM vfio device failed.
+    KvmSetDeviceAttr(Error),
+    /// VFIO_DEVICE_GET_INFO failed or returned unexpected device info.
+    VfioDeviceGetInfo(Error),
+    /// The VFIO_DEVICE_GET_REGION_INFO ioctl failed.
+    VfioDeviceGetRegionInfo(Error),
+    /// The sysfs path could not be resolved to a group or device name.
+    InvalidPath,
+}
+
+/// Human-readable descriptions of each `VfioError` variant.
+impl fmt::Display for VfioError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            VfioError::OpenContainer(e) => write!(f, "failed to open /dev/vfio/vfio container: {}", e),
+            VfioError::OpenGroup(e) => write!(f, "failed to open /dev/vfio/$group_num group: {}", e),
+            VfioError::GetGroupStatus(e) => write!(f, "failed to get Group Status: {}", e),
+            VfioError::GroupViable => write!(f, "group is inviable"),
+            VfioError::VfioApiVersion => write!(f, "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"),
+            VfioError::VfioType1V2 => write!(f, "container doesn't support VfioType1V2 IOMMU driver type"),
+            VfioError::GroupSetContainer(e) => write!(f, "failed to add vfio group into vfio container: {}", e),
+            VfioError::ContainerSetIOMMU(e) => write!(f, "failed to set container's IOMMU driver type as VfioType1V2: {}", e),
+            VfioError::GroupGetDeviceFD(e) => write!(f, "failed to get vfio device fd: {}", e),
+            VfioError::CreateVfioKvmDevice(e) => write!(f, "failed to create KVM vfio device: {}", e),
+            VfioError::KvmSetDeviceAttr(e) => write!(f, "failed to set KVM vfio device's attribute: {}", e),
+            VfioError::VfioDeviceGetInfo(e) => write!(f, "failed to get vfio device's info or info doesn't match: {}", e),
+            VfioError::VfioDeviceGetRegionInfo(e) => write!(f, "failed to get vfio device's region info: {}", e),
+            VfioError::InvalidPath => write!(f, "invalid file path"),
+        }
+    }
+}
+
+/// Returns `Error::last()`; called right after a failed ioctl to capture its error.
+fn get_error() -> Error {
+    Error::last()
+}
+
+/// An open vfio container (/dev/vfio/vfio).
+struct VfioContainer {
+    // The opened /dev/vfio/vfio file.
+    container: File,
+}
+
+// VFIO API version this code expects; `VfioGroup::new` rejects a container
+// that reports a different value.
+const VFIO_API_VERSION: u8 = 0;
+impl VfioContainer {
+    /// Opens /dev/vfio/vfio for reading and writing.
+    fn new() -> Result<Self, VfioError> {
+        let mut options = OpenOptions::new();
+        options.read(true).write(true);
+        let container = options
+            .open("/dev/vfio/vfio")
+            .map_err(VfioError::OpenContainer)?;
+
+        Ok(VfioContainer { container })
+    }
+
+    /// Returns the raw result of the VFIO_GET_API_VERSION ioctl.
+    fn get_api_version(&self) -> i32 {
+        // Safe as file is vfio container fd and ioctl is defined by kernel.
+        unsafe { ioctl(self, VFIO_GET_API_VERSION()) }
+    }
+
+    /// Returns true when the container reports support for IOMMU type `val`.
+    ///
+    /// Panics if `val` is neither VFIO_TYPE1_IOMMU nor VFIO_TYPE1v2_IOMMU.
+    fn check_extension(&self, val: u32) -> bool {
+        assert!(
+            val == VFIO_TYPE1_IOMMU || val == VFIO_TYPE1v2_IOMMU,
+            "IOMMU type error"
+        );
+
+        // Safe as file is vfio container and make sure val is valid.
+        unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION(), val.into()) == 1 }
+    }
+
+    /// Sets the container's IOMMU type to `val` and returns the raw ioctl result.
+    ///
+    /// Panics if `val` is neither VFIO_TYPE1_IOMMU nor VFIO_TYPE1v2_IOMMU.
+    fn set_iommu(&self, val: u32) -> i32 {
+        assert!(
+            val == VFIO_TYPE1_IOMMU || val == VFIO_TYPE1v2_IOMMU,
+            "IOMMU type error"
+        );
+
+        // Safe as file is vfio container and make sure val is valid.
+        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU(), val.into()) }
+    }
+}
+
+/// Exposes the container file's descriptor so the container can be passed to ioctl helpers.
+impl AsRawFd for VfioContainer {
+    fn as_raw_fd(&self) -> RawFd {
+        self.container.as_raw_fd()
+    }
+}
+
+/// An open vfio group (/dev/vfio/<group id>).
+struct VfioGroup {
+    // The opened group file.
+    group: File,
+}
+
+impl VfioGroup {
+    /// Opens vfio group `id`, attaches it to a newly opened container using
+    /// the VFIO_TYPE1v2_IOMMU driver, and adds the group to a KVM vfio device
+    /// created on `vm`.
+    ///
+    /// Fails with the matching `VfioError` when the group cannot be opened or
+    /// is not viable, when the container's API version or IOMMU support does
+    /// not match, or when one of the ioctls fails.
+    fn new(id: u32, vm: &Vm) -> Result<Self, VfioError> {
+        let group_path = format!("/dev/vfio/{}", id);
+
+        let group_file = OpenOptions::new()
+            .read(true)
+            .write(true)
+            .open(Path::new(&group_path))
+            .map_err(VfioError::OpenGroup)?;
+
+        let mut group_status = vfio_group_status {
+            argsz: mem::size_of::<vfio_group_status>() as u32,
+            flags: 0,
+        };
+        // Safe as we are the owner of group_file and group_status which are valid value.
+        let mut ret =
+            unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS(), &mut group_status) };
+        if ret < 0 {
+            return Err(VfioError::GetGroupStatus(get_error()));
+        }
+
+        // Test only the VIABLE bit: comparing the whole word would reject a
+        // viable group whenever the kernel reports any additional flag.
+        if (group_status.flags & VFIO_GROUP_FLAGS_VIABLE) == 0 {
+            return Err(VfioError::GroupViable);
+        }
+
+        let container = VfioContainer::new()?;
+        // Compare at full width; truncating the ioctl result to u8 could make
+        // an unrelated value (e.g. 256) look like version 0.
+        if container.get_api_version() != i32::from(VFIO_API_VERSION) {
+            return Err(VfioError::VfioApiVersion);
+        }
+        if !container.check_extension(VFIO_TYPE1v2_IOMMU) {
+            return Err(VfioError::VfioType1V2);
+        }
+
+        // Safe as we are the owner of group_file and container_raw_fd which are valid value,
+        // and we verify the ret value
+        let container_raw_fd = container.as_raw_fd();
+        ret = unsafe { ioctl_with_ref(&group_file, VFIO_GROUP_SET_CONTAINER(), &container_raw_fd) };
+        if ret < 0 {
+            return Err(VfioError::GroupSetContainer(get_error()));
+        }
+
+        ret = container.set_iommu(VFIO_TYPE1v2_IOMMU);
+        if ret < 0 {
+            return Err(VfioError::ContainerSetIOMMU(get_error()));
+        }
+
+        // NOTE(review): the KVM device file returned here and `container` are
+        // both dropped (closed) when this function returns -- confirm the
+        // kernel keeps the group attached and registered after that.
+        Self::kvm_device_add_group(vm, &group_file)?;
+
+        Ok(VfioGroup { group: group_file })
+    }
+
+    /// Creates a KVM vfio device on `vm` and adds `group` to it through
+    /// KVM_SET_DEVICE_ATTR, returning the file that owns the KVM device fd.
+    fn kvm_device_add_group(vm: &Vm, group: &File) -> Result<File, VfioError> {
+        let mut create_args = kvm_sys::kvm_create_device {
+            type_: kvm_sys::kvm_device_type_KVM_DEV_TYPE_VFIO,
+            fd: 0,
+            flags: 0,
+        };
+        vm.create_device(&mut create_args)
+            .map_err(VfioError::CreateVfioKvmDevice)?;
+
+        // Safe as we are the owner of create_args.fd which is valid value.
+        let kvm_vfio_file = unsafe { File::from_raw_fd(create_args.fd as i32) };
+
+        // The attribute carries the address of the group fd, so the fd must
+        // live in a local that outlives the ioctl below.
+        let group_fd = group.as_raw_fd();
+        let attr = kvm_sys::kvm_device_attr {
+            flags: 0,
+            group: kvm_sys::KVM_DEV_VFIO_GROUP,
+            attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
+            addr: &group_fd as *const i32 as u64,
+        };
+
+        // Safe as we are the owner of kvm_vfio_file and attr which are valid value,
+        // and we verify the return value.
+        let ret =
+            unsafe { ioctl_with_ref(&kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR(), &attr) };
+        if ret != 0 {
+            return Err(VfioError::KvmSetDeviceAttr(get_error()));
+        }
+
+        Ok(kvm_vfio_file)
+    }
+
+    /// Gets the vfio device file for the device named by the last component
+    /// of `name`.
+    ///
+    /// Returns `VfioError::InvalidPath` when `name` has no final component, is
+    /// not valid UTF-8, or contains an interior NUL byte, and
+    /// `VfioError::GroupGetDeviceFD` when the ioctl fails.
+    fn get_device(&self, name: &Path) -> Result<File, VfioError> {
+        let uuid_osstr = name.file_name().ok_or(VfioError::InvalidPath)?;
+        let uuid_str = uuid_osstr.to_str().ok_or(VfioError::InvalidPath)?;
+        // The path is supplied by the user, so report an embedded NUL as an
+        // invalid path instead of panicking.
+        let path = CString::new(uuid_str).map_err(|_| VfioError::InvalidPath)?;
+        let path_ptr = path.as_ptr();
+
+        // Safe as we are the owner of self and path_ptr which are valid value.
+        let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD(), path_ptr) };
+        if ret < 0 {
+            return Err(VfioError::GroupGetDeviceFD(get_error()));
+        }
+
+        // Safe as ret is valid FD
+        Ok(unsafe { File::from_raw_fd(ret) })
+    }
+}
+
+/// Exposes the group file's descriptor so the group can be passed to ioctl helpers.
+impl AsRawFd for VfioGroup {
+    fn as_raw_fd(&self) -> RawFd {
+        self.group.as_raw_fd()
+    }
+}
+
+/// Region description copied from the kernel's `vfio_region_info` for one device region.
+struct VfioRegion {
+    // Region flags; `region_write` requires VFIO_REGION_INFO_FLAG_WRITE.
+    flags: u32,
+    // Size of the region in bytes; accesses are bounds-checked against it.
+    size: u64,
+    // Offset of the region within the device file.
+    offset: u64,
+}
+
+/// Vfio device for exposing regions which could be read/write to kernel vfio device.
+pub struct VfioDevice {
+    // Device file obtained from the vfio group.
+    dev: File,
+    // Region table, indexed by vfio region index.
+    regions: Vec<VfioRegion>,
+}
+
+impl VfioDevice {
+    /// Creates a vfio device so that guest reads and writes on it can be
+    /// transferred to the kernel vfio driver.
+    ///
+    /// `sysfspath` is the device's path in the sys file system; its
+    /// `iommu_group` link names the vfio group to open. Returns
+    /// `VfioError::InvalidPath` when the group id cannot be derived from it.
+    pub fn new(sysfspath: &Path, vm: &Vm) -> Result<Self, VfioError> {
+        let group_link = sysfspath
+            .join("iommu_group")
+            .read_link()
+            .map_err(|_| VfioError::InvalidPath)?;
+        // The last component of the link target is the numeric group id.
+        let group_id = group_link
+            .file_name()
+            .and_then(|name| name.to_str())
+            .ok_or(VfioError::InvalidPath)?
+            .parse::<u32>()
+            .map_err(|_| VfioError::InvalidPath)?;
+
+        let group = VfioGroup::new(group_id, vm)?;
+        let dev = group.get_device(sysfspath)?;
+        let regions = Self::get_regions(&dev)?;
+
+        Ok(VfioDevice { dev, regions })
+    }
+
+    /// Queries `dev` for its device info and returns the description of every
+    /// region from VFIO_PCI_BAR0_REGION_INDEX up to the reported region count.
+    ///
+    /// Fails when the device info ioctl fails, the device is not flagged as
+    /// PCI, it reports too few regions or irqs, or a region info ioctl fails.
+    fn get_regions(dev: &File) -> Result<Vec<VfioRegion>, VfioError> {
+        let mut dev_info = vfio_device_info {
+            argsz: mem::size_of::<vfio_device_info>() as u32,
+            flags: 0,
+            num_regions: 0,
+            num_irqs: 0,
+        };
+        // Safe as we are the owner of dev and dev_info which are valid value,
+        // and we verify the return value.
+        let ret = unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_INFO(), &mut dev_info) };
+        let info_ok = ret >= 0
+            && (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0
+            && dev_info.num_regions >= VFIO_PCI_CONFIG_REGION_INDEX + 1
+            && dev_info.num_irqs >= VFIO_PCI_MSIX_IRQ_INDEX + 1;
+        if !info_ok {
+            return Err(VfioError::VfioDeviceGetInfo(get_error()));
+        }
+
+        // Collecting into a Result stops at the first failing region query.
+        (VFIO_PCI_BAR0_REGION_INDEX..dev_info.num_regions)
+            .map(|index| {
+                let mut reg_info = vfio_region_info {
+                    argsz: mem::size_of::<vfio_region_info>() as u32,
+                    flags: 0,
+                    index,
+                    cap_offset: 0,
+                    size: 0,
+                    offset: 0,
+                };
+                // Safe as we are the owner of dev and reg_info which are valid value,
+                // and we verify the return value.
+                let ret = unsafe {
+                    ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO(), &mut reg_info)
+                };
+                if ret < 0 {
+                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
+                }
+
+                Ok(VfioRegion {
+                    flags: reg_info.flags,
+                    size: reg_info.size,
+                    offset: reg_info.offset,
+                })
+            })
+            .collect()
+    }
+
+    /// Read region's data from VFIO device into buf
+    /// index: region num
+    /// buf: data destination and buf length is read size
+    /// addr: offset in the region
+    ///
+    /// An unknown index, an out-of-range access or a failed read is logged
+    /// with `warn!` and otherwise ignored.
+    pub fn region_read(&self, index: u32, buf: &mut [u8], addr: u64) {
+        let region = match self.regions.get(index as usize) {
+            Some(region) => region,
+            None => {
+                warn!("region read with invalid index: {}", index);
+                return;
+            }
+        };
+
+        let size = buf.len() as u64;
+        // checked_add: a wrapped `addr + size` could otherwise slip past the
+        // bounds check (or panic in a debug build).
+        let in_bounds = addr
+            .checked_add(size)
+            .map_or(false, |end| end <= region.size);
+        if !in_bounds {
+            warn!(
+                "region read with invalid parameter, index: {}, addr: {:x}, size: {:x}",
+                index, addr, size
+            );
+            return;
+        }
+
+        if let Err(e) = self.dev.read_exact_at(buf, region.offset + addr) {
+            warn!(
+                "Failed to read region in index: {}, addr: {:x}, error: {}",
+                index, addr, e
+            );
+        }
+    }
+
+    /// write the data from buf into a vfio device region
+    /// index: region num
+    /// buf: data src and buf length is write size
+    /// addr: offset in the region
+    ///
+    /// An unknown index, an out-of-range access, a region without the write
+    /// flag or a failed write is logged with `warn!` and otherwise ignored.
+    pub fn region_write(&self, index: u32, buf: &[u8], addr: u64) {
+        let region = match self.regions.get(index as usize) {
+            Some(region) => region,
+            None => {
+                warn!("region write with invalid index: {}", index);
+                return;
+            }
+        };
+
+        let size = buf.len() as u64;
+        // checked_add: a wrapped `addr + size` could otherwise slip past the
+        // bounds check (or panic in a debug build).
+        let in_bounds = addr
+            .checked_add(size)
+            .map_or(false, |end| end <= region.size);
+        let writable = (region.flags & VFIO_REGION_INFO_FLAG_WRITE) != 0;
+        if !in_bounds || !writable {
+            warn!(
+                "region write with invalid parameter, index: {}, addr: {:x}, size: {:x}",
+                index, addr, size
+            );
+            return;
+        }
+
+        if let Err(e) = self.dev.write_all_at(buf, region.offset + addr) {
+            warn!(
+                "Failed to write region in index: {}, addr: {:x}, error: {}",
+                index, addr, e
+            );
+        }
+    }
+}
+
+/// Exposes the vfio device file's descriptor.
+impl AsRawFd for VfioDevice {
+    fn as_raw_fd(&self) -> RawFd {
+        self.dev.as_raw_fd()
+    }
+}