summary refs log tree commit diff
diff options
context:
space:
mode:
authorDaniel Verkamp <dverkamp@chromium.org>2020-02-07 11:00:55 -0800
committerCommit Bot <commit-bot@chromium.org>2020-04-08 06:09:25 +0000
commite1980a9c360b04705a16434bdaf1a56161dafb56 (patch)
tree95944d7bfa87505050c2716fd764ffd1699d737e
parentf3081b120e0934539f6f3f2c60c9ff26c801c0ea (diff)
downloadcrosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar
crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.gz
crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.bz2
crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.lz
crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.xz
crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.zst
crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.zip
devices: pmem: implement flush using msync()
Previously, writable pmem devices implemented the flush command using
fsync(); however, this does not guarantee synchronization of memory
mappings via mmap() to the file on disk.  What we actually need is
msync() on the pmem file mapping, but we don't have access to that
mapping in the pmem child process, and it isn't trivial to pass it along
since it is owned by the Vm object once it has been added as a
mmap_arena.

In order to call msync() on the mapping, add a new VmControl socket so
that the pmem device can request that the main process issue an msync()
on the MemoryMappingArena identified by its slot number.

BUG=chromium:1007535
TEST=mount filesystem on /dev/pmem0 and sync; verify msync in strace

Change-Id: Id0484757c422cf81d454fd54012a12dbcc1baaf6
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2044365
Reviewed-by: Stephen Barber <smbarber@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Daniel Verkamp <dverkamp@chromium.org>
-rw-r--r--devices/src/virtio/pmem.rs60
-rw-r--r--src/linux.rs67
-rw-r--r--vm_control/src/lib.rs47
3 files changed, 152 insertions, 22 deletions
diff --git a/devices/src/virtio/pmem.rs b/devices/src/virtio/pmem.rs
index 931b037..499e110 100644
--- a/devices/src/virtio/pmem.rs
+++ b/devices/src/virtio/pmem.rs
@@ -13,6 +13,10 @@ use sys_util::{error, EventFd, GuestAddress, GuestMemory, PollContext, PollToken
 
 use data_model::{DataInit, Le32, Le64};
 
+use msg_socket::{MsgReceiver, MsgSender};
+
+use vm_control::{VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse};
+
 use super::{
     copy_config, DescriptorChain, DescriptorError, Interrupt, Queue, Reader, VirtioDevice, Writer,
     TYPE_PMEM, VIRTIO_F_VERSION_1,
@@ -83,19 +87,38 @@ struct Worker {
     interrupt: Interrupt,
     queue: Queue,
     memory: GuestMemory,
-    disk_image: File,
+    pmem_device_socket: VmMsyncRequestSocket,
+    mapping_arena_slot: u32,
 }
 
 impl Worker {
     fn execute_request(&self, request: virtio_pmem_req) -> u32 {
         match request.type_.to_native() {
-            VIRTIO_PMEM_REQ_TYPE_FLUSH => match self.disk_image.sync_all() {
-                Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
-                Err(e) => {
-                    error!("failed flushing disk image: {}", e);
-                    VIRTIO_PMEM_RESP_TYPE_EIO
+            VIRTIO_PMEM_REQ_TYPE_FLUSH => {
+                let request = VmMsyncRequest::MsyncArena {
+                    slot: self.mapping_arena_slot,
+                    offset: 0, // The pmem backing file is always at offset 0 in the arena.
+                };
+
+                if let Err(e) = self.pmem_device_socket.send(&request) {
+                    error!("failed to send request: {}", e);
+                    return VIRTIO_PMEM_RESP_TYPE_EIO;
+                }
+
+                match self.pmem_device_socket.recv() {
+                    Ok(response) => match response {
+                        VmMsyncResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
+                        VmMsyncResponse::Err(e) => {
+                            error!("failed flushing disk image: {}", e);
+                            VIRTIO_PMEM_RESP_TYPE_EIO
+                        }
+                    },
+                    Err(e) => {
+                        error!("failed to receive data: {}", e);
+                        VIRTIO_PMEM_RESP_TYPE_EIO
+                    }
                 }
-            },
+            }
             _ => {
                 error!("unknown request type: {}", request.type_.to_native());
                 VIRTIO_PMEM_RESP_TYPE_EIO
@@ -199,21 +222,27 @@ pub struct Pmem {
     worker_thread: Option<thread::JoinHandle<()>>,
     disk_image: Option<File>,
     mapping_address: GuestAddress,
+    mapping_arena_slot: u32,
     mapping_size: u64,
+    pmem_device_socket: Option<VmMsyncRequestSocket>,
 }
 
 impl Pmem {
     pub fn new(
         disk_image: File,
         mapping_address: GuestAddress,
+        mapping_arena_slot: u32,
         mapping_size: u64,
+        pmem_device_socket: Option<VmMsyncRequestSocket>,
     ) -> SysResult<Pmem> {
         Ok(Pmem {
             kill_event: None,
             worker_thread: None,
             disk_image: Some(disk_image),
             mapping_address,
+            mapping_arena_slot,
             mapping_size,
+            pmem_device_socket,
         })
     }
 }
@@ -233,11 +262,15 @@ impl Drop for Pmem {
 
 impl VirtioDevice for Pmem {
     fn keep_fds(&self) -> Vec<RawFd> {
+        let mut keep_fds = Vec::new();
         if let Some(disk_image) = &self.disk_image {
-            vec![disk_image.as_raw_fd()]
-        } else {
-            vec![]
+            keep_fds.push(disk_image.as_raw_fd());
         }
+
+        if let Some(ref pmem_device_socket) = self.pmem_device_socket {
+            keep_fds.push(pmem_device_socket.as_raw_fd());
+        }
+        keep_fds
     }
 
     fn device_type(&self) -> u32 {
@@ -274,7 +307,9 @@ impl VirtioDevice for Pmem {
         let queue = queues.remove(0);
         let queue_event = queue_events.remove(0);
 
-        if let Some(disk_image) = self.disk_image.take() {
+        let mapping_arena_slot = self.mapping_arena_slot;
+
+        if let Some(pmem_device_socket) = self.pmem_device_socket.take() {
             let (self_kill_event, kill_event) =
                 match EventFd::new().and_then(|e| Ok((e.try_clone()?, e))) {
                     Ok(v) => v,
@@ -291,8 +326,9 @@ impl VirtioDevice for Pmem {
                     let mut worker = Worker {
                         interrupt,
                         memory,
-                        disk_image,
                         queue,
+                        pmem_device_socket,
+                        mapping_arena_slot,
                     };
                     worker.run(queue_event, kill_event);
                 });
diff --git a/src/linux.rs b/src/linux.rs
index 5bc2dcc..9dbdb5c 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -56,7 +56,8 @@ use vm_control::{
     BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket,
     DiskControlResult, UsbControlSocket, VmControlResponseSocket, VmIrqRequest, VmIrqResponse,
     VmIrqResponseSocket, VmMemoryControlRequestSocket, VmMemoryControlResponseSocket,
-    VmMemoryRequest, VmMemoryResponse, VmRunMode,
+    VmMemoryRequest, VmMemoryResponse, VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse,
+    VmMsyncResponseSocket, VmRunMode,
 };
 
 use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
@@ -254,6 +255,7 @@ enum TaggedControlSocket {
     Vm(VmControlResponseSocket),
     VmMemory(VmMemoryControlResponseSocket),
     VmIrq(VmIrqResponseSocket),
+    VmMsync(VmMsyncResponseSocket),
 }
 
 impl AsRef<UnixSeqpacket> for TaggedControlSocket {
@@ -263,6 +265,7 @@ impl AsRef<UnixSeqpacket> for TaggedControlSocket {
             Vm(ref socket) => socket.as_ref(),
             VmMemory(ref socket) => socket.as_ref(),
             VmIrq(ref socket) => socket.as_ref(),
+            VmMsync(ref socket) => socket.as_ref(),
         }
     }
 }
@@ -874,6 +877,7 @@ fn create_pmem_device(
     resources: &mut SystemAllocator,
     disk: &DiskOption,
     index: usize,
+    pmem_device_socket: VmMsyncRequestSocket,
 ) -> DeviceResult {
     let fd = OpenOptions::new()
         .read(true)
@@ -935,16 +939,23 @@ fn create_pmem_device(
         )
         .map_err(Error::AllocatePmemDeviceAddress)?;
 
-    vm.add_mmap_arena(
+    let slot = vm
+        .add_mmap_arena(
+            GuestAddress(mapping_address),
+            arena,
+            /* read_only = */ disk.read_only,
+            /* log_dirty_pages = */ false,
+        )
+        .map_err(Error::AddPmemDeviceMemory)?;
+
+    let dev = virtio::Pmem::new(
+        fd,
         GuestAddress(mapping_address),
-        arena,
-        /* read_only = */ disk.read_only,
-        /* log_dirty_pages = */ false,
+        slot,
+        arena_size,
+        Some(pmem_device_socket),
     )
-    .map_err(Error::AddPmemDeviceMemory)?;
-
-    let dev = virtio::Pmem::new(fd, GuestAddress(mapping_address), arena_size)
-        .map_err(Error::PmemDeviceNew)?;
+    .map_err(Error::PmemDeviceNew)?;
 
     Ok(VirtioDeviceStub {
         dev: Box::new(dev) as Box<dyn VirtioDevice>,
@@ -964,6 +975,7 @@ fn create_virtio_devices(
     gpu_device_socket: VmMemoryControlRequestSocket,
     balloon_device_socket: BalloonControlResponseSocket,
     disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
+    pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
 ) -> DeviceResult<Vec<VirtioDeviceStub>> {
     let mut devs = Vec::new();
 
@@ -973,7 +985,15 @@ fn create_virtio_devices(
     }
 
     for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
-        devs.push(create_pmem_device(cfg, vm, resources, pmem_disk, index)?);
+        let pmem_device_socket = pmem_device_sockets.remove(0);
+        devs.push(create_pmem_device(
+            cfg,
+            vm,
+            resources,
+            pmem_disk,
+            index,
+            pmem_device_socket,
+        )?);
     }
 
     devs.push(create_rng_device(cfg)?);
@@ -1124,6 +1144,7 @@ fn create_devices(
     gpu_device_socket: VmMemoryControlRequestSocket,
     balloon_device_socket: BalloonControlResponseSocket,
     disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
+    pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
     usb_provider: HostBackendDeviceProvider,
 ) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
     let stubs = create_virtio_devices(
@@ -1136,6 +1157,7 @@ fn create_devices(
         gpu_device_socket,
         balloon_device_socket,
         disk_device_sockets,
+        pmem_device_sockets,
     )?;
 
     let mut pci_devices = Vec::new();
@@ -1606,6 +1628,15 @@ pub fn run_config(cfg: Config) -> Result<()> {
         disk_device_sockets.push(disk_device_socket);
     }
 
+    let mut pmem_device_sockets = Vec::new();
+    let pmem_count = cfg.pmem_devices.len();
+    for _ in 0..pmem_count {
+        let (pmem_host_socket, pmem_device_socket) =
+            msg_socket::pair::<VmMsyncResponse, VmMsyncRequest>().map_err(Error::CreateSocket)?;
+        pmem_device_sockets.push(pmem_device_socket);
+        control_sockets.push(TaggedControlSocket::VmMsync(pmem_host_socket));
+    }
+
     let (gpu_host_socket, gpu_device_socket) =
         msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
     control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
@@ -1633,6 +1664,7 @@ pub fn run_config(cfg: Config) -> Result<()> {
                 gpu_device_socket,
                 balloon_device_socket,
                 &mut disk_device_sockets,
+                &mut pmem_device_sockets,
                 usb_provider,
             )
         },
@@ -2031,6 +2063,21 @@ fn run_control(
                                     }
                                 }
                             },
+                            TaggedControlSocket::VmMsync(socket) => match socket.recv() {
+                                Ok(request) => {
+                                    let response = request.execute(&mut linux.vm);
+                                    if let Err(e) = socket.send(&response) {
+                                        error!("failed to send VmMsyncResponse: {}", e);
+                                    }
+                                }
+                                Err(e) => {
+                                    if let MsgError::BadRecvSize { actual: 0, .. } = e {
+                                        vm_control_indices_to_remove.push(index);
+                                    } else {
+                                        error!("failed to recv VmMsyncRequest: {}", e);
+                                    }
+                                }
+                            },
                         }
                     }
                 }
diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs
index 4d48ff4..4035c1c 100644
--- a/vm_control/src/lib.rs
+++ b/vm_control/src/lib.rs
@@ -408,6 +408,50 @@ pub enum VmIrqResponse {
     Err(SysError),
 }
 
+#[derive(MsgOnSocket, Debug)]
+pub enum VmMsyncRequest {
+    /// Flush the content of a memory mapping to its backing file.
+    /// `slot` selects the arena (as returned by `Vm::add_mmap_arena`).
+    /// `offset` is the offset of the mapping to sync within the arena.
+    MsyncArena { slot: u32, offset: usize },
+}
+
+#[derive(MsgOnSocket, Debug)]
+pub enum VmMsyncResponse {
+    Ok,
+    Err(SysError),
+}
+
+impl VmMsyncRequest {
+    /// Executes this request on the given Vm.
+    ///
+    /// # Arguments
+    /// * `vm` - The `Vm` to perform the request on.
+    ///
+    /// This does not return a result, instead encapsulating the success or failure in a
+    /// `VmMsyncResponse` with the intended purpose of sending the response back over the socket
+    /// that received this `VmMsyncRequest`.
+    pub fn execute(&self, vm: &mut Vm) -> VmMsyncResponse {
+        use self::VmMsyncRequest::*;
+        match *self {
+            MsyncArena { slot, offset } => {
+                if let Some(arena) = vm.get_mmap_arena(slot) {
+                    match arena.msync(offset) {
+                        Ok(true) => VmMsyncResponse::Ok,
+                        Ok(false) => VmMsyncResponse::Err(SysError::new(EINVAL)),
+                        Err(e) => match e {
+                            MmapError::SystemCallFailed(errno) => VmMsyncResponse::Err(errno),
+                            _ => VmMsyncResponse::Err(SysError::new(EINVAL)),
+                        },
+                    }
+                } else {
+                    VmMsyncResponse::Err(SysError::new(EINVAL))
+                }
+            }
+        }
+    }
+}
+
 pub type BalloonControlRequestSocket = MsgSocket<BalloonControlCommand, BalloonControlResult>;
 pub type BalloonControlResponseSocket = MsgSocket<BalloonControlResult, BalloonControlCommand>;
 
@@ -422,6 +466,9 @@ pub type VmMemoryControlResponseSocket = MsgSocket<VmMemoryResponse, VmMemoryReq
 pub type VmIrqRequestSocket = MsgSocket<VmIrqRequest, VmIrqResponse>;
 pub type VmIrqResponseSocket = MsgSocket<VmIrqResponse, VmIrqRequest>;
 
+pub type VmMsyncRequestSocket = MsgSocket<VmMsyncRequest, VmMsyncResponse>;
+pub type VmMsyncResponseSocket = MsgSocket<VmMsyncResponse, VmMsyncRequest>;
+
 pub type VmControlRequestSocket = MsgSocket<VmRequest, VmResponse>;
 pub type VmControlResponseSocket = MsgSocket<VmResponse, VmRequest>;