diff options
author | Daniel Verkamp <dverkamp@chromium.org> | 2020-02-07 11:00:55 -0800 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2020-04-08 06:09:25 +0000 |
commit | e1980a9c360b04705a16434bdaf1a56161dafb56 (patch) | |
tree | 95944d7bfa87505050c2716fd764ffd1699d737e | |
parent | f3081b120e0934539f6f3f2c60c9ff26c801c0ea (diff) | |
download | crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.gz crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.bz2 crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.lz crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.xz crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.tar.zst crosvm-e1980a9c360b04705a16434bdaf1a56161dafb56.zip |
devices: pmem: implement flush using msync()
Previously, writable pmem devices implemented the flush command using fsync(); however, this does not guarantee synchronization of memory mappings via mmap() to the file on disk. What we actually need is msync() on the pmem file mapping, but we don't have access to that mapping in the pmem child process, and it isn't trivial to pass it along since it is owned by the Vm object once it has been added as a mmap_arena. In order to call msync() on the mapping, add a new VmControl socket so that the pmem device can request that the main process issues an msync() on the MemoryMappingArena identified by its slot number. BUG=chromium:1007535 TEST=mount filesystem on /dev/pmem0 and sync; verify msync in strace Change-Id: Id0484757c422cf81d454fd54012a12dbcc1baaf6 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2044365 Reviewed-by: Stephen Barber <smbarber@chromium.org> Tested-by: kokoro <noreply+kokoro@google.com> Commit-Queue: Daniel Verkamp <dverkamp@chromium.org>
-rw-r--r-- | devices/src/virtio/pmem.rs | 60 | ||||
-rw-r--r-- | src/linux.rs | 67 | ||||
-rw-r--r-- | vm_control/src/lib.rs | 47 |
3 files changed, 152 insertions, 22 deletions
diff --git a/devices/src/virtio/pmem.rs b/devices/src/virtio/pmem.rs index 931b037..499e110 100644 --- a/devices/src/virtio/pmem.rs +++ b/devices/src/virtio/pmem.rs @@ -13,6 +13,10 @@ use sys_util::{error, EventFd, GuestAddress, GuestMemory, PollContext, PollToken use data_model::{DataInit, Le32, Le64}; +use msg_socket::{MsgReceiver, MsgSender}; + +use vm_control::{VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse}; + use super::{ copy_config, DescriptorChain, DescriptorError, Interrupt, Queue, Reader, VirtioDevice, Writer, TYPE_PMEM, VIRTIO_F_VERSION_1, @@ -83,19 +87,38 @@ struct Worker { interrupt: Interrupt, queue: Queue, memory: GuestMemory, - disk_image: File, + pmem_device_socket: VmMsyncRequestSocket, + mapping_arena_slot: u32, } impl Worker { fn execute_request(&self, request: virtio_pmem_req) -> u32 { match request.type_.to_native() { - VIRTIO_PMEM_REQ_TYPE_FLUSH => match self.disk_image.sync_all() { - Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK, - Err(e) => { - error!("failed flushing disk image: {}", e); - VIRTIO_PMEM_RESP_TYPE_EIO + VIRTIO_PMEM_REQ_TYPE_FLUSH => { + let request = VmMsyncRequest::MsyncArena { + slot: self.mapping_arena_slot, + offset: 0, // The pmem backing file is always at offset 0 in the arena. 
+ }; + + if let Err(e) = self.pmem_device_socket.send(&request) { + error!("failed to send request: {}", e); + return VIRTIO_PMEM_RESP_TYPE_EIO; + } + + match self.pmem_device_socket.recv() { + Ok(response) => match response { + VmMsyncResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK, + VmMsyncResponse::Err(e) => { + error!("failed flushing disk image: {}", e); + VIRTIO_PMEM_RESP_TYPE_EIO + } + }, + Err(e) => { + error!("failed to receive data: {}", e); + VIRTIO_PMEM_RESP_TYPE_EIO + } } - }, + } _ => { error!("unknown request type: {}", request.type_.to_native()); VIRTIO_PMEM_RESP_TYPE_EIO @@ -199,21 +222,27 @@ pub struct Pmem { worker_thread: Option<thread::JoinHandle<()>>, disk_image: Option<File>, mapping_address: GuestAddress, + mapping_arena_slot: u32, mapping_size: u64, + pmem_device_socket: Option<VmMsyncRequestSocket>, } impl Pmem { pub fn new( disk_image: File, mapping_address: GuestAddress, + mapping_arena_slot: u32, mapping_size: u64, + pmem_device_socket: Option<VmMsyncRequestSocket>, ) -> SysResult<Pmem> { Ok(Pmem { kill_event: None, worker_thread: None, disk_image: Some(disk_image), mapping_address, + mapping_arena_slot, mapping_size, + pmem_device_socket, }) } } @@ -233,11 +262,15 @@ impl Drop for Pmem { impl VirtioDevice for Pmem { fn keep_fds(&self) -> Vec<RawFd> { + let mut keep_fds = Vec::new(); if let Some(disk_image) = &self.disk_image { - vec![disk_image.as_raw_fd()] - } else { - vec![] + keep_fds.push(disk_image.as_raw_fd()); } + + if let Some(ref pmem_device_socket) = self.pmem_device_socket { + keep_fds.push(pmem_device_socket.as_raw_fd()); + } + keep_fds } fn device_type(&self) -> u32 { @@ -274,7 +307,9 @@ impl VirtioDevice for Pmem { let queue = queues.remove(0); let queue_event = queue_events.remove(0); - if let Some(disk_image) = self.disk_image.take() { + let mapping_arena_slot = self.mapping_arena_slot; + + if let Some(pmem_device_socket) = self.pmem_device_socket.take() { let (self_kill_event, kill_event) = match EventFd::new().and_then(|e| 
Ok((e.try_clone()?, e))) { Ok(v) => v, @@ -291,8 +326,9 @@ impl VirtioDevice for Pmem { let mut worker = Worker { interrupt, memory, - disk_image, queue, + pmem_device_socket, + mapping_arena_slot, }; worker.run(queue_event, kill_event); }); diff --git a/src/linux.rs b/src/linux.rs index 5bc2dcc..9dbdb5c 100644 --- a/src/linux.rs +++ b/src/linux.rs @@ -56,7 +56,8 @@ use vm_control::{ BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket, DiskControlResult, UsbControlSocket, VmControlResponseSocket, VmIrqRequest, VmIrqResponse, VmIrqResponseSocket, VmMemoryControlRequestSocket, VmMemoryControlResponseSocket, - VmMemoryRequest, VmMemoryResponse, VmRunMode, + VmMemoryRequest, VmMemoryResponse, VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse, + VmMsyncResponseSocket, VmRunMode, }; use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption}; @@ -254,6 +255,7 @@ enum TaggedControlSocket { Vm(VmControlResponseSocket), VmMemory(VmMemoryControlResponseSocket), VmIrq(VmIrqResponseSocket), + VmMsync(VmMsyncResponseSocket), } impl AsRef<UnixSeqpacket> for TaggedControlSocket { @@ -263,6 +265,7 @@ impl AsRef<UnixSeqpacket> for TaggedControlSocket { Vm(ref socket) => socket.as_ref(), VmMemory(ref socket) => socket.as_ref(), VmIrq(ref socket) => socket.as_ref(), + VmMsync(ref socket) => socket.as_ref(), } } } @@ -874,6 +877,7 @@ fn create_pmem_device( resources: &mut SystemAllocator, disk: &DiskOption, index: usize, + pmem_device_socket: VmMsyncRequestSocket, ) -> DeviceResult { let fd = OpenOptions::new() .read(true) @@ -935,16 +939,23 @@ fn create_pmem_device( ) .map_err(Error::AllocatePmemDeviceAddress)?; - vm.add_mmap_arena( + let slot = vm + .add_mmap_arena( + GuestAddress(mapping_address), + arena, + /* read_only = */ disk.read_only, + /* log_dirty_pages = */ false, + ) + .map_err(Error::AddPmemDeviceMemory)?; + + let dev = virtio::Pmem::new( + fd, GuestAddress(mapping_address), - arena, - /* 
read_only = */ disk.read_only, - /* log_dirty_pages = */ false, + slot, + arena_size, + Some(pmem_device_socket), ) - .map_err(Error::AddPmemDeviceMemory)?; - - let dev = virtio::Pmem::new(fd, GuestAddress(mapping_address), arena_size) - .map_err(Error::PmemDeviceNew)?; + .map_err(Error::PmemDeviceNew)?; Ok(VirtioDeviceStub { dev: Box::new(dev) as Box<dyn VirtioDevice>, @@ -964,6 +975,7 @@ fn create_virtio_devices( gpu_device_socket: VmMemoryControlRequestSocket, balloon_device_socket: BalloonControlResponseSocket, disk_device_sockets: &mut Vec<DiskControlResponseSocket>, + pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>, ) -> DeviceResult<Vec<VirtioDeviceStub>> { let mut devs = Vec::new(); @@ -973,7 +985,15 @@ fn create_virtio_devices( } for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() { - devs.push(create_pmem_device(cfg, vm, resources, pmem_disk, index)?); + let pmem_device_socket = pmem_device_sockets.remove(0); + devs.push(create_pmem_device( + cfg, + vm, + resources, + pmem_disk, + index, + pmem_device_socket, + )?); } devs.push(create_rng_device(cfg)?); @@ -1124,6 +1144,7 @@ fn create_devices( gpu_device_socket: VmMemoryControlRequestSocket, balloon_device_socket: BalloonControlResponseSocket, disk_device_sockets: &mut Vec<DiskControlResponseSocket>, + pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>, usb_provider: HostBackendDeviceProvider, ) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> { let stubs = create_virtio_devices( @@ -1136,6 +1157,7 @@ fn create_devices( gpu_device_socket, balloon_device_socket, disk_device_sockets, + pmem_device_sockets, )?; let mut pci_devices = Vec::new(); @@ -1606,6 +1628,15 @@ pub fn run_config(cfg: Config) -> Result<()> { disk_device_sockets.push(disk_device_socket); } + let mut pmem_device_sockets = Vec::new(); + let pmem_count = cfg.pmem_devices.len(); + for _ in 0..pmem_count { + let (pmem_host_socket, pmem_device_socket) = + msg_socket::pair::<VmMsyncResponse, 
VmMsyncRequest>().map_err(Error::CreateSocket)?; + pmem_device_sockets.push(pmem_device_socket); + control_sockets.push(TaggedControlSocket::VmMsync(pmem_host_socket)); + } + let (gpu_host_socket, gpu_device_socket) = msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?; control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket)); @@ -1633,6 +1664,7 @@ pub fn run_config(cfg: Config) -> Result<()> { gpu_device_socket, balloon_device_socket, &mut disk_device_sockets, + &mut pmem_device_sockets, usb_provider, ) }, @@ -2031,6 +2063,21 @@ fn run_control( } } }, + TaggedControlSocket::VmMsync(socket) => match socket.recv() { + Ok(request) => { + let response = request.execute(&mut linux.vm); + if let Err(e) = socket.send(&response) { + error!("failed to send VmMsyncResponse: {}", e); + } + } + Err(e) => { + if let MsgError::BadRecvSize { actual: 0, .. } = e { + vm_control_indices_to_remove.push(index); + } else { + error!("failed to recv VmMsyncRequest: {}", e); + } + } + }, } } } diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs index 4d48ff4..4035c1c 100644 --- a/vm_control/src/lib.rs +++ b/vm_control/src/lib.rs @@ -408,6 +408,50 @@ pub enum VmIrqResponse { Err(SysError), } +#[derive(MsgOnSocket, Debug)] +pub enum VmMsyncRequest { + /// Flush the content of a memory mapping to its backing file. + /// `slot` selects the arena (as returned by `Vm::add_mmap_arena`). + /// `offset` is the offset of the mapping to sync within the arena. + MsyncArena { slot: u32, offset: usize }, +} + +#[derive(MsgOnSocket, Debug)] +pub enum VmMsyncResponse { + Ok, + Err(SysError), +} + +impl VmMsyncRequest { + /// Executes this request on the given Vm. + /// + /// # Arguments + /// * `vm` - The `Vm` to perform the request on. 
+ /// + /// This does not return a result, instead encapsulating the success or failure in a + /// `VmMsyncResponse` with the intended purpose of sending the response back over the socket + /// that received this `VmMsyncRequest`. + pub fn execute(&self, vm: &mut Vm) -> VmMsyncResponse { + use self::VmMsyncRequest::*; + match *self { + MsyncArena { slot, offset } => { + if let Some(arena) = vm.get_mmap_arena(slot) { + match arena.msync(offset) { + Ok(true) => VmMsyncResponse::Ok, + Ok(false) => VmMsyncResponse::Err(SysError::new(EINVAL)), + Err(e) => match e { + MmapError::SystemCallFailed(errno) => VmMsyncResponse::Err(errno), + _ => VmMsyncResponse::Err(SysError::new(EINVAL)), + }, + } + } else { + VmMsyncResponse::Err(SysError::new(EINVAL)) + } + } + } + } +} + pub type BalloonControlRequestSocket = MsgSocket<BalloonControlCommand, BalloonControlResult>; pub type BalloonControlResponseSocket = MsgSocket<BalloonControlResult, BalloonControlCommand>; @@ -422,6 +466,9 @@ pub type VmMemoryControlResponseSocket = MsgSocket<VmMemoryResponse, VmMemoryReq pub type VmIrqRequestSocket = MsgSocket<VmIrqRequest, VmIrqResponse>; pub type VmIrqResponseSocket = MsgSocket<VmIrqResponse, VmIrqRequest>; +pub type VmMsyncRequestSocket = MsgSocket<VmMsyncRequest, VmMsyncResponse>; +pub type VmMsyncResponseSocket = MsgSocket<VmMsyncResponse, VmMsyncRequest>; + pub type VmControlRequestSocket = MsgSocket<VmRequest, VmResponse>; pub type VmControlResponseSocket = MsgSocket<VmResponse, VmRequest>; |