diff options
author | Daniel Prilik <prilik@google.com> | 2019-03-29 10:48:57 -0700 |
---|---|---|
committer | chrome-bot <chrome-bot@chromium.org> | 2019-04-12 14:50:03 -0700 |
commit | d49adc9005a300dbae60bd8ecb12ea620fc0fd31 (patch) | |
tree | 21bd0290bccf00dd7ba09e4ee359e3ade86b0140 /sys_util | |
parent | c211a6ccc69dbf090002e58822846c2b4a69519c (diff) | |
download | crosvm-d49adc9005a300dbae60bd8ecb12ea620fc0fd31.tar crosvm-d49adc9005a300dbae60bd8ecb12ea620fc0fd31.tar.gz crosvm-d49adc9005a300dbae60bd8ecb12ea620fc0fd31.tar.bz2 crosvm-d49adc9005a300dbae60bd8ecb12ea620fc0fd31.tar.lz crosvm-d49adc9005a300dbae60bd8ecb12ea620fc0fd31.tar.xz crosvm-d49adc9005a300dbae60bd8ecb12ea620fc0fd31.tar.zst crosvm-d49adc9005a300dbae60bd8ecb12ea620fc0fd31.zip |
sys_util: add MemoryMappingArena
There is a hard-limit to the number of MemoryMaps that can be added to a KVM VM, an arch-dependent number defined as KVM_USER_MEM_SLOTS. e.g: on x86 this is 509 (512 - 3 internal slots). For most purposes, this isn't too much of an issue, but there are some cases where one might want to share a lot of mmaps with a Guest. e.g: virtio-fs uses a large cache region for mapping in slices of file fds directly into guest memory. If one tries to add a new KVM memory region for each mmap, the number of available slots is quickly exhausted. MemoryMappingArena is a way to work around this limitation by allocating a single KVM memory region for a large slice of memory, and then using mmap with MAP_FIXED to override slices of this "arena" hostside, thereby achieving the same effect without quickly exhausting the number of KVM memory region slots. BUG=chromium:936567 TEST=cargo test -p sys_util Change-Id: I89cc3b22cdba6756b2d76689176d7147cf238f07 Reviewed-on: https://chromium-review.googlesource.com/1546600 Commit-Ready: ChromeOS CL Exonerator Bot <chromiumos-cl-exonerator@appspot.gserviceaccount.com> Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Zach Reizner <zachr@chromium.org>
Diffstat (limited to 'sys_util')
-rw-r--r-- | sys_util/src/mmap.rs | 461 |
1 files changed, 423 insertions, 38 deletions
diff --git a/sys_util/src/mmap.rs b/sys_util/src/mmap.rs index d0f043b..8da2486 100644 --- a/sys_util/src/mmap.rs +++ b/sys_util/src/mmap.rs @@ -6,8 +6,10 @@ //! mmap object leaves scope. use std; +use std::collections::BTreeMap; use std::fmt::{self, Display}; use std::io::{Read, Write}; +use std::mem::ManuallyDrop; use std::os::unix::io::AsRawFd; use std::ptr::null_mut; @@ -15,7 +17,7 @@ use data_model::volatile_memory::*; use data_model::DataInit; use libc::{self, c_int}; -use crate::errno; +use crate::{errno, pagesize}; #[derive(Debug)] pub enum Error { @@ -23,6 +25,10 @@ pub enum Error { InvalidAddress, /// Requested offset is out of range of `libc::off_t`. InvalidOffset, + /// Requested mapping is not page aligned + NotPageAligned, + /// Overlapping regions + Overlapping(usize, usize), /// Requested memory range spans past the end of the region. InvalidRange(usize, usize), /// Couldn't read from the given source. @@ -43,6 +49,12 @@ impl Display for Error { match self { InvalidAddress => write!(f, "requested memory out of range"), InvalidOffset => write!(f, "requested offset is out of range of off_t"), + NotPageAligned => write!(f, "requested memory is not page aligned"), + Overlapping(offset, count) => write!( + f, + "requested memory range overlaps with existing region: offset={} size={}", + offset, count + ), InvalidRange(offset, count) => write!( f, "requested memory range spans past the end of the region: offset={} count={}", @@ -117,7 +129,7 @@ impl MemoryMapping { /// # Arguments /// * `size` - Size of memory region in bytes. pub fn new(size: usize) -> Result<MemoryMapping> { - Self::new_protection(size, Protection::read_write()) + MemoryMapping::new_protection(size, Protection::read_write()) } /// Creates an anonymous shared mapping of `size` bytes with `prot` protection. 
@@ -128,34 +140,18 @@ impl MemoryMapping { pub fn new_protection(size: usize, prot: Protection) -> Result<MemoryMapping> { // This is safe because we are creating an anonymous mapping in a place not already used by // any other area in this process. - let addr = unsafe { - libc::mmap( - null_mut(), + unsafe { + MemoryMapping::try_mmap( + None, size, prot.into(), libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE, - -1, - 0, + None, ) - }; - if addr == libc::MAP_FAILED { - return Err(Error::SystemCallFailed(errno::Error::last())); } - // This is safe because we call madvise with a valid address and size, and we check the - // return value. We only warn about an error because failure here is not fatal to the mmap. - if unsafe { libc::madvise(addr, size, libc::MADV_DONTDUMP) } == -1 { - warn!( - "failed madvise(MADV_DONTDUMP) on mmap: {}", - errno::Error::last() - ); - } - Ok(MemoryMapping { - addr: addr as *mut u8, - size, - }) } - /// Maps the first `size` bytes of the given `fd`. + /// Maps the first `size` bytes of the given `fd` as read/write. /// /// # Arguments /// * `fd` - File descriptor to mmap from. @@ -164,34 +160,121 @@ impl MemoryMapping { MemoryMapping::from_fd_offset(fd, size, 0) } - /// Maps the `size` bytes starting at `offset` bytes of the given `fd`. + pub fn from_fd_offset(fd: &dyn AsRawFd, size: usize, offset: usize) -> Result<MemoryMapping> { + MemoryMapping::from_fd_offset_protection(fd, size, offset, Protection::read_write()) + } + + /// Maps the `size` bytes starting at `offset` bytes of the given `fd` as read/write. /// /// # Arguments /// * `fd` - File descriptor to mmap from. /// * `size` - Size of memory region in bytes. /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap. 
- pub fn from_fd_offset(fd: &dyn AsRawFd, size: usize, offset: usize) -> Result<MemoryMapping> { - if offset > libc::off_t::max_value() as usize { - return Err(Error::InvalidOffset); - } - // This is safe because we are creating a mapping in a place not already used by any other - // area in this process. - let addr = unsafe { - libc::mmap( - null_mut(), + /// * `prot` - Protection (e.g. readable/writable) of the memory region. + pub fn from_fd_offset_protection( + fd: &dyn AsRawFd, + size: usize, + offset: usize, + prot: Protection, + ) -> Result<MemoryMapping> { + // This is safe because we are creating an anonymous mapping in a place not already used by + // any other area in this process. + unsafe { + MemoryMapping::try_mmap( + None, size, - libc::PROT_READ | libc::PROT_WRITE, + prot.into(), libc::MAP_SHARED, - fd.as_raw_fd(), - offset as libc::off_t, + Some((fd, offset)), ) + } + } + + /// Creates an anonymous shared mapping of `size` bytes with `prot` protection. + /// Unsafe: unmaps any mmap'd regions already present at (addr..addr+size). + /// + /// # Arguments + /// * `addr` - Memory address to mmap at. + /// * `size` - Size of memory region in bytes. + /// * `prot` - Protection (e.g. readable/writable) of the memory region. + pub unsafe fn new_protection_fixed( + addr: *mut u8, + size: usize, + prot: Protection, + ) -> Result<MemoryMapping> { + MemoryMapping::try_mmap( + Some(addr), + size, + prot.into(), + libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE, + None, + ) + } + + /// Maps the `size` bytes starting at `offset` bytes of the given `fd` with + /// `prot` protections. + /// Unsafe: unmaps any mmap'd regions already present at (addr..addr+size). + /// + /// # Arguments + /// * `addr` - Memory address to mmap at. + /// * `fd` - File descriptor to mmap from. + /// * `size` - Size of memory region in bytes. + /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap. + /// * `prot` - Protection (e.g. 
readable/writable) of the memory region. + pub unsafe fn from_fd_offset_protection_fixed( + addr: *mut u8, + fd: &dyn AsRawFd, + size: usize, + offset: usize, + prot: Protection, + ) -> Result<MemoryMapping> { + MemoryMapping::try_mmap( + Some(addr), + size, + prot.into(), + libc::MAP_SHARED | libc::MAP_NORESERVE, + Some((fd, offset)), + ) + } + + /// Helper wrapper around libc::mmap that does some basic validation, and calls + /// madvise with MADV_DONTDUMP on the created mmap + unsafe fn try_mmap( + addr: Option<*mut u8>, + size: usize, + prot: c_int, + flags: c_int, + fd: Option<(&AsRawFd, usize)>, + ) -> Result<MemoryMapping> { + let mut flags = flags; + // If addr is provided, set the FIXED flag, and validate addr alignment + let addr = match addr { + Some(addr) => { + if (addr as usize) % pagesize() != 0 { + return Err(Error::NotPageAligned); + } + flags = flags | libc::MAP_FIXED; + addr as *mut libc::c_void + } + None => null_mut(), + }; + // If fd is provided, validate fd offset is within bounds + let (fd, offset) = match fd { + Some((fd, offset)) => { + if offset > libc::off_t::max_value() as usize { + return Err(Error::InvalidOffset); + } + (fd.as_raw_fd(), offset as libc::off_t) + } + None => (-1, 0), }; + let addr = libc::mmap(addr, size, prot, flags, fd, offset); if addr == libc::MAP_FAILED { return Err(Error::SystemCallFailed(errno::Error::last())); } // This is safe because we call madvise with a valid address and size, and we check the // return value. We only warn about an error because failure here is not fatal to the mmap. - if unsafe { libc::madvise(addr, size, libc::MADV_DONTDUMP) } == -1 { + if libc::madvise(addr, size, libc::MADV_DONTDUMP) == -1 { warn!( "failed madvise(MADV_DONTDUMP) on mmap: {}", errno::Error::last() @@ -203,7 +286,7 @@ impl MemoryMapping { }) } - /// Returns a pointer to the begining of the memory region. Should only be + /// Returns a pointer to the beginning of the memory region. 
Should only be /// used for passing this region to ioctls for setting guest memory. pub fn as_ptr(&self) -> *mut u8 { self.addr @@ -214,6 +297,23 @@ impl MemoryMapping { self.size } + /// Calls msync with MS_SYNC on the mapping. + pub fn msync(&self) -> Result<()> { + // This is safe since we use the exact address and length of a known + // good memory mapping. + let ret = unsafe { + libc::msync( + self.as_ptr() as *mut libc::c_void, + self.size(), + libc::MS_SYNC, + ) + }; + if ret == -1 { + return Err(Error::SystemCallFailed(errno::Error::last())); + } + Ok(()) + } + /// Writes a slice to the memory region at the specified offset. /// Returns the number of bytes written. The number of bytes written can /// be less than the length of the slice if there isn't enough room in the @@ -468,6 +568,236 @@ impl Drop for MemoryMapping { } } +/// Tracks Fixed Memory Maps within an anonymous memory-mapped fixed-sized arena +/// in the current process. +pub struct MemoryMappingArena { + addr: *mut u8, + size: usize, + // When doing in-place swaps of MemoryMappings, the BTreeMap returns a owned + // instance of the old MemoryMapping. When the old MemoryMapping falls out + // of scope, it calls munmap on the same region as the new MemoryMapping + // that was just mapped in. To avoid accidentally munmapping the new, + // MemoryMapping, all mappings are wrapped in a ManuallyDrop, and then + // "forgotten" when removed from the BTreeMap + maps: BTreeMap<usize, ManuallyDrop<MemoryMapping>>, +} + +// Send and Sync aren't automatically inherited for the raw address pointer. +// Accessing that pointer is only done through the stateless interface which +// allows the object to be shared by multiple threads without a decrease in +// safety. +unsafe impl Send for MemoryMappingArena {} +unsafe impl Sync for MemoryMappingArena {} + +impl MemoryMappingArena { + /// Creates an mmap arena of `size` bytes. + /// + /// # Arguments + /// * `size` - Size of memory region in bytes. 
+ pub fn new(size: usize) -> Result<MemoryMappingArena> { + // Reserve the arena's memory using an anonymous read-only mmap. + // The actual MemoryMapping object is forgotten, with + // MemoryMappingArena manually calling munmap on drop. + let mmap = MemoryMapping::new_protection(size, Protection::none().set_read())?; + let addr = mmap.as_ptr(); + let size = mmap.size(); + std::mem::forget(mmap); + Ok(MemoryMappingArena { + addr, + size, + maps: BTreeMap::new(), + }) + } + + /// Anonymously maps `size` bytes at `offset` bytes from the start of the arena. + /// `offset` must be page aligned. + /// + /// # Arguments + /// * `offset` - Page aligned offset into the arena in bytes. + /// * `size` - Size of memory region in bytes. + /// * `fd` - File descriptor to mmap from. + pub fn add_anon(&mut self, offset: usize, size: usize) -> Result<()> { + self.try_add(offset, size, Protection::read_write(), None) + } + + /// Maps `size` bytes from the start of the given `fd` at `offset` bytes from + /// the start of the arena. `offset` must be page aligned. + /// + /// # Arguments + /// * `offset` - Page aligned offset into the arena in bytes. + /// * `size` - Size of memory region in bytes. + /// * `fd` - File descriptor to mmap from. + pub fn add_fd(&mut self, offset: usize, size: usize, fd: &dyn AsRawFd) -> Result<()> { + self.add_fd_offset(offset, size, fd, 0) + } + + /// Maps `size` bytes starting at `fs_offset` bytes from within the given `fd` + /// at `offset` bytes from the start of the arena. `offset` must be page aligned. + /// + /// # Arguments + /// * `offset` - Page aligned offset into the arena in bytes. + /// * `size` - Size of memory region in bytes. + /// * `fd` - File descriptor to mmap from. + /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap. 
+ pub fn add_fd_offset( + &mut self, + offset: usize, + size: usize, + fd: &dyn AsRawFd, + fd_offset: usize, + ) -> Result<()> { + self.add_fd_offset_protection(offset, size, fd, fd_offset, Protection::read_write()) + } + + /// Maps `size` bytes starting at `fs_offset` bytes from within the given `fd` + /// at `offset` bytes from the start of the arena with `prot` protections. + /// `offset` must be page aligned. + /// + /// # Arguments + /// * `offset` - Page aligned offset into the arena in bytes. + /// * `size` - Size of memory region in bytes. + /// * `fd` - File descriptor to mmap from. + /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap. + /// * `prot` - Protection (e.g. readable/writable) of the memory region. + pub fn add_fd_offset_protection( + &mut self, + offset: usize, + size: usize, + fd: &dyn AsRawFd, + fd_offset: usize, + prot: Protection, + ) -> Result<()> { + self.try_add(offset, size, prot, Some((fd, fd_offset))) + } + + /// Helper method that calls appropriate MemoryMapping constructor and adds + /// the resulting map into the arena. + fn try_add( + &mut self, + offset: usize, + size: usize, + prot: Protection, + fd: Option<(&AsRawFd, usize)>, + ) -> Result<()> { + self.validate_range(offset, size)?; + + // This is safe since the range has been validated. + let mmap = unsafe { + match fd { + Some((fd, fd_offset)) => MemoryMapping::from_fd_offset_protection_fixed( + (self.addr as usize + offset) as *mut u8, + fd, + size, + fd_offset, + prot, + )?, + None => MemoryMapping::new_protection_fixed( + (self.addr as usize + offset) as *mut u8, + size, + prot, + )?, + } + }; + + self.maps.insert(offset, ManuallyDrop::new(mmap)); + Ok(()) + } + + /// Removes a mapping at `offset` from the start of the arena. + /// Returns a boolean indicating if there was a mapping present at `offset`. + /// If none was present, this method is a noop. 
+ pub fn remove(&mut self, offset: usize) -> Result<bool> { + if let Some(mmap) = self.maps.remove(&offset) { + // Instead of munmapping the memory map, leaving an unprotected hole + // in the arena, swap this mmap with an anonymous protection. + // This is safe since the memory mapping perfectly overlaps with an + // existing, known good memory mapping. + let mmap = unsafe { + MemoryMapping::new_protection_fixed( + mmap.as_ptr(), + mmap.size(), + Protection::none().set_read(), + )? + }; + self.maps.insert(offset, ManuallyDrop::new(mmap)); + Ok(true) + } else { + Ok(false) + } + } + + /// Calls msync with MS_SYNC on the mapping at `offset` from the start of + /// the arena. + /// Returns a boolean indicating if there was a mapping present at `offset`. + /// If none was present, this method is a noop. + pub fn msync(&self, offset: usize) -> Result<bool> { + if let Some(mmap) = self.maps.get(&offset) { + mmap.msync()?; + Ok(true) + } else { + Ok(false) + } + } + + /// Returns a pointer to the beginning of the memory region. Should only be + /// used for passing this region to ioctls for setting guest memory. + pub fn as_ptr(&self) -> *mut u8 { + self.addr + } + + /// Returns the size of the memory region in bytes. + pub fn size(&self) -> usize { + self.size + } + + /// Validates `offset` and `size`. + /// Checks that offset..offset+size doesn't overlap with existing mappings. + /// Also ensures correct alignment, and checks for any overflow. + /// Note: offset..offset+size is considered valid if it _perfectly_ overlaps + /// with single other region. 
+ fn validate_range(&self, offset: usize, size: usize) -> Result<()> { + // Ensure offset is page-aligned + if offset % pagesize() != 0 { + return Err(Error::NotPageAligned); + } + // Ensure offset + size doesn't overflow + let end_offset = offset.checked_add(size).ok_or(Error::InvalidAddress)?; + // Ensure offset + size are within the arena bounds + if end_offset > self.size { + return Err(Error::InvalidAddress); + } + // Ensure offset..offset+size doesn't overlap with existing regions + // Find the offset + size of the first mapping before the desired offset + let (prev_offset, prev_size) = match self.maps.range(..offset).rev().next() { + Some((offset, mmap)) => (*offset, mmap.size()), + None => { + // Empty map + return Ok(()); + } + }; + if offset == prev_offset { + // Perfectly overlapping regions are allowed + if size != prev_size { + return Err(Error::Overlapping(offset, size)); + } + } else if offset < (prev_offset + prev_size) { + return Err(Error::Overlapping(offset, size)); + } + + Ok(()) + } +} + +impl Drop for MemoryMappingArena { + fn drop(&mut self) { + // This is safe because we mmap the area at addr ourselves, and nobody + // else is holding a reference to it. 
+ unsafe { + libc::munmap(self.addr as *mut libc::c_void, self.size); + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -560,4 +890,59 @@ mod tests { e => panic!("unexpected error: {}", e), } } + + #[test] + fn arena_new() { + let m = MemoryMappingArena::new(0x40000).unwrap(); + assert_eq!(m.size(), 0x40000); + } + + #[test] + fn arena_add() { + let mut m = MemoryMappingArena::new(0x40000).unwrap(); + assert!(m.add_anon(0, pagesize() * 4).is_ok()); + } + + #[test] + fn arena_remove() { + let mut m = MemoryMappingArena::new(0x40000).unwrap(); + assert!(m.add_anon(0, pagesize() * 4).is_ok()); + assert!(m.remove(0).unwrap(), true); + assert!(m.remove(0).unwrap(), false); + } + + #[test] + fn arena_add_overlap_error() { + let page = pagesize(); + let mut m = MemoryMappingArena::new(page * 4).unwrap(); + assert!(m.add_anon(0, page * 4).is_ok()); + let res = m.add_anon(page, page).unwrap_err(); + match res { + Error::Overlapping(a, o) => { + assert_eq!((a, o), (page, page)); + } + e => panic!("unexpected error: {}", e), + } + } + + #[test] + fn arena_add_alignment_error() { + let mut m = MemoryMappingArena::new(pagesize() * 2).unwrap(); + assert!(m.add_anon(0, 0x100).is_ok()); + let res = m.add_anon(pagesize() + 1, 0x100).unwrap_err(); + match res { + Error::NotPageAligned => {} + e => panic!("unexpected error: {}", e), + } + } + + #[test] + fn arena_add_oob_error() { + let mut m = MemoryMappingArena::new(pagesize()).unwrap(); + let res = m.add_anon(0, pagesize() + 1).unwrap_err(); + match res { + Error::InvalidAddress => {} + e => panic!("unexpected error: {}", e), + } + } } |