author     Daniel Prilik <prilik@google.com>     2019-03-29 10:48:57 -0700
committer  chrome-bot <chrome-bot@chromium.org>  2019-04-12 14:50:03 -0700
commit     d49adc9005a300dbae60bd8ecb12ea620fc0fd31 (patch)
tree       21bd0290bccf00dd7ba09e4ee359e3ade86b0140 /sys_util
parent     c211a6ccc69dbf090002e58822846c2b4a69519c (diff)
sys_util: add MemoryMappingArena
There is a hard limit to the number of memory maps that can be added to a
KVM VM, an arch-dependent number defined as KVM_USER_MEM_SLOTS. For example,
on x86 this is 509 (512 minus 3 internal slots).

For most purposes this isn't an issue, but there are cases where one might
want to share a large number of mmaps with a guest. For example, virtio-fs
uses a large cache region for mapping slices of file fds directly into guest
memory. If a new KVM memory region is added for each mmap, the available
slots are quickly exhausted.

MemoryMappingArena works around this limitation by allocating a single KVM
memory region for one large slice of memory, and then using mmap with
MAP_FIXED to overlay slices of this "arena" host-side, achieving the same
effect without exhausting the limited number of KVM memory region slots.
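
A minimal usage sketch (illustrative, not part of this change): the API names
below are the ones added in this patch, but the file, sizes, and offsets are
hypothetical, and registering the arena with KVM via as_ptr()/size() happens
elsewhere. Assumes MemoryMappingArena is re-exported at the sys_util crate
root.

    use std::fs::File;
    use sys_util::MemoryMappingArena;

    fn demo(file: &File) {
        // Reserve one large region up front; only this single range needs a
        // KVM memory slot.
        let mut arena = MemoryMappingArena::new(8 * 1024 * 1024).unwrap();
        // Overlay page-aligned slices of the file into the arena host-side.
        // Each call re-mmaps a window with MAP_FIXED; no new KVM slot is used.
        arena.add_fd_offset(0, 1024 * 1024, file, 0).unwrap();
        arena.add_fd_offset(1024 * 1024, 1024 * 1024, file, 1024 * 1024).unwrap();
        // Flush the first window back to the file, then replace it with an
        // inert anonymous mapping instead of leaving a hole in the arena.
        arena.msync(0).unwrap();
        arena.remove(0).unwrap();
    }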

BUG=chromium:936567
TEST=cargo test -p sys_util

Change-Id: I89cc3b22cdba6756b2d76689176d7147cf238f07
Reviewed-on: https://chromium-review.googlesource.com/1546600
Commit-Ready: ChromeOS CL Exonerator Bot <chromiumos-cl-exonerator@appspot.gserviceaccount.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Zach Reizner <zachr@chromium.org>
Diffstat (limited to 'sys_util')
-rw-r--r--  sys_util/src/mmap.rs  |  461
1 file changed, 423 insertions(+), 38 deletions(-)
diff --git a/sys_util/src/mmap.rs b/sys_util/src/mmap.rs
index d0f043b..8da2486 100644
--- a/sys_util/src/mmap.rs
+++ b/sys_util/src/mmap.rs
@@ -6,8 +6,10 @@
 //! mmap object leaves scope.
 
 use std;
+use std::collections::BTreeMap;
 use std::fmt::{self, Display};
 use std::io::{Read, Write};
+use std::mem::ManuallyDrop;
 use std::os::unix::io::AsRawFd;
 use std::ptr::null_mut;
 
@@ -15,7 +17,7 @@ use data_model::volatile_memory::*;
 use data_model::DataInit;
 use libc::{self, c_int};
 
-use crate::errno;
+use crate::{errno, pagesize};
 
 #[derive(Debug)]
 pub enum Error {
@@ -23,6 +25,10 @@ pub enum Error {
     InvalidAddress,
     /// Requested offset is out of range of `libc::off_t`.
     InvalidOffset,
+    /// Requested mapping is not page aligned.
+    NotPageAligned,
+    /// Requested memory range overlaps an existing region.
+    Overlapping(usize, usize),
     /// Requested memory range spans past the end of the region.
     InvalidRange(usize, usize),
     /// Couldn't read from the given source.
@@ -43,6 +49,12 @@ impl Display for Error {
         match self {
             InvalidAddress => write!(f, "requested memory out of range"),
             InvalidOffset => write!(f, "requested offset is out of range of off_t"),
+            NotPageAligned => write!(f, "requested memory is not page aligned"),
+            Overlapping(offset, count) => write!(
+                f,
+                "requested memory range overlaps with existing region: offset={} size={}",
+                offset, count
+            ),
             InvalidRange(offset, count) => write!(
                 f,
                 "requested memory range spans past the end of the region: offset={} count={}",
@@ -117,7 +129,7 @@ impl MemoryMapping {
     /// # Arguments
     /// * `size` - Size of memory region in bytes.
     pub fn new(size: usize) -> Result<MemoryMapping> {
-        Self::new_protection(size, Protection::read_write())
+        MemoryMapping::new_protection(size, Protection::read_write())
     }
 
     /// Creates an anonymous shared mapping of `size` bytes with `prot` protection.
@@ -128,34 +140,18 @@ impl MemoryMapping {
     pub fn new_protection(size: usize, prot: Protection) -> Result<MemoryMapping> {
         // This is safe because we are creating an anonymous mapping in a place not already used by
         // any other area in this process.
-        let addr = unsafe {
-            libc::mmap(
-                null_mut(),
+        unsafe {
+            MemoryMapping::try_mmap(
+                None,
                 size,
                 prot.into(),
                 libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
-                -1,
-                0,
+                None,
             )
-        };
-        if addr == libc::MAP_FAILED {
-            return Err(Error::SystemCallFailed(errno::Error::last()));
         }
-        // This is safe because we call madvise with a valid address and size, and we check the
-        // return value. We only warn about an error because failure here is not fatal to the mmap.
-        if unsafe { libc::madvise(addr, size, libc::MADV_DONTDUMP) } == -1 {
-            warn!(
-                "failed madvise(MADV_DONTDUMP) on mmap: {}",
-                errno::Error::last()
-            );
-        }
-        Ok(MemoryMapping {
-            addr: addr as *mut u8,
-            size,
-        })
     }
 
-    /// Maps the first `size` bytes of the given `fd`.
+    /// Maps the first `size` bytes of the given `fd` as read/write.
     ///
     /// # Arguments
     /// * `fd` - File descriptor to mmap from.
@@ -164,34 +160,121 @@ impl MemoryMapping {
         MemoryMapping::from_fd_offset(fd, size, 0)
     }
 
-    /// Maps the `size` bytes starting at `offset` bytes of the given `fd`.
+    pub fn from_fd_offset(fd: &dyn AsRawFd, size: usize, offset: usize) -> Result<MemoryMapping> {
+        MemoryMapping::from_fd_offset_protection(fd, size, offset, Protection::read_write())
+    }
+
+    /// Maps the `size` bytes starting at `offset` bytes of the given `fd` as read/write.
     ///
     /// # Arguments
     /// * `fd` - File descriptor to mmap from.
     /// * `size` - Size of memory region in bytes.
     /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
-    pub fn from_fd_offset(fd: &dyn AsRawFd, size: usize, offset: usize) -> Result<MemoryMapping> {
-        if offset > libc::off_t::max_value() as usize {
-            return Err(Error::InvalidOffset);
-        }
-        // This is safe because we are creating a mapping in a place not already used by any other
-        // area in this process.
-        let addr = unsafe {
-            libc::mmap(
-                null_mut(),
+    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
+    pub fn from_fd_offset_protection(
+        fd: &dyn AsRawFd,
+        size: usize,
+        offset: usize,
+        prot: Protection,
+    ) -> Result<MemoryMapping> {
+        // This is safe because we are creating a new mapping in a place not already used by
+        // any other area in this process.
+        unsafe {
+            MemoryMapping::try_mmap(
+                None,
                 size,
-                libc::PROT_READ | libc::PROT_WRITE,
+                prot.into(),
                 libc::MAP_SHARED,
-                fd.as_raw_fd(),
-                offset as libc::off_t,
+                Some((fd, offset)),
             )
+        }
+    }
+
+    /// Creates an anonymous shared mapping of `size` bytes with `prot` protection at `addr`.
+    /// Unsafe: unmaps any mmap'd regions already present at (addr..addr+size).
+    ///
+    /// # Arguments
+    /// * `addr` - Memory address to mmap at.
+    /// * `size` - Size of memory region in bytes.
+    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
+    pub unsafe fn new_protection_fixed(
+        addr: *mut u8,
+        size: usize,
+        prot: Protection,
+    ) -> Result<MemoryMapping> {
+        MemoryMapping::try_mmap(
+            Some(addr),
+            size,
+            prot.into(),
+            libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
+            None,
+        )
+    }
+
+    /// Maps the `size` bytes starting at `offset` bytes of the given `fd` with
+    /// `prot` protections.
+    /// Unsafe: unmaps any mmap'd regions already present at (addr..addr+size).
+    ///
+    /// # Arguments
+    /// * `addr` - Memory address to mmap at.
+    /// * `fd` - File descriptor to mmap from.
+    /// * `size` - Size of memory region in bytes.
+    /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
+    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
+    pub unsafe fn from_fd_offset_protection_fixed(
+        addr: *mut u8,
+        fd: &dyn AsRawFd,
+        size: usize,
+        offset: usize,
+        prot: Protection,
+    ) -> Result<MemoryMapping> {
+        MemoryMapping::try_mmap(
+            Some(addr),
+            size,
+            prot.into(),
+            libc::MAP_SHARED | libc::MAP_NORESERVE,
+            Some((fd, offset)),
+        )
+    }
+
+    /// Helper wrapper around libc::mmap that does some basic validation, and calls
+    /// madvise with MADV_DONTDUMP on the created mmap
+    unsafe fn try_mmap(
+        addr: Option<*mut u8>,
+        size: usize,
+        prot: c_int,
+        flags: c_int,
+        fd: Option<(&dyn AsRawFd, usize)>,
+    ) -> Result<MemoryMapping> {
+        let mut flags = flags;
+        // If addr is provided, set the FIXED flag, and validate addr alignment
+        let addr = match addr {
+            Some(addr) => {
+                if (addr as usize) % pagesize() != 0 {
+                    return Err(Error::NotPageAligned);
+                }
+                flags = flags | libc::MAP_FIXED;
+                addr as *mut libc::c_void
+            }
+            None => null_mut(),
+        };
+        // If fd is provided, validate fd offset is within bounds
+        let (fd, offset) = match fd {
+            Some((fd, offset)) => {
+                if offset > libc::off_t::max_value() as usize {
+                    return Err(Error::InvalidOffset);
+                }
+                (fd.as_raw_fd(), offset as libc::off_t)
+            }
+            None => (-1, 0),
         };
+        let addr = libc::mmap(addr, size, prot, flags, fd, offset);
         if addr == libc::MAP_FAILED {
             return Err(Error::SystemCallFailed(errno::Error::last()));
         }
         // This is safe because we call madvise with a valid address and size, and we check the
         // return value. We only warn about an error because failure here is not fatal to the mmap.
-        if unsafe { libc::madvise(addr, size, libc::MADV_DONTDUMP) } == -1 {
+        if libc::madvise(addr, size, libc::MADV_DONTDUMP) == -1 {
             warn!(
                 "failed madvise(MADV_DONTDUMP) on mmap: {}",
                 errno::Error::last()
@@ -203,7 +286,7 @@ impl MemoryMapping {
         })
     }
 
-    /// Returns a pointer to the begining of the memory region.  Should only be
+    /// Returns a pointer to the beginning of the memory region. Should only be
     /// used for passing this region to ioctls for setting guest memory.
     pub fn as_ptr(&self) -> *mut u8 {
         self.addr
@@ -214,6 +297,23 @@ impl MemoryMapping {
         self.size
     }
 
+    /// Calls msync with MS_SYNC on the mapping.
+    pub fn msync(&self) -> Result<()> {
+        // This is safe since we use the exact address and length of a known
+        // good memory mapping.
+        let ret = unsafe {
+            libc::msync(
+                self.as_ptr() as *mut libc::c_void,
+                self.size(),
+                libc::MS_SYNC,
+            )
+        };
+        if ret == -1 {
+            return Err(Error::SystemCallFailed(errno::Error::last()));
+        }
+        Ok(())
+    }
+
     /// Writes a slice to the memory region at the specified offset.
     /// Returns the number of bytes written.  The number of bytes written can
     /// be less than the length of the slice if there isn't enough room in the
@@ -468,6 +568,236 @@ impl Drop for MemoryMapping {
     }
 }
 
+/// Tracks fixed memory maps within an anonymous, fixed-size memory-mapped arena
+/// in the current process.
+pub struct MemoryMappingArena {
+    addr: *mut u8,
+    size: usize,
+    // When doing in-place swaps of MemoryMappings, the BTreeMap returns an owned
+    // instance of the old MemoryMapping. When the old MemoryMapping falls out
+    // of scope, it calls munmap on the same region as the new MemoryMapping that
+    // was just mapped in. To avoid accidentally munmapping the new MemoryMapping,
+    // all mappings are wrapped in a ManuallyDrop and then "forgotten" when
+    // removed from the BTreeMap.
+    maps: BTreeMap<usize, ManuallyDrop<MemoryMapping>>,
+}
+
+// Send and Sync aren't automatically inherited for the raw address pointer.
+// Accessing that pointer is only done through the stateless interface which
+// allows the object to be shared by multiple threads without a decrease in
+// safety.
+unsafe impl Send for MemoryMappingArena {}
+unsafe impl Sync for MemoryMappingArena {}
+
+impl MemoryMappingArena {
+    /// Creates an mmap arena of `size` bytes.
+    ///
+    /// # Arguments
+    /// * `size` - Size of memory region in bytes.
+    pub fn new(size: usize) -> Result<MemoryMappingArena> {
+        // Reserve the arena's memory using an anonymous read-only mmap.
+        // The actual MemoryMapping object is forgotten, with
+        // MemoryMappingArena manually calling munmap on drop.
+        let mmap = MemoryMapping::new_protection(size, Protection::none().set_read())?;
+        let addr = mmap.as_ptr();
+        let size = mmap.size();
+        std::mem::forget(mmap);
+        Ok(MemoryMappingArena {
+            addr,
+            size,
+            maps: BTreeMap::new(),
+        })
+    }
+
+    /// Anonymously maps `size` bytes at `offset` bytes from the start of the arena.
+    /// `offset` must be page aligned.
+    ///
+    /// # Arguments
+    /// * `offset` - Page aligned offset into the arena in bytes.
+    /// * `size` - Size of memory region in bytes.
+    pub fn add_anon(&mut self, offset: usize, size: usize) -> Result<()> {
+        self.try_add(offset, size, Protection::read_write(), None)
+    }
+
+    /// Maps `size` bytes from the start of the given `fd` at `offset` bytes from
+    /// the start of the arena. `offset` must be page aligned.
+    ///
+    /// # Arguments
+    /// * `offset` - Page aligned offset into the arena in bytes.
+    /// * `size` - Size of memory region in bytes.
+    /// * `fd` - File descriptor to mmap from.
+    pub fn add_fd(&mut self, offset: usize, size: usize, fd: &dyn AsRawFd) -> Result<()> {
+        self.add_fd_offset(offset, size, fd, 0)
+    }
+
+    /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
+    /// at `offset` bytes from the start of the arena. `offset` must be page aligned.
+    ///
+    /// # Arguments
+    /// * `offset` - Page aligned offset into the arena in bytes.
+    /// * `size` - Size of memory region in bytes.
+    /// * `fd` - File descriptor to mmap from.
+    /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
+    pub fn add_fd_offset(
+        &mut self,
+        offset: usize,
+        size: usize,
+        fd: &dyn AsRawFd,
+        fd_offset: usize,
+    ) -> Result<()> {
+        self.add_fd_offset_protection(offset, size, fd, fd_offset, Protection::read_write())
+    }
+
+    /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
+    /// at `offset` bytes from the start of the arena with `prot` protections.
+    /// `offset` must be page aligned.
+    ///
+    /// # Arguments
+    /// * `offset` - Page aligned offset into the arena in bytes.
+    /// * `size` - Size of memory region in bytes.
+    /// * `fd` - File descriptor to mmap from.
+    /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
+    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
+    pub fn add_fd_offset_protection(
+        &mut self,
+        offset: usize,
+        size: usize,
+        fd: &dyn AsRawFd,
+        fd_offset: usize,
+        prot: Protection,
+    ) -> Result<()> {
+        self.try_add(offset, size, prot, Some((fd, fd_offset)))
+    }
+
+    /// Helper method that calls the appropriate MemoryMapping constructor and adds
+    /// the resulting map to the arena.
+    fn try_add(
+        &mut self,
+        offset: usize,
+        size: usize,
+        prot: Protection,
+        fd: Option<(&dyn AsRawFd, usize)>,
+    ) -> Result<()> {
+        self.validate_range(offset, size)?;
+
+        // This is safe since the range has been validated.
+        let mmap = unsafe {
+            match fd {
+                Some((fd, fd_offset)) => MemoryMapping::from_fd_offset_protection_fixed(
+                    (self.addr as usize + offset) as *mut u8,
+                    fd,
+                    size,
+                    fd_offset,
+                    prot,
+                )?,
+                None => MemoryMapping::new_protection_fixed(
+                    (self.addr as usize + offset) as *mut u8,
+                    size,
+                    prot,
+                )?,
+            }
+        };
+
+        self.maps.insert(offset, ManuallyDrop::new(mmap));
+        Ok(())
+    }
+
+    /// Removes a mapping at `offset` from the start of the arena.
+    /// Returns a boolean indicating if there was a mapping present at `offset`.
+    /// If none was present, this method is a noop.
+    pub fn remove(&mut self, offset: usize) -> Result<bool> {
+        if let Some(mmap) = self.maps.remove(&offset) {
+            // Instead of munmapping the memory map and leaving an unmapped hole
+            // in the arena, swap it with an anonymous, read-only mapping.
+            // This is safe since the new mapping perfectly overlaps with an
+            // existing, known-good memory mapping.
+            let mmap = unsafe {
+                MemoryMapping::new_protection_fixed(
+                    mmap.as_ptr(),
+                    mmap.size(),
+                    Protection::none().set_read(),
+                )?
+            };
+            self.maps.insert(offset, ManuallyDrop::new(mmap));
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    /// Calls msync with MS_SYNC on the mapping at `offset` from the start of
+    /// the arena.
+    /// Returns a boolean indicating if there was a mapping present at `offset`.
+    /// If none was present, this method is a noop.
+    pub fn msync(&self, offset: usize) -> Result<bool> {
+        if let Some(mmap) = self.maps.get(&offset) {
+            mmap.msync()?;
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    /// Returns a pointer to the beginning of the memory region.  Should only be
+    /// used for passing this region to ioctls for setting guest memory.
+    pub fn as_ptr(&self) -> *mut u8 {
+        self.addr
+    }
+
+    /// Returns the size of the memory region in bytes.
+    pub fn size(&self) -> usize {
+        self.size
+    }
+
+    /// Validates `offset` and `size`.
+    /// Checks that offset..offset+size doesn't overlap with existing mappings.
+    /// Also ensures correct alignment, and checks for any overflow.
+    /// Note: offset..offset+size is considered valid if it _perfectly_ overlaps
+    /// with a single other region.
+    fn validate_range(&self, offset: usize, size: usize) -> Result<()> {
+        // Ensure offset is page-aligned
+        if offset % pagesize() != 0 {
+            return Err(Error::NotPageAligned);
+        }
+        // Ensure offset + size doesn't overflow
+        let end_offset = offset.checked_add(size).ok_or(Error::InvalidAddress)?;
+        // Ensure offset + size are within the arena bounds
+        if end_offset > self.size {
+            return Err(Error::InvalidAddress);
+        }
+        // Ensure offset..offset+size doesn't overlap with existing regions
+        // Find the offset + size of the first mapping at or before the desired offset
+        let (prev_offset, prev_size) = match self.maps.range(..=offset).rev().next() {
+            Some((offset, mmap)) => (*offset, mmap.size()),
+            None => {
+                // Empty map
+                return Ok(());
+            }
+        };
+        if offset == prev_offset {
+            // Perfectly overlapping regions are allowed
+            if size != prev_size {
+                return Err(Error::Overlapping(offset, size));
+            }
+        } else if offset < (prev_offset + prev_size) {
+            return Err(Error::Overlapping(offset, size));
+        }
+
+        Ok(())
+    }
+}
+
+impl Drop for MemoryMappingArena {
+    fn drop(&mut self) {
+        // This is safe because we mmap the area at addr ourselves, and nobody
+        // else is holding a reference to it.
+        unsafe {
+            libc::munmap(self.addr as *mut libc::c_void, self.size);
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -560,4 +890,59 @@ mod tests {
             e => panic!("unexpected error: {}", e),
         }
     }
+
+    #[test]
+    fn arena_new() {
+        let m = MemoryMappingArena::new(0x40000).unwrap();
+        assert_eq!(m.size(), 0x40000);
+    }
+
+    #[test]
+    fn arena_add() {
+        let mut m = MemoryMappingArena::new(0x40000).unwrap();
+        assert!(m.add_anon(0, pagesize() * 4).is_ok());
+    }
+
+    #[test]
+    fn arena_remove() {
+        let mut m = MemoryMappingArena::new(0x40000).unwrap();
+        assert!(m.add_anon(0, pagesize() * 4).is_ok());
+        assert_eq!(m.remove(0).unwrap(), true);
+        assert_eq!(m.remove(pagesize()).unwrap(), false);
+    }
+
+    #[test]
+    fn arena_add_overlap_error() {
+        let page = pagesize();
+        let mut m = MemoryMappingArena::new(page * 4).unwrap();
+        assert!(m.add_anon(0, page * 4).is_ok());
+        let res = m.add_anon(page, page).unwrap_err();
+        match res {
+            Error::Overlapping(a, o) => {
+                assert_eq!((a, o), (page, page));
+            }
+            e => panic!("unexpected error: {}", e),
+        }
+    }
+
+    #[test]
+    fn arena_add_alignment_error() {
+        let mut m = MemoryMappingArena::new(pagesize() * 2).unwrap();
+        assert!(m.add_anon(0, 0x100).is_ok());
+        let res = m.add_anon(pagesize() + 1, 0x100).unwrap_err();
+        match res {
+            Error::NotPageAligned => {}
+            e => panic!("unexpected error: {}", e),
+        }
+    }
+
+    #[test]
+    fn arena_add_oob_error() {
+        let mut m = MemoryMappingArena::new(pagesize()).unwrap();
+        let res = m.add_anon(0, pagesize() + 1).unwrap_err();
+        match res {
+            Error::InvalidAddress => {}
+            e => panic!("unexpected error: {}", e),
+        }
+    }
 }