summary refs log tree commit diff
path: root/devices/src
diff options
context:
space:
mode:
authorChirantan Ekbote <chirantan@chromium.org>2019-08-16 16:18:25 +0900
committerCommit Bot <commit-bot@chromium.org>2019-11-01 06:20:27 +0000
commit2569b20f0fbfb976a7196ae9466c2c9d8ef506af (patch)
tree9c207f11be641e689393e81592d31213bfdb1eec /devices/src
parent85858f580eada8dd85c8c798ef3e98f18d92dc1e (diff)
downloadcrosvm-2569b20f0fbfb976a7196ae9466c2c9d8ef506af.tar
crosvm-2569b20f0fbfb976a7196ae9466c2c9d8ef506af.tar.gz
crosvm-2569b20f0fbfb976a7196ae9466c2c9d8ef506af.tar.bz2
crosvm-2569b20f0fbfb976a7196ae9466c2c9d8ef506af.tar.lz
crosvm-2569b20f0fbfb976a7196ae9466c2c9d8ef506af.tar.xz
crosvm-2569b20f0fbfb976a7196ae9466c2c9d8ef506af.tar.zst
crosvm-2569b20f0fbfb976a7196ae9466c2c9d8ef506af.zip
devices: fs: Add a passthrough file system
Add a "passthrough" file system implementation that just forwards its
requests to the appropriate system call.

BUG=b:136128319
TEST=`tast run vm.VirtioFs`

Change-Id: I802c91dd0af8cdd8b9e761d9f04f874ae41ec033
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1758103
Tested-by: Chirantan Ekbote <chirantan@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Chirantan Ekbote <chirantan@chromium.org>
Reviewed-by: Stephen Barber <smbarber@chromium.org>
Diffstat (limited to 'devices/src')
-rw-r--r--devices/src/virtio/fs/passthrough.rs1504
1 files changed, 1504 insertions, 0 deletions
diff --git a/devices/src/virtio/fs/passthrough.rs b/devices/src/virtio/fs/passthrough.rs
new file mode 100644
index 0000000..c42b212
--- /dev/null
+++ b/devices/src/virtio/fs/passthrough.rs
@@ -0,0 +1,1504 @@
+// Copyright 2019 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::collections::btree_map;
+use std::collections::BTreeMap;
+use std::ffi::{CStr, CString};
+use std::fs::File;
+use std::io;
+use std::mem::{self, size_of, MaybeUninit};
+use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
+use std::str::FromStr;
+use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
+use std::sync::{Arc, RwLock};
+use std::time::Duration;
+
+use data_model::DataInit;
+use libc;
+use sync::Mutex;
+use sys_util::error;
+
+use crate::virtio::fs::filesystem::{
+    Context, DirEntry, Entry, FileSystem, FsOptions, GetxattrReply, ListxattrReply, OpenOptions,
+    SetattrValid, ZeroCopyReader, ZeroCopyWriter,
+};
+use crate::virtio::fs::fuse;
+use crate::virtio::fs::multikey::MultikeyBTreeMap;
+
+// Nul-terminated "." entry name; `do_readdir` uses it to filter out the current directory.
+const CURRENT_DIR_CSTR: &[u8] = b".\0";
+// Nul-terminated ".." entry name; `do_readdir` uses it to filter out the parent directory.
+const PARENT_DIR_CSTR: &[u8] = b"..\0";
+// Empty C string, passed as the path to `fstatat64` together with `AT_EMPTY_PATH`.
+const EMPTY_CSTR: &[u8] = b"\0";
+// Path opened by `init` and registered as the root inode.
+const ROOT_CSTR: &[u8] = b"/\0";
+// Path opened by `PassthroughFs::new` and kept in the `proc` field.
+const PROC_CSTR: &[u8] = b"/proc\0";
+
+// Key type of the `inodes` map.
+type Inode = u64;
+// Key type of the `handles` map.
+type Handle = u64;
+
+// Secondary key of the inode map: the inode number and device id that `stat` reports for a file.
+// `do_lookup` uses it to find out whether a file is already tracked.
+#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq)]
+struct InodeAltKey {
+    ino: libc::ino64_t,
+    dev: libc::dev_t,
+}
+
+// Per-inode state stored in `PassthroughFs::inodes`.
+struct InodeData {
+    // The number under which this entry is stored in the inode map.
+    inode: Inode,
+    // Most of these aren't actually files but ¯\_(ツ)_/¯.
+    file: File,
+    // Incremented by `do_lookup` and decremented by `forget_one`, which removes the entry from
+    // the map once the count reaches zero.
+    refcount: AtomicU64,
+}
+
+// Per-handle state stored in `PassthroughFs::handles`.
+struct HandleData {
+    // Inode the handle was opened for. Requests that pair the handle with a different inode are
+    // rejected with `EBADF`.
+    inode: Inode,
+    // The open file. The mutex is taken for the duration of each operation on the file, e.g. the
+    // `lseek64` + `getdents64` pair in `do_readdir`.
+    file: Mutex<File>,
+}
+
+// Fixed-size header of a record written by the `getdents64` system call. `do_readdir` treats the
+// bytes between the end of this header and `d_reclen` as the entry name. The struct is packed so
+// that its size covers exactly these four fields with no trailing padding.
+#[repr(C, packed)]
+#[derive(Clone, Copy, Debug)]
+struct LinuxDirent64 {
+    d_ino: libc::ino64_t,
+    d_off: libc::off64_t,
+    // Total length of the record, header included.
+    d_reclen: libc::c_ushort,
+    d_ty: libc::c_uchar,
+}
+// NOTE(review): presumably sound because every field is a plain integer, so any byte pattern is
+// a valid value -- confirm against the `DataInit` contract.
+unsafe impl DataInit for LinuxDirent64 {}
+
+/// The caching policy that the server should report to the client.
+///
+/// The policy is translated into `OpenOptions` whenever a file is opened or created.
+pub enum CachePolicy {
+    /// The client should never cache file data and all I/O should be directly forwarded
+    /// to the server. Reported as `OpenOptions::DIRECT_IO`.
+    Never,
+
+    /// The client is free to choose when and how to cache file data. No open option is set.
+    Auto,
+
+    /// The client should always cache file data. Reported as `OpenOptions::KEEP_CACHE`.
+    Always,
+}
+
+impl FromStr for CachePolicy {
+    type Err = &'static str;
+
+    /// Parses a cache policy name.
+    ///
+    /// Each policy is accepted in exactly three spellings: all lowercase, capitalized and all
+    /// uppercase. Anything else yields the error string `"invalid cache policy"`.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        if ["never", "Never", "NEVER"].contains(&s) {
+            return Ok(CachePolicy::Never);
+        }
+        if ["auto", "Auto", "AUTO"].contains(&s) {
+            return Ok(CachePolicy::Auto);
+        }
+        if ["always", "Always", "ALWAYS"].contains(&s) {
+            return Ok(CachePolicy::Always);
+        }
+
+        Err("invalid cache policy")
+    }
+}
+
+impl Default for CachePolicy {
+    /// The default policy is `Auto`: the client decides when and how to cache.
+    fn default() -> Self {
+        Self::Auto
+    }
+}
+
+// Generates a guard type `$name` that switches one effective credential (of type `$ty`) of the
+// calling thread by invoking the raw system call `$syscall_nr`, and switches it back to 0 when
+// the guard is dropped.
+macro_rules! scoped_cred {
+    ($name:ident, $ty:ty, $syscall_nr:expr) => {
+        #[derive(Debug)]
+        struct $name;
+
+        impl $name {
+            // Changes the effective uid/gid of the current thread to `val`.  Changes
+            // the thread's credentials back to root when the returned struct is dropped.
+            // Returns `Ok(None)` when `val` is already 0 and there is nothing to undo.
+            fn new(val: $ty) -> io::Result<Option<$name>> {
+                if val == 0 {
+                    // Nothing to do since we are already uid 0.
+                    return Ok(None);
+                }
+
+                // We want credential changes to be per-thread because otherwise
+                // we might interfere with operations being carried out on other
+                // threads with different uids/gids.  However, posix requires that
+                // all threads in a process share the same credentials.  To do this
+                // libc uses signals to ensure that when one thread changes its
+                // credentials the other threads do the same thing.
+                //
+                // So instead we invoke the syscall directly in order to get around
+                // this limitation.  Another option is to use the setfsuid and
+                // setfsgid system calls.   However since those calls have no way to
+                // return an error, it's preferable to do this instead.
+
+                // This call is safe because it doesn't modify any memory and we
+                // check the return value.
+                let rc = unsafe { libc::syscall($syscall_nr, -1, val, -1) };
+                if rc != 0 {
+                    return Err(io::Error::last_os_error());
+                }
+
+                Ok(Some($name))
+            }
+        }
+
+        impl Drop for $name {
+            fn drop(&mut self) {
+                // Only the effective id is touched (the other two arguments are -1); a failure
+                // can only be logged because `drop` cannot return an error.
+                let rc = unsafe { libc::syscall($syscall_nr, -1, 0, -1) };
+                if rc >= 0 {
+                    return;
+                }
+
+                error!(
+                    "failed to change credentials back to root: {}",
+                    io::Error::last_os_error(),
+                );
+            }
+        }
+    };
+}
+scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid);
+scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid);
+
+// Switches the calling thread to the given effective `uid` and `gid` and returns the guards that
+// switch it back when dropped.
+fn set_creds(
+    uid: libc::uid_t,
+    gid: libc::gid_t,
+) -> io::Result<(Option<ScopedUid>, Option<ScopedGid>)> {
+    // We have to change the gid before we change the uid because if we change the uid first then we
+    // lose the capability to change the gid.  However changing back can happen in any order.
+    let gid_guard = ScopedGid::new(gid)?;
+
+    // If this fails, `gid_guard` is dropped on the way out and the gid change is undone.
+    let uid_guard = ScopedUid::new(uid)?;
+
+    Ok((uid_guard, gid_guard))
+}
+
+// Builds the `EBADF` error returned whenever an inode or handle is not found in the maps.
+fn ebadf() -> io::Error {
+    io::Error::from_raw_os_error(libc::EBADF)
+}
+
+// Returns the attributes of the file that `f` itself refers to, without following symlinks.
+fn stat(f: &File) -> io::Result<libc::stat64> {
+    // Safe because this is a constant value and a valid C string.
+    let empty_path = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
+
+    // An empty path combined with `AT_EMPTY_PATH` makes the call operate on the fd itself.
+    let at_flags = libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW;
+
+    let mut attr = MaybeUninit::<libc::stat64>::zeroed();
+
+    // Safe because the kernel will only write data in `attr` and we check the return
+    // value.
+    let rc = unsafe {
+        libc::fstatat64(
+            f.as_raw_fd(),
+            empty_path.as_ptr(),
+            attr.as_mut_ptr(),
+            at_flags,
+        )
+    };
+    if rc < 0 {
+        return Err(io::Error::last_os_error());
+    }
+
+    // Safe because the kernel guarantees that the struct is now fully initialized.
+    Ok(unsafe { attr.assume_init() })
+}
+
+/// A file system that serves requests by forwarding them to system calls on the host.
+pub struct PassthroughFs {
+    // File descriptors for various points in the file system tree. These fds are always opened with
+    // the `O_PATH` option so they cannot be used for reading or writing any data. See the
+    // documentation of the `O_PATH` flag in `open(2)` for more details on what one can and cannot
+    // do with an fd opened with this flag.
+    inodes: RwLock<MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>>,
+    // Source of fresh inode numbers; starts right after `fuse::ROOT_ID`.
+    next_inode: AtomicU64,
+
+    // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be
+    // used for reading and writing data.
+    handles: RwLock<BTreeMap<Handle, Arc<HandleData>>>,
+    // Source of fresh handle numbers; starts at 0.
+    next_handle: AtomicU64,
+
+    // File descriptor pointing to the `/proc` directory. This is used to convert an fd from
+    // `inodes` into one that can go into `handles`. This is accomplished by reading the
+    // `self/fd/{}` symlink. We keep an open fd here in case the file system tree that we are meant
+    // to be serving doesn't have access to `/proc`.
+    proc: File,
+
+    // Whether writeback caching is enabled for this directory. This can improve write performance
+    // as it allows the guest to complete write requests before the data has been flushed to this
+    // server. However, this also has the possibility of causing data corruption as the contents of
+    // a file may change on disk while they are still buffered in the guest. So this should only be
+    // enabled when the guest has exclusive access to the directory being shared.
+    writeback: AtomicBool,
+
+    // Reported to the client as both the attribute timeout and the entry timeout.
+    timeout: Duration,
+    // Decides which `OpenOptions` are reported when a file is opened or created.
+    cache_policy: CachePolicy,
+}
+
+impl PassthroughFs {
+    /// Creates a new `PassthroughFs` with empty inode and handle tables.
+    ///
+    /// Opens `/proc` so that it stays reachable later on; fails with the OS error if that open
+    /// fails.
+    pub fn new(timeout: Duration, cache_policy: CachePolicy) -> io::Result<PassthroughFs> {
+        // Safe because this is a constant value and a valid C string.
+        let proc_path = unsafe { CStr::from_bytes_with_nul_unchecked(PROC_CSTR) };
+        let open_flags = libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let raw_fd = unsafe { libc::openat(libc::AT_FDCWD, proc_path.as_ptr(), open_flags) };
+        if raw_fd < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(PassthroughFs {
+            inodes: RwLock::new(MultikeyBTreeMap::new()),
+            next_inode: AtomicU64::new(fuse::ROOT_ID + 1),
+
+            handles: RwLock::new(BTreeMap::new()),
+            next_handle: AtomicU64::new(0),
+
+            // Safe because we just opened this fd.
+            proc: unsafe { File::from_raw_fd(raw_fd) },
+
+            writeback: AtomicBool::new(false),
+            timeout,
+            cache_policy,
+        })
+    }
+
+    /// Returns the raw file descriptors owned by this struct from construction on; currently
+    /// that is only the `/proc` fd.
+    // NOTE(review): presumably the caller must keep these open when closing inherited fds --
+    // confirm against the call site.
+    pub fn keep_fds(&self) -> Vec<RawFd> {
+        vec![self.proc.as_raw_fd()]
+    }
+
+    // Opens a new fd for `inode` with the given open `flags` by going through the
+    // `self/fd/<n>` link of the inode's `O_PATH` fd, relative to the `/proc` fd. Fails with
+    // `EBADF` if the inode is unknown.
+    fn open_inode(&self, inode: Inode, mut flags: i32) -> io::Result<File> {
+        let inode_data = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&inode)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        let link = format!("self/fd/{}", inode_data.file.as_raw_fd());
+        let pathname =
+            CString::new(link).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+
+        if self.writeback.load(Ordering::Relaxed) {
+            // When writeback caching is enabled, the kernel may send read requests even if the
+            // userspace program opened the file write-only. So we need to ensure that we have
+            // opened the file for reading as well as writing.
+            if flags & libc::O_ACCMODE == libc::O_WRONLY {
+                flags = (flags & !libc::O_ACCMODE) | libc::O_RDWR;
+            }
+
+            // When writeback caching is enabled the kernel is responsible for handling
+            // `O_APPEND`. However, this breaks atomicity as the file may have changed on disk,
+            // invalidating the cached copy of the data in the kernel and the offset that the
+            // kernel thinks is the end of the file. Just allow this for now as it is the user's
+            // responsibility to enable writeback caching only for directories that are not
+            // shared. It also means that we need to clear the `O_APPEND` flag.
+            flags &= !libc::O_APPEND;
+        }
+
+        // Clear the `O_NOFOLLOW` flag if it is set since we need to follow the `/proc/self/fd`
+        // symlink to get the file.
+        let open_flags = (flags | libc::O_CLOEXEC) & (!libc::O_NOFOLLOW);
+
+        // Safe because this doesn't modify any memory and we check the return value. We don't
+        // really check `flags` because if the kernel can't handle poorly specified flags then we
+        // have much bigger problems.
+        let raw_fd = unsafe { libc::openat(self.proc.as_raw_fd(), pathname.as_ptr(), open_flags) };
+        if raw_fd < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        // Safe because we just opened this fd.
+        Ok(unsafe { File::from_raw_fd(raw_fd) })
+    }
+
+    // Looks up `name` inside the directory `parent` and returns the entry describing it.
+    //
+    // Every successful call takes one reference on the returned inode, either by incrementing the
+    // refcount of an inode that is already tracked or by registering a new inode with a refcount
+    // of 1. The reference is given back through `forget`.
+    //
+    // Fails with `EBADF` if `parent` is unknown, or with the OS error of the failing system call.
+    fn do_lookup(&self, parent: Inode, name: &CStr) -> io::Result<Entry> {
+        let p = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&parent)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let fd = unsafe {
+            libc::openat(
+                p.file.as_raw_fd(),
+                name.as_ptr(),
+                libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC,
+            )
+        };
+        if fd < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        // Safe because we just opened this fd.
+        let f = unsafe { File::from_raw_fd(fd) };
+
+        let st = stat(&f)?;
+
+        let altkey = InodeAltKey {
+            ino: st.st_ino,
+            dev: st.st_dev,
+        };
+
+        let inode = {
+            // The check for an existing entry and the refcount increment / insertion must happen
+            // under a single write lock. Otherwise a concurrent `forget` could remove the entry
+            // between the check and the increment, leaving the client with an inode number that
+            // is no longer in the map, and two concurrent lookups of the same file could both
+            // miss and both insert it. The lock is only held for the map operations, never
+            // across a system call.
+            let mut inodes = self.inodes.write().unwrap();
+
+            let existing = inodes.get_alt(&altkey).map(|data| {
+                // Matches with the release store in `forget`.
+                data.refcount.fetch_add(1, Ordering::Acquire);
+                data.inode
+            });
+
+            match existing {
+                // The file is already tracked; the freshly opened `f` is simply dropped.
+                Some(inode) => inode,
+                None => {
+                    let inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
+                    inodes.insert(
+                        inode,
+                        altkey,
+                        Arc::new(InodeData {
+                            inode,
+                            file: f,
+                            refcount: AtomicU64::new(1),
+                        }),
+                    );
+
+                    inode
+                }
+            }
+        };
+
+        Ok(Entry {
+            inode,
+            generation: 0,
+            attr: st,
+            attr_timeout: self.timeout,
+            entry_timeout: self.timeout,
+        })
+    }
+
+    // Reads up to `size` bytes worth of directory records from `handle`, starting at the
+    // directory offset `offset`, and passes every entry except "." and ".." to `add_entry`.
+    //
+    // Stops early when `add_entry` returns `Ok(0)` and propagates any error it returns. Fails
+    // with `EBADF` if `handle` is unknown or does not belong to `inode`.
+    fn do_readdir<F>(
+        &self,
+        inode: Inode,
+        handle: Handle,
+        size: u32,
+        offset: u64,
+        mut add_entry: F,
+    ) -> io::Result<()>
+    where
+        F: FnMut(DirEntry) -> io::Result<usize>,
+    {
+        if size == 0 {
+            return Ok(());
+        }
+
+        let data = self
+            .handles
+            .read()
+            .unwrap()
+            .get(&handle)
+            .filter(|hd| hd.inode == inode)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        let mut buf = vec![0u8; size as usize];
+
+        {
+            // Since we are going to work with the kernel offset, we have to acquire the file lock
+            // for both the `lseek64` and `getdents64` syscalls to ensure that no other thread
+            // changes the kernel offset while we are using it. The lock is released at the end of
+            // this scope so that it's not held while we fill in the fuse buffer.
+            let dir = data.file.lock();
+
+            // Safe because this doesn't modify any memory and we check the return value.
+            let pos =
+                unsafe { libc::lseek64(dir.as_raw_fd(), offset as libc::off64_t, libc::SEEK_SET) };
+            if pos < 0 {
+                return Err(io::Error::last_os_error());
+            }
+
+            // Safe because the kernel guarantees that it will only write to `buf` and we check the
+            // return value.
+            let nread = unsafe {
+                libc::syscall(
+                    libc::SYS_getdents64,
+                    dir.as_raw_fd(),
+                    buf.as_mut_ptr() as *mut LinuxDirent64,
+                    size as libc::c_int,
+                )
+            };
+            if nread < 0 {
+                return Err(io::Error::last_os_error());
+            }
+            buf.resize(nread as usize, 0);
+        }
+
+        let header_len = size_of::<LinuxDirent64>();
+        let mut rem = &buf[..];
+        while !rem.is_empty() {
+            // We only use debug asserts here because these values are coming from the kernel and we
+            // trust them implicitly.
+            debug_assert!(rem.len() >= header_len, "not enough space left in `rem`");
+
+            let (front, back) = rem.split_at(header_len);
+
+            let dirent64 =
+                LinuxDirent64::from_slice(front).expect("unable to get LinuxDirent64 from slice");
+            let reclen = dirent64.d_reclen as usize;
+
+            // Everything in the record after the fixed header is the (nul-padded) name.
+            let namelen = reclen - header_len;
+            debug_assert!(namelen <= back.len(), "back is smaller than `namelen`");
+
+            let name = &back[..namelen];
+            let is_dot_entry =
+                name.starts_with(CURRENT_DIR_CSTR) || name.starts_with(PARENT_DIR_CSTR);
+            let res = if is_dot_entry {
+                // We don't want to report the "." and ".." entries. However, returning `Ok(0)` will
+                // break the loop so return `Ok` with a non-zero value instead.
+                Ok(1)
+            } else {
+                add_entry(DirEntry {
+                    ino: dirent64.d_ino,
+                    offset: dirent64.d_off as u64,
+                    type_: dirent64.d_ty as u32,
+                    name,
+                })
+            };
+
+            debug_assert!(rem.len() >= reclen, "rem is smaller than `d_reclen`");
+
+            // `Ok(0)` means the entry was not accepted, so stop here; errors are propagated.
+            if res? == 0 {
+                break;
+            }
+            rem = &rem[reclen..];
+        }
+
+        Ok(())
+    }
+
+    // Opens `inode` with `flags`, registers the resulting file under a fresh handle and returns
+    // the handle together with the open options derived from the cache policy.
+    fn do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option<Handle>, OpenOptions)> {
+        let file = Mutex::new(self.open_inode(inode, flags as i32)?);
+
+        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
+        self.handles
+            .write()
+            .unwrap()
+            .insert(handle, Arc::new(HandleData { inode, file }));
+
+        let mut opts = OpenOptions::empty();
+        match self.cache_policy {
+            CachePolicy::Never => {
+                // We only set the direct I/O option on files.
+                let is_dir = flags & (libc::O_DIRECTORY as u32) != 0;
+                if !is_dir {
+                    opts |= OpenOptions::DIRECT_IO;
+                }
+            }
+            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
+            CachePolicy::Auto => {}
+        }
+
+        Ok((Some(handle), opts))
+    }
+
+    // Removes `handle` from the handle table. Fails with `EBADF` if the handle is unknown or was
+    // not opened for `inode`.
+    fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> {
+        let mut handles = self.handles.write().unwrap();
+
+        let belongs_to_inode = handles
+            .get(&handle)
+            .map_or(false, |hd| hd.inode == inode);
+        if !belongs_to_inode {
+            return Err(ebadf());
+        }
+
+        // We don't need to close the file here because that will happen automatically when
+        // the last `Arc` is dropped.
+        handles.remove(&handle);
+        Ok(())
+    }
+
+    // Returns the current attributes of `inode` together with the timeout configured for this
+    // file system. Fails with `EBADF` if the inode is unknown.
+    fn do_getattr(&self, inode: Inode) -> io::Result<(libc::stat64, Duration)> {
+        let inode_data = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&inode)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        stat(&inode_data.file).map(|attr| (attr, self.timeout))
+    }
+
+    // Removes the entry `name` from the directory `parent` via `unlinkat`; `flags` is passed
+    // through unchanged. Fails with `EBADF` if `parent` is unknown.
+    fn do_unlink(&self, parent: Inode, name: &CStr, flags: libc::c_int) -> io::Result<()> {
+        let dir = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&parent)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = unsafe { libc::unlinkat(dir.file.as_raw_fd(), name.as_ptr(), flags) };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(())
+    }
+}
+
+// Drops `count` references from `inode` and removes the inode from `inodes` once its refcount
+// reaches zero. Unknown inodes are ignored.
+//
+// The caller passes the map by mutable reference, i.e. it holds the write lock on the inode map.
+fn forget_one(
+    inodes: &mut MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>,
+    inode: Inode,
+    count: u64,
+) {
+    if let Some(data) = inodes.get(&inode) {
+        // Acquiring the write lock on the inode map prevents new lookups from incrementing the
+        // refcount but there is the possibility that a previous lookup already acquired a
+        // reference to the inode data and is in the process of updating the refcount so we need
+        // to loop here until we can decrement successfully.
+        loop {
+            let refcount = data.refcount.load(Ordering::Relaxed);
+
+            // Saturating sub because it doesn't make sense for a refcount to go below zero and
+            // we don't want misbehaving clients to cause integer overflow.
+            let new_count = refcount.saturating_sub(count);
+
+            // Synchronizes with the acquire load in `do_lookup`. `compare_exchange` is used
+            // instead of the deprecated `compare_and_swap`; on failure nothing is published, so
+            // a relaxed failure ordering is sufficient and we simply retry.
+            if data
+                .refcount
+                .compare_exchange(refcount, new_count, Ordering::Release, Ordering::Relaxed)
+                .is_ok()
+            {
+                if new_count == 0 {
+                    // We just removed the last refcount for this inode. There's no need for an
+                    // acquire fence here because we hold a write lock on the inode map and any
+                    // thread that is waiting to do a forget on the same inode will have to wait
+                    // until we release the lock. So there is no other release store for us to
+                    // synchronize with before deleting the entry.
+                    inodes.remove(&inode);
+                }
+                break;
+            }
+        }
+    }
+}
+
+impl FileSystem for PassthroughFs {
+    type Inode = Inode;
+    type Handle = Handle;
+
+    // Registers "/" as the root inode, clears the process umask and negotiates options with the
+    // client. Writeback caching is turned on whenever the client reports it as a capability.
+    fn init(&self, capable: FsOptions) -> io::Result<FsOptions> {
+        // Safe because this is a constant value and a valid C string.
+        let root_path = unsafe { CStr::from_bytes_with_nul_unchecked(ROOT_CSTR) };
+
+        // We use `O_PATH` because we just want this for traversing the directory tree
+        // and not for actually reading the contents.
+        let open_flags = libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let raw_fd = unsafe { libc::openat(libc::AT_FDCWD, root_path.as_ptr(), open_flags) };
+        if raw_fd < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        // Safe because we just opened this fd above.
+        let root = unsafe { File::from_raw_fd(raw_fd) };
+
+        let attr = stat(&root)?;
+
+        // Safe because this doesn't modify any memory and there is no need to check the return
+        // value because this system call always succeeds. We need to clear the umask here because
+        // we want the client to be able to set all the bits in the mode.
+        unsafe { libc::umask(0o000) };
+
+        let root_key = InodeAltKey {
+            ino: attr.st_ino,
+            dev: attr.st_dev,
+        };
+        let root_data = InodeData {
+            inode: fuse::ROOT_ID,
+            file: root,
+            // Not sure why the root inode gets a refcount of 2 but that's what libfuse does.
+            refcount: AtomicU64::new(2),
+        };
+        self.inodes
+            .write()
+            .unwrap()
+            .insert(fuse::ROOT_ID, root_key, Arc::new(root_data));
+
+        let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO;
+        if capable.contains(FsOptions::WRITEBACK_CACHE) {
+            opts |= FsOptions::WRITEBACK_CACHE;
+            self.writeback.store(true, Ordering::Relaxed);
+        }
+        Ok(opts)
+    }
+
+    // Drops every handle and every inode entry. The underlying fds are closed once the last
+    // `Arc` referring to each entry goes away.
+    fn destroy(&self) {
+        self.handles.write().unwrap().clear();
+        self.inodes.write().unwrap().clear();
+    }
+
+    // Returns the file system statistics reported by `fstatvfs64` for `inode`. Fails with
+    // `EBADF` if the inode is unknown.
+    fn statfs(&self, _ctx: Context, inode: Inode) -> io::Result<libc::statvfs64> {
+        let inode_data = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&inode)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        let mut stats = MaybeUninit::<libc::statvfs64>::zeroed();
+
+        // Safe because this will only modify `stats` and we check the return value.
+        let rc = unsafe { libc::fstatvfs64(inode_data.file.as_raw_fd(), stats.as_mut_ptr()) };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        // Safe because the kernel guarantees that `stats` has been initialized.
+        Ok(unsafe { stats.assume_init() })
+    }
+
+    // Resolves `name` inside `parent`; see `do_lookup`. The request context is not used.
+    fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
+        self.do_lookup(parent, name)
+    }
+
+    // Drops `count` references from `inode` while holding the write lock on the inode map.
+    fn forget(&self, _ctx: Context, inode: Inode, count: u64) {
+        let mut inodes = self.inodes.write().unwrap();
+
+        forget_one(&mut inodes, inode, count)
+    }
+
+    // Applies every `(inode, count)` forget request in order, taking the write lock on the inode
+    // map once for the whole batch.
+    fn batch_forget(&self, _ctx: Context, requests: Vec<(Inode, u64)>) {
+        let mut inodes = self.inodes.write().unwrap();
+
+        requests
+            .into_iter()
+            .for_each(|(inode, count)| forget_one(&mut *inodes, inode, count));
+    }
+
+    // Opens a directory handle for `inode`. `O_DIRECTORY` is always added to `flags` before the
+    // request is handed to `do_open`.
+    fn opendir(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        flags: u32,
+    ) -> io::Result<(Option<Handle>, OpenOptions)> {
+        self.do_open(inode, flags | (libc::O_DIRECTORY as u32))
+    }
+
+    // Releases a directory handle; see `do_release`. The context and flags are not used.
+    fn releasedir(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        _flags: u32,
+        handle: Handle,
+    ) -> io::Result<()> {
+        self.do_release(inode, handle)
+    }
+
+    // Creates the directory `name` inside `parent` with the credentials of the requester and
+    // returns the entry for the new directory. The requested `mode` is masked with `umask`.
+    fn mkdir(
+        &self,
+        ctx: Context,
+        parent: Inode,
+        name: &CStr,
+        mode: u32,
+        umask: u32,
+    ) -> io::Result<Entry> {
+        // The guards switch the credentials back when they go out of scope.
+        let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
+        let dir = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&parent)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = unsafe { libc::mkdirat(dir.file.as_raw_fd(), name.as_ptr(), mode & !umask) };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        self.do_lookup(parent, name)
+    }
+
+    // Removes the directory `name` from `parent` by calling `do_unlink` with `AT_REMOVEDIR`.
+    fn rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> {
+        self.do_unlink(parent, name, libc::AT_REMOVEDIR)
+    }
+
+    // Lists directory entries; see `do_readdir`. The request context is not used.
+    fn readdir<F>(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        handle: Handle,
+        size: u32,
+        offset: u64,
+        add_entry: F,
+    ) -> io::Result<()>
+    where
+        F: FnMut(DirEntry) -> io::Result<usize>,
+    {
+        self.do_readdir(inode, handle, size, offset, add_entry)
+    }
+
+    // Lists directory entries like `readdir` but additionally performs a lookup on every entry
+    // and hands the resulting `Entry` to `add_entry`.
+    //
+    // Each lookup takes a reference on the entry's inode. The client only learns about (and later
+    // forgets) entries that were actually added to the reply, so the reference is given back
+    // right away when `add_entry` reports that the entry was not added.
+    fn readdirplus<F>(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        handle: Handle,
+        size: u32,
+        offset: u64,
+        mut add_entry: F,
+    ) -> io::Result<()>
+    where
+        F: FnMut(DirEntry, Entry) -> io::Result<usize>,
+    {
+        self.do_readdir(inode, handle, size, offset, |dir_entry| {
+            // Safe because the kernel guarantees that the buffer is nul-terminated. Additionally,
+            // the kernel will pad the name with '\0' bytes up to 8-byte alignment and there's no
+            // way for us to know exactly how many padding bytes there are. This would cause
+            // `CStr::from_bytes_with_nul` to return an error because it would think there are
+            // interior '\0' bytes. We trust the kernel to provide us with properly formatted data
+            // so we'll just skip the checks here.
+            let name = unsafe { CStr::from_bytes_with_nul_unchecked(dir_entry.name) };
+            let entry = self.do_lookup(inode, name)?;
+            let ino = entry.inode;
+
+            let res = add_entry(dir_entry, entry);
+            match res {
+                // `Ok(0)` means the entry did not fit and an error means it was not added either.
+                // No `forget` will ever arrive for it, so undo the reference taken by the lookup
+                // above to avoid leaking the inode and its fd.
+                Ok(0) | Err(_) => {
+                    let mut inodes = self.inodes.write().unwrap();
+                    forget_one(&mut inodes, ino, 1);
+                }
+                Ok(_) => {}
+            }
+
+            res
+        })
+    }
+
+    // Opens a file handle for `inode`; see `do_open`. The request context is not used.
+    fn open(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        flags: u32,
+    ) -> io::Result<(Option<Handle>, OpenOptions)> {
+        self.do_open(inode, flags)
+    }
+
+    // Releases a file handle; see `do_release`. Only `inode` and `handle` are used, every other
+    // argument is ignored.
+    fn release(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        _flags: u32,
+        handle: Handle,
+        _flush: bool,
+        _flock_release: bool,
+        _lock_owner: Option<u64>,
+    ) -> io::Result<()> {
+        self.do_release(inode, handle)
+    }
+
+    // Creates and opens the file `name` inside `parent` with the credentials of the requester.
+    // Returns the entry for the file, a fresh handle for the open file and the open options
+    // derived from the cache policy.
+    fn create(
+        &self,
+        ctx: Context,
+        parent: Inode,
+        name: &CStr,
+        mode: u32,
+        flags: u32,
+        umask: u32,
+    ) -> io::Result<(Entry, Option<Handle>, OpenOptions)> {
+        // The guards switch the credentials back when they go out of scope.
+        let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
+        let dir = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&parent)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        let open_flags = flags as i32 | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW;
+        let create_mode = mode & !(umask & 0o777);
+
+        // Safe because this doesn't modify any memory and we check the return value. We don't
+        // really check `flags` because if the kernel can't handle poorly specified flags then we
+        // have much bigger problems.
+        let raw_fd = unsafe {
+            libc::openat(
+                dir.file.as_raw_fd(),
+                name.as_ptr(),
+                open_flags,
+                create_mode,
+            )
+        };
+        if raw_fd < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        // Safe because we just opened this fd.
+        let file = Mutex::new(unsafe { File::from_raw_fd(raw_fd) });
+
+        let entry = self.do_lookup(parent, name)?;
+
+        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
+        let handle_data = HandleData {
+            inode: entry.inode,
+            file,
+        };
+        self.handles
+            .write()
+            .unwrap()
+            .insert(handle, Arc::new(handle_data));
+
+        let mut opts = OpenOptions::empty();
+        match self.cache_policy {
+            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
+            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
+            CachePolicy::Auto => {}
+        }
+
+        Ok((entry, Some(handle), opts))
+    }
+
+    // Removes the non-directory entry `name` from `parent` by calling `do_unlink` without flags.
+    fn unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> {
+        self.do_unlink(parent, name, 0)
+    }
+
+    // Transfers up to `size` bytes starting at `offset` from the file behind `handle` into the
+    // writer `w` and returns the result of that transfer. Fails with `EBADF` if the handle is
+    // unknown or does not belong to `inode`.
+    fn read<W: io::Write + ZeroCopyWriter>(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        handle: Handle,
+        mut w: W,
+        size: u32,
+        offset: u64,
+        _lock_owner: Option<u64>,
+        _flags: u32,
+    ) -> io::Result<usize> {
+        let handle_data = self
+            .handles
+            .read()
+            .unwrap()
+            .get(&handle)
+            .filter(|hd| hd.inode == inode)
+            .map(Arc::clone)
+            .ok_or_else(ebadf)?;
+
+        // The file lock is held for the duration of the transfer.
+        let mut file = handle_data.file.lock();
+        w.write_from(&mut file, size as usize, offset)
+    }
+
+    /// Serves a write request on `handle` by letting `r` push data into the underlying file.
+    ///
+    /// `size` and `offset` are handed to `ZeroCopyReader::read_to` unchanged and its result is
+    /// returned as-is. Fails with the `ebadf()` error when `handle` is unknown or does not belong
+    /// to `inode`, or with whatever error `set_creds` reports.
+    fn write<R: io::Read + ZeroCopyReader>(
+        &self,
+        ctx: Context,
+        inode: Inode,
+        handle: Handle,
+        mut r: R,
+        size: u32,
+        offset: u64,
+        _lock_owner: Option<u64>,
+        _delayed_write: bool,
+        _flags: u32,
+    ) -> io::Result<usize> {
+        // The write has to run with the caller's credentials: that is what makes the kernel strip
+        // the setuid / setgid bits when somebody other than the owner writes to the file. The
+        // returned values are kept alive until the end of the function.
+        let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
+
+        let handle_data = self
+            .handles
+            .read()
+            .unwrap()
+            .get(&handle)
+            .filter(|hd| hd.inode == inode)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        // Keep the file locked for the duration of the transfer.
+        let mut file = handle_data.file.lock();
+        let count = size as usize;
+        r.read_to(&mut file, count, offset)
+    }
+
+    /// Returns whatever `do_getattr` reports for `inode`; the context and handle are not used.
+    fn getattr(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        _handle: Option<Handle>,
+    ) -> io::Result<(libc::stat64, Duration)> {
+        Self::do_getattr(self, inode)
+    }
+
+    /// Applies the attributes selected by `valid` from `attr` to `inode` and then returns the
+    /// result of `do_getattr` for that inode.
+    ///
+    /// When `handle` is given it must belong to `inode`; its fd is then used for the mode, size
+    /// and timestamp changes. Without a handle those changes go through the `self/fd/<n>` path
+    /// relative to `self.proc` (mode, timestamps) or through a freshly opened fd (size).
+    /// Ownership changes always use the inode's own fd.
+    ///
+    /// The changes are attempted in the order mode, owner/group, size, timestamps. The first
+    /// failing system call ends the operation with its OS error; nothing here undoes changes that
+    /// were already applied.
+    fn setattr(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        attr: libc::stat64,
+        handle: Option<Handle>,
+        valid: SetattrValid,
+    ) -> io::Result<(libc::stat64, Duration)> {
+        let inode_data = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&inode)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        // How the system calls below address the file.
+        enum Target {
+            // Raw fd of an open handle. Holding the `Arc` keeps the handle alive while the raw fd
+            // is in use.
+            Fd(Arc<HandleData>, RawFd),
+            // Path naming the inode's fd, to be resolved relative to `self.proc`.
+            Proc(CString),
+        }
+
+        // If we have a handle then use it otherwise refer to the inode's fd by path.
+        let target = match handle {
+            Some(handle) => {
+                let hd = self
+                    .handles
+                    .read()
+                    .unwrap()
+                    .get(&handle)
+                    .filter(|hd| hd.inode == inode)
+                    .cloned()
+                    .ok_or_else(ebadf)?;
+
+                let raw = hd.file.lock().as_raw_fd();
+                Target::Fd(hd, raw)
+            }
+            None => {
+                let proc_path = CString::new(format!("self/fd/{}", inode_data.file.as_raw_fd()))
+                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+                Target::Proc(proc_path)
+            }
+        };
+
+        if valid.contains(SetattrValid::MODE) {
+            // Safe because this doesn't modify any memory and we check the return value.
+            let rc = match &target {
+                Target::Fd(_, fd) => unsafe { libc::fchmod(*fd, attr.st_mode) },
+                Target::Proc(path) => unsafe {
+                    libc::fchmodat(self.proc.as_raw_fd(), path.as_ptr(), attr.st_mode, 0)
+                },
+            };
+            if rc < 0 {
+                return Err(io::Error::last_os_error());
+            }
+        }
+
+        if valid.intersects(SetattrValid::UID | SetattrValid::GID) {
+            // An id that was not requested is passed as `u32::MAX`: these are unsigned values so
+            // -1 cannot be written directly.
+            let uid = if valid.contains(SetattrValid::UID) {
+                attr.st_uid
+            } else {
+                ::std::u32::MAX
+            };
+            let gid = if valid.contains(SetattrValid::GID) {
+                attr.st_gid
+            } else {
+                ::std::u32::MAX
+            };
+
+            // Safe because this is a constant value and a valid C string.
+            let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
+
+            // Safe because this doesn't modify any memory and we check the return value.
+            let rc = unsafe {
+                libc::fchownat(
+                    inode_data.file.as_raw_fd(),
+                    empty.as_ptr(),
+                    uid,
+                    gid,
+                    libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
+                )
+            };
+            if rc < 0 {
+                return Err(io::Error::last_os_error());
+            }
+        }
+
+        if valid.contains(SetattrValid::SIZE) {
+            match &target {
+                Target::Fd(_, fd) => {
+                    // Safe because this doesn't modify any memory and we check the return value.
+                    let rc = unsafe { libc::ftruncate(*fd, attr.st_size) };
+                    if rc < 0 {
+                        return Err(io::Error::last_os_error());
+                    }
+                }
+                Target::Proc(_) => {
+                    // There is no `ftruncateat` so we need to get a new fd and truncate it.
+                    let f = self.open_inode(inode, libc::O_NONBLOCK | libc::O_RDWR)?;
+                    // Safe because this doesn't modify any memory and we check the return value.
+                    let rc = unsafe { libc::ftruncate(f.as_raw_fd(), attr.st_size) };
+                    if rc < 0 {
+                        // Build the error while `f` is still open: dropping `f` closes the fd,
+                        // and errno must be read before any other system call can touch it.
+                        return Err(io::Error::last_os_error());
+                    }
+                }
+            }
+        }
+
+        if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) {
+            // Index 0 is the access time, index 1 the modification time. Both start out as
+            // `UTIME_OMIT` and are only overwritten when requested.
+            let omit = libc::timespec {
+                tv_sec: 0,
+                tv_nsec: libc::UTIME_OMIT,
+            };
+            let mut tvs = [omit, omit];
+
+            if valid.contains(SetattrValid::ATIME_NOW) {
+                tvs[0].tv_nsec = libc::UTIME_NOW;
+            } else if valid.contains(SetattrValid::ATIME) {
+                tvs[0].tv_sec = attr.st_atime;
+                tvs[0].tv_nsec = attr.st_atime_nsec;
+            }
+
+            if valid.contains(SetattrValid::MTIME_NOW) {
+                tvs[1].tv_nsec = libc::UTIME_NOW;
+            } else if valid.contains(SetattrValid::MTIME) {
+                tvs[1].tv_sec = attr.st_mtime;
+                tvs[1].tv_nsec = attr.st_mtime_nsec;
+            }
+
+            // Safe because this doesn't modify any memory and we check the return value.
+            let rc = match &target {
+                Target::Fd(_, fd) => unsafe { libc::futimens(*fd, tvs.as_ptr()) },
+                Target::Proc(path) => unsafe {
+                    libc::utimensat(self.proc.as_raw_fd(), path.as_ptr(), tvs.as_ptr(), 0)
+                },
+            };
+            if rc < 0 {
+                return Err(io::Error::last_os_error());
+            }
+        }
+
+        self.do_getattr(inode)
+    }
+
+    /// Moves `oldname` in `olddir` to `newname` in `newdir` with the `renameat2` system call.
+    ///
+    /// `flags` is forwarded to the system call unchanged. Fails with the `ebadf()` error when
+    /// either directory inode is unknown, or with the OS error when the system call returns
+    /// anything other than 0.
+    fn rename(
+        &self,
+        _ctx: Context,
+        olddir: Inode,
+        oldname: &CStr,
+        newdir: Inode,
+        newname: &CStr,
+        flags: u32,
+    ) -> io::Result<()> {
+        // Each directory is looked up under its own, short-lived read lock.
+        let src_dir = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&olddir)
+            .cloned()
+            .ok_or_else(ebadf)?;
+        let dst_dir = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&newdir)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        // TODO: Switch to libc::renameat2 once https://github.com/rust-lang/libc/pull/1508 lands
+        // and we have glibc 2.28.
+        let rc = unsafe {
+            libc::syscall(
+                libc::SYS_renameat2,
+                src_dir.file.as_raw_fd(),
+                oldname.as_ptr(),
+                dst_dir.file.as_raw_fd(),
+                newname.as_ptr(),
+                flags,
+            )
+        };
+        match rc {
+            0 => Ok(()),
+            _ => Err(io::Error::last_os_error()),
+        }
+    }
+
+    /// Creates the node `name` inside `parent` with `mknodat` and returns its lookup entry.
+    ///
+    /// The node is created with `mode & !umask` and device number `rdev`, while running with the
+    /// credentials from `ctx`. Fails with the `ebadf()` error when `parent` is unknown, or with
+    /// the OS error when `mknodat` returns a negative value.
+    fn mknod(
+        &self,
+        ctx: Context,
+        parent: Inode,
+        name: &CStr,
+        mode: u32,
+        rdev: u32,
+        umask: u32,
+    ) -> io::Result<Entry> {
+        // Kept alive until the end of the function, so the lookup below also runs under them.
+        let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
+        let parent_data = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&parent)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        let effective_mode = (mode & !umask) as libc::mode_t;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = unsafe {
+            libc::mknodat(
+                parent_data.file.as_raw_fd(),
+                name.as_ptr(),
+                effective_mode,
+                rdev as libc::dev_t,
+            )
+        };
+        if rc < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        self.do_lookup(parent, name)
+    }
+
+    /// Creates a hard link called `newname` in `newparent` that refers to `inode`.
+    ///
+    /// The source is addressed through its own fd with an empty path and `AT_EMPTY_PATH`. On
+    /// success the new name is looked up and that entry is returned. Fails with the `ebadf()`
+    /// error when either inode is unknown, or with the OS error when `linkat` does not return 0.
+    fn link(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        newparent: Inode,
+        newname: &CStr,
+    ) -> io::Result<Entry> {
+        // Each inode is looked up under its own, short-lived read lock.
+        let target = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&inode)
+            .cloned()
+            .ok_or_else(ebadf)?;
+        let dest_dir = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&newparent)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        // Safe because this is a constant value and a valid C string.
+        let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = unsafe {
+            libc::linkat(
+                target.file.as_raw_fd(),
+                empty.as_ptr(),
+                dest_dir.file.as_raw_fd(),
+                newname.as_ptr(),
+                libc::AT_EMPTY_PATH,
+            )
+        };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        self.do_lookup(newparent, newname)
+    }
+
+    /// Creates the symbolic link `name` inside `parent` whose contents are `linkname`.
+    ///
+    /// Runs with the credentials from `ctx`. On success the new name is looked up and that entry
+    /// is returned. Fails with the `ebadf()` error when `parent` is unknown, or with the OS error
+    /// when `symlinkat` does not return 0.
+    fn symlink(
+        &self,
+        ctx: Context,
+        linkname: &CStr,
+        parent: Inode,
+        name: &CStr,
+    ) -> io::Result<Entry> {
+        // Kept alive until the end of the function, so the lookup below also runs under them.
+        let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
+        let parent_data = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&parent)
+            .cloned()
+            .ok_or_else(ebadf)?;
+        let dir_fd = parent_data.file.as_raw_fd();
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = unsafe { libc::symlinkat(linkname.as_ptr(), dir_fd, name.as_ptr()) };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        self.do_lookup(parent, name)
+    }
+
+    /// Returns the raw bytes that `readlinkat` reports for `inode`.
+    ///
+    /// The link is addressed through the inode's own fd with an empty path. At most `PATH_MAX`
+    /// bytes are read and the result is cut to the length the system call returned. Fails with
+    /// the `ebadf()` error when `inode` is unknown, or with the OS error when `readlinkat` returns
+    /// a negative value.
+    fn readlink(&self, _ctx: Context, inode: Inode) -> io::Result<Vec<u8>> {
+        let link = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&inode)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        // Zero-filled scratch space for the link contents.
+        let mut contents = vec![0u8; libc::PATH_MAX as usize];
+
+        // Safe because this is a constant value and a valid C string.
+        let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
+
+        // Safe because this will only modify the contents of `contents` and we check the return
+        // value.
+        let len = unsafe {
+            libc::readlinkat(
+                link.file.as_raw_fd(),
+                empty.as_ptr(),
+                contents.as_mut_ptr() as *mut libc::c_char,
+                contents.len(),
+            )
+        };
+        if len < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        contents.resize(len as usize, 0);
+        Ok(contents)
+    }
+
+    /// Emulates a client-side `close` on `handle` by duplicating its fd and closing the copy.
+    ///
+    /// Fails with the `ebadf()` error when `handle` is unknown or does not belong to `inode`, or
+    /// with the OS error when either `dup` or `close` returns a negative value.
+    fn flush(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        handle: Handle,
+        _lock_owner: u64,
+    ) -> io::Result<()> {
+        let handle_data = self
+            .handles
+            .read()
+            .unwrap()
+            .get(&handle)
+            .filter(|hd| hd.inode == inode)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        // This method is called whenever an fd is closed in the client, so we do the same thing
+        // here: dup the fd and immediately close the duplicate. Safe because this doesn't modify
+        // any memory and we check the return value.
+        let dup_fd = unsafe { libc::dup(handle_data.file.lock().as_raw_fd()) };
+        if dup_fd < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        // Safe because `dup_fd` was just created above, nothing else refers to it, and we check
+        // the return value.
+        let rc = unsafe { libc::close(dup_fd) };
+        if rc < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(())
+    }
+
+    /// Synchronizes the file behind `handle`: `fdatasync` when `datasync` is set, `fsync`
+    /// otherwise.
+    ///
+    /// Fails with the `ebadf()` error when `handle` is unknown or does not belong to `inode`, or
+    /// with the OS error when the system call does not return 0.
+    fn fsync(&self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()> {
+        let handle_data = self
+            .handles
+            .read()
+            .unwrap()
+            .get(&handle)
+            .filter(|hd| hd.inode == inode)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        // The file lock is only held long enough to read the raw fd.
+        let raw_fd = handle_data.file.lock().as_raw_fd();
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = if datasync {
+            unsafe { libc::fdatasync(raw_fd) }
+        } else {
+            unsafe { libc::fsync(raw_fd) }
+        };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(())
+    }
+
+    /// Synchronizes a directory handle; handled exactly like `fsync` with the same arguments.
+    fn fsyncdir(
+        &self,
+        ctx: Context,
+        inode: Inode,
+        datasync: bool,
+        handle: Handle,
+    ) -> io::Result<()> {
+        Self::fsync(self, ctx, inode, datasync, handle)
+    }
+
+    /// Checks whether the caller described by `ctx` may access `inode` in the ways named by
+    /// `mask`, based on the mode, owner and group reported by `stat`.
+    ///
+    /// Only the `R_OK`, `W_OK` and `X_OK` bits of `mask` are considered. For each requested kind
+    /// of access the check passes when the owner bit is set and the caller is the owner, or the
+    /// group bit is set and the caller's gid matches, or the "other" bit is set. uid 0 always
+    /// passes the read and write checks, and passes the execute check when any execute bit is
+    /// set. Fails with `EACCES` when a check does not pass, or with the `ebadf()` error when
+    /// `inode` is unknown.
+    fn access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()> {
+        let inode_data = self
+            .inodes
+            .read()
+            .unwrap()
+            .get(&inode)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        let st = stat(&inode_data.file)?;
+        let requested = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK);
+
+        if requested == libc::F_OK {
+            // The file exists since we were able to call `stat(2)` on it.
+            return Ok(());
+        }
+
+        let is_root = ctx.uid == 0;
+        let is_owner = st.st_uid == ctx.uid;
+        let in_group = st.st_gid == ctx.gid;
+        let perms = st.st_mode;
+
+        // True when at least one of the given permission bits is both set on the file and
+        // applicable to the caller.
+        let granted = |owner: libc::mode_t, group: libc::mode_t, other: libc::mode_t| -> bool {
+            (is_owner && perms & owner != 0)
+                || (in_group && perms & group != 0)
+                || perms & other != 0
+        };
+        let denied = || io::Error::from_raw_os_error(libc::EACCES);
+
+        if requested & libc::R_OK != 0 && !is_root && !granted(0o400, 0o040, 0o004) {
+            return Err(denied());
+        }
+
+        if requested & libc::W_OK != 0 && !is_root && !granted(0o200, 0o020, 0o002) {
+            return Err(denied());
+        }
+
+        // root can only execute something if it is executable by one of the owner, the group, or
+        // everyone.
+        let root_may_exec = is_root && perms & 0o111 != 0;
+        if requested & libc::X_OK != 0 && !root_may_exec && !granted(0o100, 0o010, 0o001) {
+            return Err(denied());
+        }
+
+        Ok(())
+    }
+
+    /// Sets the extended attribute `name` of `inode` to `value` with `fsetxattr`.
+    ///
+    /// `flags` is forwarded to the system call. Fails with whatever error `open_inode` reports,
+    /// or with the OS error when `fsetxattr` does not return 0.
+    fn setxattr(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        name: &CStr,
+        value: &[u8],
+        flags: u32,
+    ) -> io::Result<()> {
+        // An fd opened with `O_PATH` cannot be used with the f{set,get,remove,list}xattr family,
+        // so open a fresh one for this call.
+        let xattr_file = self.open_inode(inode, libc::O_RDONLY | libc::O_NONBLOCK)?;
+
+        let value_ptr = value.as_ptr() as *const libc::c_void;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = unsafe {
+            libc::fsetxattr(
+                xattr_file.as_raw_fd(),
+                name.as_ptr(),
+                value_ptr,
+                value.len(),
+                flags as libc::c_int,
+            )
+        };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(())
+    }
+
+    /// Reads the extended attribute `name` of `inode` with `fgetxattr`.
+    ///
+    /// When `size` is 0 the reply is `GetxattrReply::Count` carrying the system call's return
+    /// value. Otherwise a `size`-byte buffer is filled, resized to the returned length and
+    /// handed back as `GetxattrReply::Value`. Fails with whatever error `open_inode` reports, or
+    /// with the OS error when `fgetxattr` returns a negative value.
+    fn getxattr(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        name: &CStr,
+        size: u32,
+    ) -> io::Result<GetxattrReply> {
+        // An fd opened with `O_PATH` cannot be used with the f{set,get,remove,list}xattr family,
+        // so open a fresh one for this call.
+        let xattr_file = self.open_inode(inode, libc::O_RDONLY | libc::O_NONBLOCK)?;
+
+        // Zero-filled buffer of exactly the requested size.
+        let mut value = vec![0; size as usize];
+
+        // Safe because this will only modify the contents of `value`.
+        let len = unsafe {
+            libc::fgetxattr(
+                xattr_file.as_raw_fd(),
+                name.as_ptr(),
+                value.as_mut_ptr() as *mut libc::c_void,
+                size as libc::size_t,
+            )
+        };
+        if len < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        if size == 0 {
+            return Ok(GetxattrReply::Count(len as u32));
+        }
+
+        value.resize(len as usize, 0);
+        Ok(GetxattrReply::Value(value))
+    }
+
+    /// Lists the extended attribute names of `inode` with `flistxattr`.
+    ///
+    /// When `size` is 0 the reply is `ListxattrReply::Count` carrying the system call's return
+    /// value. Otherwise a `size`-byte buffer is filled, resized to the returned length and
+    /// handed back as `ListxattrReply::Names`. Fails with whatever error `open_inode` reports, or
+    /// with the OS error when `flistxattr` returns a negative value.
+    fn listxattr(&self, _ctx: Context, inode: Inode, size: u32) -> io::Result<ListxattrReply> {
+        // An fd opened with `O_PATH` cannot be used with the f{set,get,remove,list}xattr family,
+        // so open a fresh one for this call.
+        let xattr_file = self.open_inode(inode, libc::O_RDONLY | libc::O_NONBLOCK)?;
+
+        // Zero-filled buffer of exactly the requested size.
+        let mut names = vec![0; size as usize];
+
+        // Safe because this will only modify the contents of `names`.
+        let len = unsafe {
+            libc::flistxattr(
+                xattr_file.as_raw_fd(),
+                names.as_mut_ptr() as *mut libc::c_char,
+                size as libc::size_t,
+            )
+        };
+        if len < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        if size == 0 {
+            return Ok(ListxattrReply::Count(len as u32));
+        }
+
+        names.resize(len as usize, 0);
+        Ok(ListxattrReply::Names(names))
+    }
+
+    /// Removes the extended attribute `name` from `inode` with `fremovexattr`.
+    ///
+    /// Fails with whatever error `open_inode` reports, or with the OS error when `fremovexattr`
+    /// does not return 0.
+    fn removexattr(&self, _ctx: Context, inode: Inode, name: &CStr) -> io::Result<()> {
+        // An fd opened with `O_PATH` cannot be used with the f{set,get,remove,list}xattr family,
+        // so open a fresh one for this call.
+        let xattr_file = self.open_inode(inode, libc::O_RDONLY | libc::O_NONBLOCK)?;
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = unsafe { libc::fremovexattr(xattr_file.as_raw_fd(), name.as_ptr()) };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(())
+    }
+
+    /// Calls `fallocate64` on the file behind `handle` with the given `mode`, `offset` and
+    /// `length`.
+    ///
+    /// Fails with the `ebadf()` error when `handle` is unknown or does not belong to `inode`, or
+    /// with the OS error when the system call does not return 0.
+    fn fallocate(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        handle: Handle,
+        mode: u32,
+        offset: u64,
+        length: u64,
+    ) -> io::Result<()> {
+        let handle_data = self
+            .handles
+            .read()
+            .unwrap()
+            .get(&handle)
+            .filter(|hd| hd.inode == inode)
+            .cloned()
+            .ok_or_else(ebadf)?;
+
+        // The file lock is only held long enough to read the raw fd.
+        let raw_fd = handle_data.file.lock().as_raw_fd();
+
+        // Safe because this doesn't modify any memory and we check the return value.
+        let rc = unsafe {
+            libc::fallocate64(
+                raw_fd,
+                mode as libc::c_int,
+                offset as libc::off64_t,
+                length as libc::off64_t,
+            )
+        };
+        if rc != 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(())
+    }
+}