From ee215426f4e61474aea546b5c0b6c1f1040d5344 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Mon, 1 Jun 2020 10:58:53 -0700 Subject: docker: update ADHD checkout for BoxError Fixes missing BoxError references and clippy errors. BUG=None TEST=docker/build_crosvm_base.sh && docker/wrapped_smoke_test.sh Change-Id: Icba02a1e1284cce6b40555fad86aecaf7956aa30 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2224775 Tested-by: Daniel Verkamp Reviewed-by: Dylan Reid Commit-Queue: Daniel Verkamp --- docker/checkout_commits.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/checkout_commits.env b/docker/checkout_commits.env index 67ec77b..1412aab 100644 --- a/docker/checkout_commits.env +++ b/docker/checkout_commits.env @@ -2,5 +2,5 @@ MESON_COMMIT=a1a8772034aef90e8d58230d8bcfce54ab27bf6a LIBEPOXY_COMMIT=af38a466caf9c2ae49b8acda4ff842ae44d57f78 TPM2_COMMIT=a9bc45bb7fafc65ea8a787894434d409f533b1f1 PLATFORM2_COMMIT=fabad43f1182bf71b3771735b4488180d08f3d59 -ADHD_COMMIT=e64841b1bba00f6e2c113b002f6d1e98bc904b0d +ADHD_COMMIT=ac71b07ecec4c9e31d747f0a9b33252cb402ad6e DRM_COMMIT=00320d7d68ddc7d815d073bb7c92d9a1f9bb8c31 -- cgit 1.4.1 From 1d6967437edc98716e545b82a28c788febfbe79a Mon Sep 17 00:00:00 2001 From: Steven Richman Date: Wed, 13 May 2020 17:22:33 -0700 Subject: hypervisor: add get/set_pvclock The clock functions on the Vm trait are for any arch, to support hypervisors that might have ARM pv clocks. The KVM implementation (x86 only) is mostly the same as before, but uses a hypervisor-agnostic ClockState struct instead of the KVM struct. BUG=chromium:1077058 TEST=cargo test -p hypervisor Change-Id: I0e77ae997d6a30851d28aeb5f73c9ef8ebc464a1 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2202742 Reviewed-by: Daniel Verkamp Tested-by: kokoro Commit-Queue: Steven Richman --- hypervisor/src/kvm/aarch64.rs | 18 ++++++- hypervisor/src/kvm/mod.rs | 12 ++++- hypervisor/src/kvm/x86_64.rs | 116 +++++++++++++++++++++++++++++++----------- hypervisor/src/lib.rs | 18 +++++++ 4 files changed, 132 insertions(+), 32 deletions(-) diff --git a/hypervisor/src/kvm/aarch64.rs b/hypervisor/src/kvm/aarch64.rs index 4f0398f..4e5b65c 100644 --- a/hypervisor/src/kvm/aarch64.rs +++ b/hypervisor/src/kvm/aarch64.rs @@ -2,10 +2,24 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -use sys_util::Result; +use libc::ENXIO; + +use sys_util::{Error, Result}; use super::{KvmVcpu, KvmVm}; -use crate::{VcpuAArch64, VmAArch64}; +use crate::{ClockState, VcpuAArch64, VmAArch64}; + +impl KvmVm { + /// Arch-specific implementation of `Vm::get_pvclock`. Always returns an error on AArch64. + pub fn get_pvclock_arch(&self) -> Result { + Err(Error::new(ENXIO)) + } + + /// Arch-specific implementation of `Vm::set_pvclock`. Always returns an error on AArch64. + pub fn set_pvclock_arch(&self, _state: &ClockState) -> Result<()> { + Err(Error::new(ENXIO)) + } +} impl VmAArch64 for KvmVm { type Vcpu = KvmVcpu; diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 0550792..5fdf124 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -24,7 +24,9 @@ use sys_util::{ GuestMemory, RawDescriptor, Result, SafeDescriptor, }; -use crate::{Hypervisor, HypervisorCap, MappedRegion, RunnableVcpu, Vcpu, VcpuExit, Vm}; +use crate::{ + ClockState, Hypervisor, HypervisorCap, MappedRegion, RunnableVcpu, Vcpu, VcpuExit, Vm, +}; // Wrapper around KVM_SET_USER_MEMORY_REGION ioctl, which creates, modifies, or deletes a mapping // from guest physical to host user pages. @@ -184,6 +186,14 @@ impl Vm for KvmVm { fn get_memory(&self) -> &GuestMemory { &self.guest_mem } + + fn get_pvclock(&self) -> Result { + self.get_pvclock_arch() + } + + fn set_pvclock(&self, state: &ClockState) -> Result<()> { + self.set_pvclock_arch(state) + } } impl AsRawDescriptor for KvmVm { diff --git a/hypervisor/src/kvm/x86_64.rs b/hypervisor/src/kvm/x86_64.rs index 06774f4..f3fbc59 100644 --- a/hypervisor/src/kvm/x86_64.rs +++ b/hypervisor/src/kvm/x86_64.rs @@ -4,14 +4,17 @@ use std::convert::TryInto; -use kvm_sys::*; use libc::E2BIG; -use sys_util::{ioctl_with_mut_ptr, Error, Result}; + +use kvm_sys::*; +use sys_util::{ + errno_result, ioctl_with_mut_ptr, ioctl_with_mut_ref, ioctl_with_ref, Error, Result, +}; use super::{Kvm, KvmVcpu, KvmVm}; use crate::{ - CpuId, CpuIdEntry, HypervisorX86_64, IoapicRedirectionTableEntry, IoapicState, LapicState, - PicState, PitChannelState, PitState, Regs, VcpuX86_64, VmX86_64, + ClockState, CpuId, CpuIdEntry, HypervisorX86_64, IoapicRedirectionTableEntry, IoapicState, + LapicState, PicState, PitChannelState, PitState, Regs, VcpuX86_64, VmX86_64, }; type KvmCpuId = kvm::CpuId; @@ -54,6 +57,58 @@ impl Kvm { } } +impl HypervisorX86_64 for Kvm { + fn get_supported_cpuid(&self) -> Result { + self.get_cpuid(KVM_GET_SUPPORTED_CPUID()) + } + + fn get_emulated_cpuid(&self) -> Result { + self.get_cpuid(KVM_GET_EMULATED_CPUID()) + } +} + +impl KvmVm { + /// Arch-specific implementation of `Vm::get_pvclock`. + pub fn get_pvclock_arch(&self) -> Result { + // Safe because we know that our file is a VM fd, we know the kernel will only write correct + // amount of memory to our pointer, and we verify the return result. + let mut clock_data: kvm_clock_data = unsafe { std::mem::zeroed() }; + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock_data) }; + if ret == 0 { + Ok(ClockState::from(clock_data)) + } else { + errno_result() + } + } + + /// Arch-specific implementation of `Vm::set_pvclock`. + pub fn set_pvclock_arch(&self, state: &ClockState) -> Result<()> { + let clock_data = kvm_clock_data::from(*state); + // Safe because we know that our file is a VM fd, we know the kernel will only read correct + // amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), &clock_data) }; + if ret == 0 { + Ok(()) + } else { + errno_result() + } + } +} + +impl VmX86_64 for KvmVm { + type Vcpu = KvmVcpu; + + fn create_vcpu(&self, id: usize) -> Result { + self.create_kvm_vcpu(id) + } +} + +impl VcpuX86_64 for KvmVcpu { + fn get_regs(&self) -> Result { + Ok(Regs {}) + } +} + impl<'a> From<&'a KvmCpuId> for CpuId { fn from(kvm_cpuid: &'a KvmCpuId) -> CpuId { let kvm_entries = kvm_cpuid.entries_slice(); @@ -74,27 +129,22 @@ impl<'a> From<&'a KvmCpuId> for CpuId { } } -impl HypervisorX86_64 for Kvm { - fn get_supported_cpuid(&self) -> Result { - self.get_cpuid(KVM_GET_SUPPORTED_CPUID()) - } - - fn get_emulated_cpuid(&self) -> Result { - self.get_cpuid(KVM_GET_EMULATED_CPUID()) - } -} - -impl VmX86_64 for KvmVm { - type Vcpu = KvmVcpu; - - fn create_vcpu(&self, id: usize) -> Result { - self.create_kvm_vcpu(id) +impl From for kvm_clock_data { + fn from(state: ClockState) -> Self { + kvm_clock_data { + clock: state.clock, + flags: state.flags, + ..Default::default() + } } } -impl VcpuX86_64 for KvmVcpu { - fn get_regs(&self) -> Result { - Ok(Regs {}) +impl From for ClockState { + fn from(clock_data: kvm_clock_data) -> Self { + ClockState { + clock: clock_data.clock, + flags: clock_data.flags, + } } } @@ -305,15 +355,13 @@ impl From<&kvm_pit_channel_state> for PitChannelState { #[cfg(test)] mod tests { + use super::*; use crate::{ - DeliveryMode, DeliveryStatus, DestinationMode, IoapicRedirectionTableEntry, IoapicState, - LapicState, PicInitState, PicState, PitChannelState, PitRWMode, PitRWState, PitState, - TriggerMode, + DeliveryMode, DeliveryStatus, DestinationMode, HypervisorX86_64, + IoapicRedirectionTableEntry, IoapicState, LapicState, PicInitState, PicState, + PitChannelState, PitRWMode, PitRWState, PitState, TriggerMode, Vm, }; - use kvm_sys::*; - - use super::Kvm; - use crate::HypervisorX86_64; + use sys_util::{GuestAddress, GuestMemory}; #[test] fn get_supported_cpuid() { @@ -501,4 +549,14 @@ mod tests { // convert back and compare assert_eq!(state, PitState::from(&kvm_state)); } + + #[test] + fn clock_handling() { + let kvm = Kvm::new().unwrap(); + let gm = GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vm = KvmVm::new(&kvm, gm).unwrap(); + let mut clock_data = vm.get_pvclock().unwrap(); + clock_data.clock += 1000; + vm.set_pvclock(&clock_data).unwrap(); + } } diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs index 784af8c..ee0bcaf 100644 --- a/hypervisor/src/lib.rs +++ b/hypervisor/src/lib.rs @@ -33,6 +33,14 @@ pub trait Vm: Send + Sized { /// Gets the guest-mapped memory for the Vm. fn get_memory(&self) -> &GuestMemory; + + /// Retrieves the current timestamp of the paravirtual clock as seen by the current guest. + /// Only works on VMs that support `VmCap::PvClock`. + fn get_pvclock(&self) -> Result; + + /// Sets the current timestamp of the paravirtual clock as seen by the current guest. + /// Only works on VMs that support `VmCap::PvClock`. + fn set_pvclock(&self, state: &ClockState) -> Result<()>; } /// A wrapper around using a VCPU. @@ -83,4 +91,14 @@ pub enum VcpuExit { Unknown, } +/// A single route for an IRQ. pub struct IrqRoute {} + +/// The state of the paravirtual clock +#[derive(Debug, Default, Copy, Clone)] +pub struct ClockState { + /// Current pv clock timestamp, as seen by the guest + pub clock: u64, + /// Hypervisor-specific feature flags for the pv clock + pub flags: u32, +} -- cgit 1.4.1 From 5f1a64892b714885f6c7405084a390467c03201a Mon Sep 17 00:00:00 2001 From: Zach Reizner Date: Mon, 27 Apr 2020 12:52:08 -0700 Subject: msg_socket: implement MsgOnSocket for Vec and tuples These container types are similar to arrays except tuples have heterogeneous data types and Vec has a dynamic number of elements. BUG=None TEST=cargo test -p msg_socket Change-Id: I2cbbaeb7f13b7700294ac50751530510098ba16d Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2168588 Reviewed-by: Daniel Verkamp Reviewed-by: Dylan Reid Tested-by: kokoro Tested-by: Zach Reizner Commit-Queue: Zach Reizner --- msg_socket/src/msg_on_socket.rs | 123 +++++++------------- msg_socket/src/msg_on_socket/slice.rs | 184 ++++++++++++++++++++++++++++++ msg_socket/src/msg_on_socket/tuple.rs | 205 ++++++++++++++++++++++++++++++++++ 3 files changed, 431 insertions(+), 81 deletions(-) create mode 100644 msg_socket/src/msg_on_socket/slice.rs create mode 100644 msg_socket/src/msg_on_socket/tuple.rs diff --git a/msg_socket/src/msg_on_socket.rs b/msg_socket/src/msg_on_socket.rs index eeaf1bd..3e87102 100644 --- a/msg_socket/src/msg_on_socket.rs +++ b/msg_socket/src/msg_on_socket.rs @@ -2,17 +2,20 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +mod slice; +mod tuple; + use std::fmt::{self, Display}; use std::fs::File; use std::mem::{size_of, transmute_copy, MaybeUninit}; use std::net::{TcpListener, TcpStream, UdpSocket}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::net::{UnixDatagram, UnixListener, UnixStream}; -use std::ptr::drop_in_place; use std::result; use std::sync::Arc; use data_model::*; +use slice::{slice_read_helper, slice_write_helper}; use sync::Mutex; use sys_util::{Error as SysError, EventFd}; @@ -283,20 +286,6 @@ rawfd_impl!(UdpSocket); rawfd_impl!(UnixListener); rawfd_impl!(UnixDatagram); -// Converts a slice into an array of fixed size inferred from by the return value. Panics if the -// slice is too small, but will tolerate slices that are too large. -fn slice_to_array(s: &[T]) -> O -where - T: Copy, - O: Default + AsMut<[T]>, -{ - let mut o = O::default(); - let o_slice = o.as_mut(); - let len = o_slice.len(); - o_slice.copy_from_slice(&s[..len]); - o -} - // usize could be different sizes on different targets. We always use u64. impl MsgOnSocket for usize { fn msg_size(&self) -> usize { @@ -381,6 +370,35 @@ le_impl!(Le16, u16); le_impl!(Le32, u32); le_impl!(Le64, u64); +fn simple_read(buffer: &[u8], offset: &mut usize) -> MsgResult { + assert!(!T::uses_fd()); + // Safety for T::read_from_buffer depends on the given FDs being valid, but we pass no FDs. + let (v, _) = unsafe { T::read_from_buffer(&buffer[*offset..], &[])? }; + *offset += v.msg_size(); + Ok(v) +} + +fn simple_write(v: T, buffer: &mut [u8], offset: &mut usize) -> MsgResult<()> { + assert!(!T::uses_fd()); + v.write_to_buffer(&mut buffer[*offset..], &mut [])?; + *offset += v.msg_size(); + Ok(()) +} + +// Converts a slice into an array of fixed size inferred from by the return value. Panics if the +// slice is too small, but will tolerate slices that are too large. +fn slice_to_array(s: &[T]) -> O +where + T: Copy, + O: Default + AsMut<[T]>, +{ + let mut o = O::default(); + let o_slice = o.as_mut(); + let len = o_slice.len(); + o_slice.copy_from_slice(&s[..len]); + o +} + macro_rules! array_impls { ($N:expr, $t: ident $($ts:ident)*) => { @@ -414,46 +432,7 @@ macro_rules! array_impls { // themselves don't require initializing. let mut msgs: [MaybeUninit; $N] = MaybeUninit::uninit().assume_init(); - let mut offset = 0usize; - let mut fd_offset = 0usize; - - // In case of an error, we need to keep track of how many elements got initialized. - // In order to perform the necessary drops, the below loop is executed in a closure - // to capture errors without returning. - let mut last_index = 0; - let res = (|| { - for msg in &mut msgs[..] { - let element_size = match T::fixed_size() { - Some(s) => s, - None => { - let (element_size, _) = u64::read_from_buffer(&buffer[offset..], &[])?; - offset += element_size.msg_size(); - element_size as usize - } - }; - let (m, fd_size) = - T::read_from_buffer(&buffer[offset..], &fds[fd_offset..])?; - *msg = MaybeUninit::new(m); - offset += element_size; - fd_offset += fd_size; - last_index += 1; - } - Ok(()) - })(); - - // Because `MaybeUninit` will not automatically call drops, we have to drop the - // partially initialized array manually in the case of an error. - if let Err(e) = res { - for msg in &mut msgs[..last_index] { - // The call to `as_mut_ptr()` turns the `MaybeUninit` element of the array - // into a pointer, which can be used with `drop_in_place` to call the - // destructor without moving the element, which is impossible. This is safe - // because `last_index` prevents this loop from traversing into the - // uninitialized parts of the array. - drop_in_place(msg.as_mut_ptr()); - } - return Err(e) - } + let fd_count = slice_read_helper(buffer, fds, &mut msgs)?; // Also taken from the canonical example, we initialized every member of the array // in the first loop of this function, so it is safe to `transmute_copy` the array @@ -463,7 +442,7 @@ macro_rules! array_impls { // Because this function operates on generic data, the type is "dependently-sized" // and so the compiler will not check that the size of the input and output match. // See this issue for details: https://github.com/rust-lang/rust/issues/61956 - Ok((transmute_copy::<_, [T; $N]>(&msgs), fd_offset)) + Ok((transmute_copy::<_, [T; $N]>(&msgs), fd_count)) } fn write_to_buffer( @@ -471,25 +450,7 @@ macro_rules! array_impls { buffer: &mut [u8], fds: &mut [RawFd], ) -> MsgResult { - let mut offset = 0usize; - let mut fd_offset = 0usize; - for idx in 0..$N { - let element_size = match T::fixed_size() { - Some(s) => s, - None => { - let element_size = self[idx].msg_size() as u64; - element_size.write_to_buffer(&mut buffer[offset..], &mut [])?; - offset += element_size.msg_size(); - element_size as usize - } - }; - let fd_size = self[idx].write_to_buffer(&mut buffer[offset..], - &mut fds[fd_offset..])?; - offset += element_size; - fd_offset += fd_size; - } - - Ok(fd_offset) + slice_write_helper(self, buffer, fds) } } #[cfg(test)] @@ -504,7 +465,7 @@ macro_rules! array_impls { array.write_to_buffer(&mut buffer, &mut []).unwrap(); let read_array = unsafe { ArrayType::read_from_buffer(&buffer, &[]) }.unwrap().0; - assert_eq!(array, read_array); + assert!(array.iter().eq(read_array.iter())); } #[test] @@ -515,7 +476,7 @@ macro_rules! array_impls { array.write_to_buffer(&mut buffer, &mut []).unwrap(); let read_array = unsafe { ArrayType::read_from_buffer(&buffer, &[]) }.unwrap().0; - assert_eq!(array, read_array); + assert!(array.iter().eq(read_array.iter())); } } array_impls!(($N - 1), $($ts)*); @@ -524,9 +485,9 @@ macro_rules! array_impls { } array_impls! { - 32, tmp1 tmp2 tmp3 tmp4 tmp5 tmp6 tmp7 tmp8 tmp9 tmp10 tmp11 tmp12 tmp13 tmp14 tmp15 tmp16 + 64, tmp1 tmp2 tmp3 tmp4 tmp5 tmp6 tmp7 tmp8 tmp9 tmp10 tmp11 tmp12 tmp13 tmp14 tmp15 tmp16 tmp17 tmp18 tmp19 tmp20 tmp21 tmp22 tmp23 tmp24 tmp25 tmp26 tmp27 tmp28 tmp29 tmp30 tmp31 - tmp32 + tmp32 tmp33 tmp34 tmp35 tmp36 tmp37 tmp38 tmp39 tmp40 tmp41 tmp42 tmp43 tmp44 tmp45 tmp46 + tmp47 tmp48 tmp49 tmp50 tmp51 tmp52 tmp53 tmp54 tmp55 tmp56 tmp57 tmp58 tmp59 tmp60 tmp61 + tmp62 tmp63 tmp64 } - -// TODO(jkwang) Define MsgOnSocket for tuple? diff --git a/msg_socket/src/msg_on_socket/slice.rs b/msg_socket/src/msg_on_socket/slice.rs new file mode 100644 index 0000000..7b6ef28 --- /dev/null +++ b/msg_socket/src/msg_on_socket/slice.rs @@ -0,0 +1,184 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +use std::mem::{size_of, ManuallyDrop, MaybeUninit}; +use std::os::unix::io::RawFd; +use std::ptr::drop_in_place; + +use crate::{MsgOnSocket, MsgResult}; + +use super::{simple_read, simple_write}; + +/// Helper used by the types that read a slice of homegenously typed data. +/// +/// # Safety +/// This function has the same safety requirements as `T::read_from_buffer`, with the additional +/// requirements that the `msgs` are only used on success of this function +pub unsafe fn slice_read_helper( + buffer: &[u8], + fds: &[RawFd], + msgs: &mut [MaybeUninit], +) -> MsgResult { + let mut offset = 0usize; + let mut fd_offset = 0usize; + + // In case of an error, we need to keep track of how many elements got initialized. + // In order to perform the necessary drops, the below loop is executed in a closure + // to capture errors without returning. + let mut last_index = 0; + let res = (|| { + for msg in &mut msgs[..] { + let element_size = match T::fixed_size() { + Some(s) => s, + None => simple_read::(buffer, &mut offset)? as usize, + }; + // Assuming the unsafe caller gave valid FDs, this call should be safe. + let (m, fd_size) = T::read_from_buffer(&buffer[offset..], &fds[fd_offset..])?; + *msg = MaybeUninit::new(m); + offset += element_size; + fd_offset += fd_size; + last_index += 1; + } + Ok(()) + })(); + + // Because `MaybeUninit` will not automatically call drops, we have to drop the + // partially initialized array manually in the case of an error. + if let Err(e) = res { + for msg in &mut msgs[..last_index] { + // The call to `as_mut_ptr()` turns the `MaybeUninit` element of the array + // into a pointer, which can be used with `drop_in_place` to call the + // destructor without moving the element, which is impossible. This is safe + // because `last_index` prevents this loop from traversing into the + // uninitialized parts of the array. + drop_in_place(msg.as_mut_ptr()); + } + return Err(e); + } + + Ok(fd_offset) +} + +/// Helper used by the types that write a slice of homegenously typed data. +pub fn slice_write_helper( + msgs: &[T], + buffer: &mut [u8], + fds: &mut [RawFd], +) -> MsgResult { + let mut offset = 0usize; + let mut fd_offset = 0usize; + for msg in msgs { + let element_size = match T::fixed_size() { + Some(s) => s, + None => { + let element_size = msg.msg_size(); + simple_write(element_size as u64, buffer, &mut offset)?; + element_size as usize + } + }; + let fd_size = msg.write_to_buffer(&mut buffer[offset..], &mut fds[fd_offset..])?; + offset += element_size; + fd_offset += fd_size; + } + + Ok(fd_offset) +} + +impl MsgOnSocket for Vec { + fn uses_fd() -> bool { + T::uses_fd() + } + + fn fixed_size() -> Option { + None + } + + fn msg_size(&self) -> usize { + let vec_size = match T::fixed_size() { + Some(s) => s * self.len(), + None => self.iter().map(|i| i.msg_size() + size_of::()).sum(), + }; + size_of::() + vec_size + } + + fn fd_count(&self) -> usize { + if T::uses_fd() { + self.iter().map(|i| i.fd_count()).sum() + } else { + 0 + } + } + + unsafe fn read_from_buffer(buffer: &[u8], fds: &[RawFd]) -> MsgResult<(Self, usize)> { + let mut offset = 0; + let len = simple_read::(buffer, &mut offset)? as usize; + let mut msgs: Vec> = Vec::with_capacity(len); + msgs.set_len(len); + let fd_count = slice_read_helper(&buffer[offset..], fds, &mut msgs)?; + let mut msgs = ManuallyDrop::new(msgs); + Ok(( + Vec::from_raw_parts(msgs.as_mut_ptr() as *mut T, msgs.len(), msgs.capacity()), + fd_count, + )) + } + + fn write_to_buffer(&self, buffer: &mut [u8], fds: &mut [RawFd]) -> MsgResult { + let mut offset = 0; + simple_write(self.len() as u64, buffer, &mut offset)?; + slice_write_helper(self, &mut buffer[offset..], fds) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn read_write_1_fixed() { + let vec = vec![1u32]; + let mut buffer = vec![0; vec.msg_size()]; + vec.write_to_buffer(&mut buffer, &mut []).unwrap(); + let read_vec = unsafe { >::read_from_buffer(&buffer, &[]) } + .unwrap() + .0; + + assert_eq!(vec, read_vec); + } + + #[test] + fn read_write_8_fixed() { + let vec = vec![1u16, 1, 3, 5, 8, 13, 21, 34]; + let mut buffer = vec![0; vec.msg_size()]; + vec.write_to_buffer(&mut buffer, &mut []).unwrap(); + let read_vec = unsafe { >::read_from_buffer(&buffer, &[]) } + .unwrap() + .0; + assert_eq!(vec, read_vec); + } + + #[test] + fn read_write_1() { + let vec = vec![Some(1u64)]; + let mut buffer = vec![0; vec.msg_size()]; + println!("{:?}", vec.msg_size()); + vec.write_to_buffer(&mut buffer, &mut []).unwrap(); + let read_vec = unsafe { >::read_from_buffer(&buffer, &[]) } + .unwrap() + .0; + + assert_eq!(vec, read_vec); + } + + #[test] + fn read_write_4() { + let vec = vec![Some(12u16), Some(0), None, None]; + let mut buffer = vec![0; vec.msg_size()]; + vec.write_to_buffer(&mut buffer, &mut []).unwrap(); + let read_vec = unsafe { >::read_from_buffer(&buffer, &[]) } + .unwrap() + .0; + + assert_eq!(vec, read_vec); + } +} diff --git a/msg_socket/src/msg_on_socket/tuple.rs b/msg_socket/src/msg_on_socket/tuple.rs new file mode 100644 index 0000000..f960ce5 --- /dev/null +++ b/msg_socket/src/msg_on_socket/tuple.rs @@ -0,0 +1,205 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +use std::mem::size_of; +use std::os::unix::io::RawFd; + +use crate::{MsgOnSocket, MsgResult}; + +use super::{simple_read, simple_write}; + +// Returns the size of one part of a tuple. +fn tuple_size_helper(v: &T) -> usize { + T::fixed_size().unwrap_or_else(|| v.msg_size() + size_of::()) +} + +unsafe fn tuple_read_helper( + buffer: &[u8], + fds: &[RawFd], + buffer_index: &mut usize, + fd_index: &mut usize, +) -> MsgResult { + let end = match T::fixed_size() { + Some(_) => buffer.len(), + None => { + let len = simple_read::(buffer, buffer_index)? as usize; + *buffer_index + len + } + }; + let (v, fd_read) = T::read_from_buffer(&buffer[*buffer_index..end], &fds[*fd_index..])?; + *buffer_index += v.msg_size(); + *fd_index += fd_read; + Ok(v) +} + +fn tuple_write_helper( + v: &T, + buffer: &mut [u8], + fds: &mut [RawFd], + buffer_index: &mut usize, + fd_index: &mut usize, +) -> MsgResult<()> { + let end = match T::fixed_size() { + Some(_) => buffer.len(), + None => { + let len = v.msg_size(); + simple_write(len as u64, buffer, buffer_index)?; + *buffer_index + len + } + }; + let fd_written = v.write_to_buffer(&mut buffer[*buffer_index..end], &mut fds[*fd_index..])?; + *buffer_index += v.msg_size(); + *fd_index += fd_written; + Ok(()) +} + +macro_rules! tuple_impls { + () => {}; + ($t: ident) => { + #[allow(unused_variables, non_snake_case)] + impl<$t: MsgOnSocket> MsgOnSocket for ($t,) { + fn uses_fd() -> bool { + $t::uses_fd() + } + + fn fd_count(&self) -> usize { + self.0.fd_count() + } + + fn fixed_size() -> Option { + $t::fixed_size() + } + + fn msg_size(&self) -> usize { + self.0.msg_size() + } + + unsafe fn read_from_buffer(buffer: &[u8], fds: &[RawFd]) -> MsgResult<(Self, usize)> { + let (t, s) = $t::read_from_buffer(buffer, fds)?; + Ok(((t,), s)) + } + + fn write_to_buffer( + &self, + buffer: &mut [u8], + fds: &mut [RawFd], + ) -> MsgResult { + self.0.write_to_buffer(buffer, fds) + } + } + }; + ($t: ident, $($ts:ident),*) => { + #[allow(unused_variables, non_snake_case)] + impl<$t: MsgOnSocket $(, $ts: MsgOnSocket)*> MsgOnSocket for ($t$(, $ts)*) { + fn uses_fd() -> bool { + $t::uses_fd() $(|| $ts::uses_fd())* + } + + fn fd_count(&self) -> usize { + if Self::uses_fd() { + return 0; + } + let ($t $(,$ts)*) = self; + $t.fd_count() $(+ $ts.fd_count())* + } + + fn fixed_size() -> Option { + // Returns None if any element is not fixed size. + Some($t::fixed_size()? $(+ $ts::fixed_size()?)*) + } + + fn msg_size(&self) -> usize { + if let Some(size) = Self::fixed_size() { + return size + } + + let ($t $(,$ts)*) = self; + tuple_size_helper($t) $(+ tuple_size_helper($ts))* + } + + unsafe fn read_from_buffer(buffer: &[u8], fds: &[RawFd]) -> MsgResult<(Self, usize)> { + let mut buffer_index = 0; + let mut fd_index = 0; + Ok(( + ( + tuple_read_helper(buffer, fds, &mut buffer_index, &mut fd_index)?, + $({ + // Dummy let used to trigger the correct number of iterations. + let $ts = (); + tuple_read_helper(buffer, fds, &mut buffer_index, &mut fd_index)? + },)* + ), + fd_index + )) + } + + fn write_to_buffer( + &self, + buffer: &mut [u8], + fds: &mut [RawFd], + ) -> MsgResult { + let mut buffer_index = 0; + let mut fd_index = 0; + let ($t $(,$ts)*) = self; + tuple_write_helper($t, buffer, fds, &mut buffer_index, &mut fd_index)?; + $( + tuple_write_helper($ts, buffer, fds, &mut buffer_index, &mut fd_index)?; + )* + Ok(fd_index) + } + } + tuple_impls!{ $($ts),* } + } +} + +// Imlpement tuple for up to 8 elements. +tuple_impls! { A, B, C, D, E, F, G, H } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn read_write_1_fixed() { + let tuple = (1,); + let mut buffer = vec![0; tuple.msg_size()]; + tuple.write_to_buffer(&mut buffer, &mut []).unwrap(); + let read_tuple = unsafe { <(u32,)>::read_from_buffer(&buffer, &[]) } + .unwrap() + .0; + + assert_eq!(tuple, read_tuple); + } + + #[test] + fn read_write_8_fixed() { + let tuple = (1u32, 2u8, 3u16, 4u64, 5u32, 6u16, 7u8, 8u8); + let mut buffer = vec![0; tuple.msg_size()]; + tuple.write_to_buffer(&mut buffer, &mut []).unwrap(); + let read_tuple = unsafe { <_>::read_from_buffer(&buffer, &[]) }.unwrap().0; + + assert_eq!(tuple, read_tuple); + } + + #[test] + fn read_write_1() { + let tuple = (Some(1u64),); + let mut buffer = vec![0; tuple.msg_size()]; + tuple.write_to_buffer(&mut buffer, &mut []).unwrap(); + let read_tuple = unsafe { <_>::read_from_buffer(&buffer, &[]) }.unwrap().0; + + assert_eq!(tuple, read_tuple); + } + + #[test] + fn read_write_4() { + let tuple = (Some(12u16), Some(false), None::, None::); + let mut buffer = vec![0; tuple.msg_size()]; + println!("{:?}", tuple.msg_size()); + tuple.write_to_buffer(&mut buffer, &mut []).unwrap(); + let read_tuple = unsafe { <_>::read_from_buffer(&buffer, &[]) }.unwrap().0; + + assert_eq!(tuple, read_tuple); + } +} -- cgit 1.4.1 From e618bf7ec514aaf4d76fd83319a97c7ab9796035 Mon Sep 17 00:00:00 2001 From: Chirantan Ekbote Date: Mon, 25 May 2020 18:30:24 +0900 Subject: sys_util: Make ioctl number method a const fn This allows us to define const variables that are the return value of the method, which we can then use in match statements. BUG=b:157189438 TEST=unit tests Change-Id: I2475c59bfd43ec9ec149a6b688bf680fa2361a0b Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2214962 Tested-by: kokoro Reviewed-by: Dylan Reid Commit-Queue: Chirantan Ekbote --- sys_util/src/ioctl.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys_util/src/ioctl.rs b/sys_util/src/ioctl.rs index f8c7604..3cf622b 100644 --- a/sys_util/src/ioctl.rs +++ b/sys_util/src/ioctl.rs @@ -23,13 +23,13 @@ macro_rules! ioctl_expr { macro_rules! ioctl_ioc_nr { ($name:ident, $dir:expr, $ty:expr, $nr:expr, $size:expr) => { #[allow(non_snake_case)] - pub fn $name() -> ::std::os::raw::c_ulong { + pub const fn $name() -> ::std::os::raw::c_ulong { $crate::ioctl_expr!($dir, $ty, $nr, $size) } }; ($name:ident, $dir:expr, $ty:expr, $nr:expr, $size:expr, $($v:ident),+) => { #[allow(non_snake_case)] - pub fn $name($($v: ::std::os::raw::c_uint),+) -> ::std::os::raw::c_ulong { + pub const fn $name($($v: ::std::os::raw::c_uint),+) -> ::std::os::raw::c_ulong { $crate::ioctl_expr!($dir, $ty, $nr, $size) } }; -- cgit 1.4.1 From c4707badd013c37ff3c1c0847d752bf69f12f86a Mon Sep 17 00:00:00 2001 From: Chirantan Ekbote Date: Mon, 25 May 2020 18:31:51 +0900 Subject: devices: fs: Refactor ioctl handling code Now that the ioctl number method is const we can use a match statement rather than a series of if-else expressions. BUG=b:157189438 TEST=unit tests Change-Id: I9839f2de842ec512811101c07445ca5f99f3fe2f Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2214963 Tested-by: kokoro Commit-Queue: Chirantan Ekbote Reviewed-by: Zach Reizner Reviewed-by: Stephen Barber --- devices/src/virtio/fs/passthrough.rs | 50 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/devices/src/virtio/fs/passthrough.rs b/devices/src/virtio/fs/passthrough.rs index bcc7c5d..189b0c3 100644 --- a/devices/src/virtio/fs/passthrough.rs +++ b/devices/src/virtio/fs/passthrough.rs @@ -1694,34 +1694,38 @@ impl FileSystem for PassthroughFs { out_size: u32, r: R, ) -> io::Result { + const GET_ENCRYPTION_POLICY: u32 = FS_IOC_GET_ENCRYPTION_POLICY() as u32; + const SET_ENCRYPTION_POLICY: u32 = FS_IOC_SET_ENCRYPTION_POLICY() as u32; + // Normally, we wouldn't need to retry the FS_IOC_GET_ENCRYPTION_POLICY and // FS_IOC_SET_ENCRYPTION_POLICY ioctls. Unfortunately, the I/O directions for both of them // are encoded backwards so they can only be handled as unrestricted fuse ioctls. - if cmd == FS_IOC_GET_ENCRYPTION_POLICY() as u32 { - if out_size < size_of::() as u32 { - let input = Vec::new(); - let output = vec![IoctlIovec { - base: arg, - len: size_of::() as u64, - }]; - Ok(IoctlReply::Retry { input, output }) - } else { - self.get_encryption_policy(handle) + match cmd { + GET_ENCRYPTION_POLICY => { + if out_size < size_of::() as u32 { + let input = Vec::new(); + let output = vec![IoctlIovec { + base: arg, + len: size_of::() as u64, + }]; + Ok(IoctlReply::Retry { input, output }) + } else { + self.get_encryption_policy(handle) + } } - } else if cmd == FS_IOC_SET_ENCRYPTION_POLICY() as u32 { - if in_size < size_of::() as u32 { - let input = vec![IoctlIovec { - base: arg, - len: size_of::() as u64, - }]; - let output = Vec::new(); - Ok(IoctlReply::Retry { input, output }) - } else { - self.set_encryption_policy(handle, r) + SET_ENCRYPTION_POLICY => { + if in_size < size_of::() as u32 { + let input = vec![IoctlIovec { + base: arg, + len: size_of::() as u64, + }]; + let output = Vec::new(); + Ok(IoctlReply::Retry { input, output }) + } else { + self.set_encryption_policy(handle, r) + } } - } else { - // Did you know that a file/directory is not a TTY? - Err(io::Error::from_raw_os_error(libc::ENOTTY)) + _ => Err(io::Error::from_raw_os_error(libc::ENOTTY)), } } -- cgit 1.4.1 From 814a8da0ed70187bf06618ee3a545ca3361b5933 Mon Sep 17 00:00:00 2001 From: Chirantan Ekbote Date: Wed, 27 May 2020 17:18:07 +0900 Subject: devices: fs: Use 2 stage create and mkdir When creating a file or directory the virtio-fs server changes its effective uid and gid to the uid and gid of the process that made the call. This ensures that the file or directory has the correct owner and group when it is created and also serves as an access check to ensure that the process that made the call has permission to modify the parent directory. However, this causes an EACCES error when the following conditions are met: * The parent directory has g+rw permissions with gid A * The process has gid B but has A in its list of supplementary groups In this case the fuse context only contains gid B, which doesn't have permission to modify the parent directory. Unfortunately there's no way for us to detect this on the server side so instead we just have to rely on the permission checks carried out by the kernel driver. If the server receives a create call, then assume that the kernel has verified that the process is allowed to create that file/directory and just create it without changing the server thread's uid and gid. Additionally, in order to ensure that a newly created file appears atomically in the parent directory with the proper owner and group, change the create implementation to use `O_TMPFILE` and `linkat` as described in the open(2) manpage. There is no `O_TMPFILE` equivalent for directories so create a "hidden" directory with a randomly generated name, modify the uid/gid and mode, and then rename it into place. BUG=b:156696212 TEST=tast run $DUT vm.Virtiofs TEST=Create a test directory with group wayland and permissions g+rw. Then run `su -s /bin/bash -c 'touch ${dir}/foo' - crosvm` and `su -s /bin/bash -c 'mkdir ${dir}/bar' - crosvm`. Change-Id: If5fbcb1b011664c7c1ac29542a2f90d129c34962 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2217534 Reviewed-by: Chirantan Ekbote Commit-Queue: Chirantan Ekbote Tested-by: Chirantan Ekbote --- devices/src/virtio/fs/passthrough.rs | 274 +++++++++++++++++++++++++++++++---- seccomp/aarch64/fs_device.policy | 2 + seccomp/arm/fs_device.policy | 3 + seccomp/x86_64/fs_device.policy | 3 + 4 files changed, 254 insertions(+), 28 deletions(-) diff --git a/devices/src/virtio/fs/passthrough.rs b/devices/src/virtio/fs/passthrough.rs index 189b0c3..4833bb7 100644 --- a/devices/src/virtio/fs/passthrough.rs +++ b/devices/src/virtio/fs/passthrough.rs @@ -9,6 +9,7 @@ use std::fs::File; use std::io; use std::mem::{self, size_of, MaybeUninit}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::ptr; use std::str::FromStr; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::Arc; @@ -765,6 +766,99 @@ fn forget_one( } } +// A temporary directory that is automatically deleted when dropped unless `into_inner()` is called. +// This isn't a general-purpose temporary directory and is only intended to be used to ensure that +// there are no leaks when initializing a newly created directory with the correct metadata (see the +// implementation of `mkdir()` below). The directory is removed via a call to `unlinkat` so callers +// are not allowed to actually populate this temporary directory with any entries (or else deleting +// the directory will fail). +struct TempDir { + path: CString, + file: File, +} + +impl TempDir { + // Creates a new temporary directory in `path` with a randomly generated name. `path` must be a + // directory. + fn new(path: CString) -> io::Result { + let mut template = path.into_bytes(); + + // mkdtemp requires that the last 6 bytes are XXXXXX. We're not using PathBuf here because + // the round-trip from Vec to PathBuf and back is really tedious due to Windows/Unix + // differences. + template.extend(b"/.XXXXXX"); + + // The only way this can cause an error is if the caller passed in a malformed CString. + let ptr = CString::new(template) + .map(CString::into_raw) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + + // Safe because this will only modify `ptr`. + let ret = unsafe { libc::mkdtemp(ptr) }; + + // Safe because this pointer was originally created by a call to `CString::into_raw`. + let tmpdir = unsafe { CString::from_raw(ptr) }; + + if ret.is_null() { + return Err(io::Error::last_os_error()); + } + + // Safe because this doesn't modify any memory and we check the return value. + let fd = unsafe { + libc::openat( + libc::AT_FDCWD, + tmpdir.as_ptr(), + libc::O_DIRECTORY | libc::O_CLOEXEC, + ) + }; + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(TempDir { + path: tmpdir, + // Safe because we just opened this fd. + file: unsafe { File::from_raw_fd(fd) }, + }) + } + + fn path(&self) -> &CStr { + &self.path + } + + // Consumes the `TempDir`, returning the inner `File` without deleting the temporary + // directory. + fn into_inner(self) -> (CString, File) { + // Safe because this is a valid pointer and we are going to call `mem::forget` on `self` so + // we will not be aliasing memory. + let path = unsafe { ptr::read(&self.path) }; + let file = unsafe { ptr::read(&self.file) }; + mem::forget(self); + + (path, file) + } +} + +impl AsRawFd for TempDir { + fn as_raw_fd(&self) -> RawFd { + self.file.as_raw_fd() + } +} + +impl Drop for TempDir { + fn drop(&mut self) { + // There is no `AT_EMPTY_PATH` equivalent for `unlinkat` and using "." as the path returns + // EINVAL. So we have no choice but to use the real path. Safe because this doesn't modify + // any memory and we check the return value. + let ret = + unsafe { libc::unlinkat(libc::AT_FDCWD, self.path().as_ptr(), libc::AT_REMOVEDIR) }; + if ret < 0 { + println!("Failed to remove tempdir: {}", io::Error::last_os_error()); + error!("Failed to remove tempdir: {}", io::Error::last_os_error()); + } + } +} + impl FileSystem for PassthroughFs { type Inode = Inode; type Handle = Handle; @@ -892,7 +986,14 @@ impl FileSystem for PassthroughFs { mode: u32, umask: u32, ) -> io::Result { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + // This method has the same issues as `create()`: namely that the kernel may have allowed a + // process to make a directory due to one of its supplementary groups but that information + // is not forwarded to us. However, there is no `O_TMPDIR` equivalent for directories so + // instead we create a "hidden" directory with a randomly generated name in the parent + // directory, modify the uid/gid and mode to the proper values, and then rename it to the + // requested name. This ensures that even in the case of a power loss the directory is not + // visible in the filesystem with the requested name but incorrect metadata. The only thing + // left would be a empty hidden directory with a random name. let data = self .inodes .lock() @@ -900,13 +1001,43 @@ impl FileSystem for PassthroughFs { .map(Arc::clone) .ok_or_else(ebadf)?; - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { libc::mkdirat(data.file.as_raw_fd(), name.as_ptr(), mode & !umask) }; - if res == 0 { - self.do_lookup(parent, name) - } else { - Err(io::Error::last_os_error()) + let tmpdir = self.get_path(parent).and_then(TempDir::new)?; + + // Set the uid and gid for the directory. Safe because this doesn't modify any memory and we + // check the return value. + let ret = unsafe { libc::fchown(tmpdir.as_raw_fd(), ctx.uid, ctx.gid) }; + if ret < 0 { + return Err(io::Error::last_os_error()); } + + // Set the mode. Safe because this doesn't modify any memory and we check the return value. + let ret = unsafe { libc::fchmod(tmpdir.as_raw_fd(), mode & !umask) }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + + // Now rename it into place. Safe because this doesn't modify any memory and we check the + // return value. TODO: Switch to libc::renameat2 once + // https://github.com/rust-lang/libc/pull/1508 lands and we have glibc 2.28. + let ret = unsafe { + libc::syscall( + libc::SYS_renameat2, + libc::AT_FDCWD, + tmpdir.path().as_ptr(), + data.file.as_raw_fd(), + name.as_ptr(), + libc::RENAME_NOREPLACE, + ) + }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + + // Now that we've moved the directory make sure we don't try to delete the now non-existent + // `tmpdir`. + tmpdir.into_inner(); + + self.do_lookup(parent, name) } fn rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { @@ -1007,7 +1138,16 @@ impl FileSystem for PassthroughFs { flags: u32, umask: u32, ) -> io::Result<(Entry, Option, OpenOptions)> { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + // The `Context` may not contain all the information we need to create the file here. For + // example, a process may be part of several groups, one of which gives it permission to + // create a file in `parent`, but is not the gid of the process. This information is not + // forwarded to the server so we don't know when this is happening. Instead, we just rely on + // the access checks in the kernel driver: if we received this request then the kernel has + // determined that the process is allowed to create the file and we shouldn't reject it now + // based on acls. + // + // To ensure that the file is created atomically with the proper uid/gid we use `O_TMPFILE` + // + `linkat` as described in the `open(2)` manpage. let data = self .inodes .lock() @@ -1015,15 +1155,26 @@ impl FileSystem for PassthroughFs { .map(Arc::clone) .ok_or_else(ebadf)?; - // Safe because this doesn't modify any memory and we check the return value. We don't - // really check `flags` because if the kernel can't handle poorly specified flags then we - // have much bigger problems. + // We don't want to use `O_EXCL` with `O_TMPFILE` as it has a different meaning when used in + // that combination. + let mut tmpflags = (flags as i32 | libc::O_TMPFILE | libc::O_CLOEXEC | libc::O_NOFOLLOW) + & !(libc::O_EXCL | libc::O_CREAT); + + // O_TMPFILE requires that we use O_RDWR or O_WRONLY. + if flags as i32 & libc::O_ACCMODE == libc::O_RDONLY { + tmpflags &= !libc::O_ACCMODE; + tmpflags |= libc::O_RDWR; + } + + // Safe because this is a valid c string. + let current_dir = unsafe { CStr::from_bytes_with_nul_unchecked(b".\0") }; + + // Safe because this doesn't modify any memory and we check the return value. let fd = unsafe { libc::openat( data.file.as_raw_fd(), - name.as_ptr(), - (flags as i32 | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW) - & !libc::O_DIRECT, + current_dir.as_ptr(), + tmpflags, mode & !(umask & 0o777), ) }; @@ -1032,26 +1183,49 @@ impl FileSystem for PassthroughFs { } // Safe because we just opened this fd. - let file = Mutex::new(unsafe { File::from_raw_fd(fd) }); + let tmpfile = unsafe { File::from_raw_fd(fd) }; - let entry = self.do_lookup(parent, name)?; + // Now set the uid and gid for the file. Safe because this doesn't modify any memory and we + // check the return value. + let ret = unsafe { libc::fchown(tmpfile.as_raw_fd(), ctx.uid, ctx.gid) }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } - let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); - let data = HandleData { - inode: entry.inode, - file, + let proc_path = CString::new(format!("self/fd/{}", tmpfile.as_raw_fd())) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + // Finally link it into the file system tree so that it's visible to other processes. Safe + // because this doesn't modify any memory and we check the return value. + let ret = unsafe { + libc::linkat( + self.proc.as_raw_fd(), + proc_path.as_ptr(), + data.file.as_raw_fd(), + name.as_ptr(), + libc::AT_SYMLINK_FOLLOW, + ) }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } - self.handles.lock().insert(handle, Arc::new(data)); + // We no longer need the tmpfile. + mem::drop(tmpfile); - let mut opts = OpenOptions::empty(); - match self.cfg.cache_policy { - CachePolicy::Never => opts |= OpenOptions::DIRECT_IO, - CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE, - _ => {} - }; + let entry = self.do_lookup(parent, name)?; + let (handle, opts) = self + .do_open( + entry.inode, + flags & !((libc::O_CREAT | libc::O_EXCL | libc::O_NOCTTY) as u32), + ) + .map_err(|e| { + // Don't leak the entry. + self.forget(ctx, entry.inode, 1); + e + })?; - Ok((entry, Some(handle), opts)) + Ok((entry, handle, opts)) } fn unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { @@ -1786,6 +1960,10 @@ impl FileSystem for PassthroughFs { mod tests { use super::*; + use std::env; + use std::os::unix::ffi::OsStringExt; + use std::path::{Path, PathBuf}; + #[test] fn padded_cstrings() { assert_eq!(strip_padding(b".\0\0\0\0\0\0\0").to_bytes(), b"."); @@ -1806,4 +1984,44 @@ mod tests { fn no_nul_byte() { strip_padding(b"no nul bytes in string"); } + + #[test] + fn create_temp_dir() { + let t = TempDir::new( + CString::new(env::temp_dir().into_os_string().into_vec()) + .expect("env::temp_dir() is not a valid c-string"), + ) + .expect("Failed to create temporary directory"); + + let cstr = t.path().to_string_lossy(); + let path = Path::new(&*cstr); + assert!(path.exists()); + assert!(path.is_dir()); + } + + #[test] + fn remove_temp_dir() { + let t = TempDir::new( + CString::new(env::temp_dir().into_os_string().into_vec()) + .expect("env::temp_dir() is not a valid c-string"), + ) + .expect("Failed to create temporary directory"); + let cstr = t.path().to_string_lossy(); + let path = PathBuf::from(&*cstr); + mem::drop(t); + assert!(!path.exists()); + } + + #[test] + fn temp_dir_into_inner() { + let t = TempDir::new( + CString::new(env::temp_dir().into_os_string().into_vec()) + .expect("env::temp_dir() is not a valid c-string"), + ) + .expect("Failed to create temporary directory"); + let (cstr, _) = t.into_inner(); + let cow_str = cstr.to_string_lossy(); + let path = Path::new(&*cow_str); + assert!(path.exists()); + } } diff --git a/seccomp/aarch64/fs_device.policy b/seccomp/aarch64/fs_device.policy index 7bf794a..adeb9b6 100644 --- a/seccomp/aarch64/fs_device.policy +++ b/seccomp/aarch64/fs_device.policy @@ -6,7 +6,9 @@ copy_file_range: 1 fallocate: 1 +fchmod: 1 fchmodat: 1 +fchown: 1 fchownat: 1 fdatasync: 1 lgetxattr: 1 diff --git a/seccomp/arm/fs_device.policy b/seccomp/arm/fs_device.policy index 661883a..5290afa 100644 --- a/seccomp/arm/fs_device.policy +++ b/seccomp/arm/fs_device.policy @@ -6,7 +6,9 @@ copy_file_range: 1 fallocate: 1 +fchmod: 1 fchmodat: 1 +fchown32: 1 fchownat: 1 fdatasync: 1 lgetxattr: 1 @@ -23,6 +25,7 @@ geteuid32: 1 ioctl: arg1 == FS_IOC_GET_ENCRYPTION_POLICY || arg1 == FS_IOC_SET_ENCRYPTION_POLICY linkat: 1 _llseek: 1 +mkdir: 1 mkdirat: 1 mknodat: 1 open: return ENOENT diff --git a/seccomp/x86_64/fs_device.policy b/seccomp/x86_64/fs_device.policy index 1c10601..1454770 100644 --- a/seccomp/x86_64/fs_device.policy +++ b/seccomp/x86_64/fs_device.policy @@ -6,7 +6,9 @@ copy_file_range: 1 fallocate: 1 +fchmod: 1 fchmodat: 1 +fchown: 1 fchownat: 1 fdatasync: 1 lgetxattr: 1 @@ -22,6 +24,7 @@ geteuid: 1 ioctl: arg1 == FS_IOC_GET_ENCRYPTION_POLICY || arg1 == FS_IOC_SET_ENCRYPTION_POLICY linkat: 1 lseek: 1 +mkdir: 1 mkdirat: 1 mknodat: 1 newfstatat: 1 -- cgit 1.4.1 From ba7662437018e380bbf208c23fda39cb62907a62 Mon Sep 17 00:00:00 2001 From: Colin Downs-Razouk Date: Mon, 11 May 2020 08:13:50 -0700 Subject: devices: irqchip: IrqChipX86_64 trait This trait handles the x86-specific features of an irqchip, including getting and setting the state of the pic, ioapic, lapics, and pit. Also includes an empty implementation of this trait for the KvmKernelIrqChip. BUG=chromium:1077058 TEST=cargo test -p devices Change-Id: I36034661f4a2baedc7ac2b8f311cab6327afefba Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2197717 Tested-by: kokoro Commit-Queue: Colin Downs-Razouk Reviewed-by: Daniel Verkamp Reviewed-by: Stephen Barber --- devices/src/irqchip/kvm/mod.rs | 5 ++++ devices/src/irqchip/kvm/x86_64.rs | 57 +++++++++++++++++++++++++++++++++++++++ devices/src/irqchip/mod.rs | 5 ++++ devices/src/irqchip/x86_64.rs | 38 ++++++++++++++++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 devices/src/irqchip/kvm/x86_64.rs create mode 100644 devices/src/irqchip/x86_64.rs diff --git a/devices/src/irqchip/kvm/mod.rs b/devices/src/irqchip/kvm/mod.rs index 56794a3..bdc9d3f 100644 --- a/devices/src/irqchip/kvm/mod.rs +++ b/devices/src/irqchip/kvm/mod.rs @@ -8,6 +8,11 @@ use std::sync::Arc; use sync::Mutex; use sys_util::{EventFd, Result}; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod x86_64; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub use x86_64::*; + use crate::IrqChip; /// IrqChip implementation where the entire IrqChip is emulated by KVM. diff --git a/devices/src/irqchip/kvm/x86_64.rs b/devices/src/irqchip/kvm/x86_64.rs new file mode 100644 index 0000000..6f59935 --- /dev/null +++ b/devices/src/irqchip/kvm/x86_64.rs @@ -0,0 +1,57 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +use hypervisor::kvm::KvmVcpu; +use hypervisor::{IoapicState, LapicState, PicSelect, PicState, PitState}; + +use sys_util::Result; + +use crate::{Bus, IrqChipX86_64, KvmKernelIrqChip}; + +impl IrqChipX86_64 for KvmKernelIrqChip { + /// Get the current state of the PIC + fn get_pic_state(&self, _select: PicSelect) -> Result { + unimplemented!("get_pic_state for KvmKernelIrqChip is not yet implemented"); + } + + /// Set the current state of the PIC + fn set_pic_state(&mut self, _select: PicSelect, _state: &PicState) -> Result<()> { + unimplemented!("set_pic_state for KvmKernelIrqChip is not yet implemented"); + } + + /// Get the current state of the IOAPIC + fn get_ioapic_state(&self) -> Result { + unimplemented!("get_ioapic_state for KvmKernelIrqChip is not yet implemented"); + } + + /// Set the current state of the IOAPIC + fn set_ioapic_state(&mut self, _state: &IoapicState) -> Result<()> { + unimplemented!("set_ioapic_state for KvmKernelIrqChip is not yet implemented"); + } + + /// Get the current state of the specified VCPU's local APIC + fn get_lapic_state(&self, _vcpu_id: usize) -> Result { + unimplemented!("get_lapic_state for KvmKernelIrqChip is not yet implemented"); + } + + /// Set the current state of the specified VCPU's local APIC + fn set_lapic_state(&mut self, _vcpu_id: usize, _state: &LapicState) -> Result<()> { + unimplemented!("set_lapic_state for KvmKernelIrqChip is not yet implemented"); + } + + /// Create a PIT (Programmable Interval Timer) for this VM. + fn create_pit(&mut self, _io_bus: &mut Bus) -> Result<()> { + unimplemented!("create_pit for KvmKernelIrqChip is not yet implemented"); + } + + /// Retrieves the state of the PIT. + fn get_pit(&self) -> Result { + unimplemented!("get_pit for KvmKernelIrqChip is not yet implemented"); + } + + /// Sets the state of the PIT. + fn set_pit(&mut self, _state: &PitState) -> Result<()> { + unimplemented!("set_pit for KvmKernelIrqChip is not yet implemented"); + } +} diff --git a/devices/src/irqchip/mod.rs b/devices/src/irqchip/mod.rs index a8023e9..3284bc6 100644 --- a/devices/src/irqchip/mod.rs +++ b/devices/src/irqchip/mod.rs @@ -10,6 +10,11 @@ use std::marker::{Send, Sized}; use hypervisor::{IrqRoute, Vcpu}; use sys_util::{EventFd, Result}; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod x86_64; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub use x86_64::*; + /// Trait that abstracts interactions with interrupt controllers. /// /// Each VM will have one IrqChip instance which is responsible for routing IRQ lines and diff --git a/devices/src/irqchip/x86_64.rs b/devices/src/irqchip/x86_64.rs new file mode 100644 index 0000000..4b50427 --- /dev/null +++ b/devices/src/irqchip/x86_64.rs @@ -0,0 +1,38 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +use crate::Bus; +use hypervisor::{IoapicState, LapicState, PicSelect, PicState, PitState, VcpuX86_64}; +use sys_util::Result; + +use crate::IrqChip; + +pub trait IrqChipX86_64: IrqChip { + /// Get the current state of the PIC + fn get_pic_state(&self, select: PicSelect) -> Result; + + /// Set the current state of the PIC + fn set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()>; + + /// Get the current state of the IOAPIC + fn get_ioapic_state(&self) -> Result; + + /// Set the current state of the IOAPIC + fn set_ioapic_state(&mut self, state: &IoapicState) -> Result<()>; + + /// Get the current state of the specified VCPU's local APIC + fn get_lapic_state(&self, vcpu_id: usize) -> Result; + + /// Set the current state of the specified VCPU's local APIC + fn set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()>; + + /// Create a PIT (Programmable Interval Timer) for this VM. + fn create_pit(&mut self, io_bus: &mut Bus) -> Result<()>; + + /// Retrieves the state of the PIT. + fn get_pit(&self) -> Result; + + /// Sets the state of the PIT. + fn set_pit(&mut self, state: &PitState) -> Result<()>; +} -- cgit 1.4.1 From e187df2a5e4cb1a99b440552c5d4165cc3d245eb Mon Sep 17 00:00:00 2001 From: Dylan Reid Date: Tue, 2 Jun 2020 13:28:44 -0700 Subject: io_uring: Take Vecs or iterators of iovecs The pointer passed to the kernel for the iovecs must remain valid until the operation completes. This is true even if the kernel copies the sqe containing the pointer, that isn't a deep copy. If the iovecs are freed before the kernel actually processes the op, it will fail with a bad address error. Move the responsibility for maintaining the list in memory from the caller to io_uring itself. This mean io_uring must allocate. Taking iterators for IoSlice/IoSliceMut, means that the caller doesn't need to allocate as well if there isn't a Vec already allocated. Change-Id: I63a009d8ab543c8bac4132809fb851536d4ad82c Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2227082 Reviewed-by: Dylan Reid Tested-by: Dylan Reid Tested-by: kokoro Commit-Queue: Dylan Reid --- io_uring/src/uring.rs | 118 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 96 insertions(+), 22 deletions(-) diff --git a/io_uring/src/uring.rs b/io_uring/src/uring.rs index 8d569aa..8a69872 100644 --- a/io_uring/src/uring.rs +++ b/io_uring/src/uring.rs @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +use std::collections::BTreeMap; use std::fmt; use std::fs::File; -use std::io::IoSlice; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::ptr::null_mut; use std::sync::atomic::{AtomicU32, Ordering}; @@ -78,11 +78,11 @@ pub struct URingStats { /// let f = File::open(Path::new("/dev/zero")).unwrap(); /// let mut uring = URingContext::new(16).unwrap(); /// uring -/// .add_poll_fd(f.as_raw_fd(), WatchingEvents::empty().set_read(), 454) +/// .add_poll_fd(f.as_raw_fd(), &WatchingEvents::empty().set_read(), 454) /// .unwrap(); /// let (user_data, res) = uring.wait().unwrap().next().unwrap(); -/// assert_eq!(user_data, 454 as UserData); -/// assert_eq!(res.unwrap(), 1 as i32); +/// assert_eq!(user_data, 454 as io_uring::UserData); +/// assert_eq!(res.unwrap(), 1 as u32); /// /// ``` pub struct URingContext { @@ -260,6 +260,21 @@ impl URingContext { self.add_rw_op(ptr, len, fd, offset, user_data, IORING_OP_READV as u8) } + /// See 'writev' but accepts an iterator instead of a vector if there isn't already a vector in + /// existence. + pub unsafe fn add_writev_iter( + &mut self, + iovecs: I, + fd: RawFd, + offset: u64, + user_data: UserData, + ) -> Result<()> + where + I: Iterator, + { + self.add_writev(iovecs.collect(), fd, offset, user_data) + } + /// Asynchronously writes to `fd` from the addresses given in `iovecs`. /// # Safety /// `add_writev` will write to the address given by `iovecs`. This is only safe if the caller @@ -267,9 +282,10 @@ impl URingContext { /// transaction is complete and that completion has been returned from the `wait` function. In /// addition there must not be any mutable references to the data pointed to by `iovecs` until /// the operation completes. Ensure that the fd remains open until the op completes as well. + /// The iovecs reference must be kept alive until the op returns. pub unsafe fn add_writev( &mut self, - iovecs: &[IoSlice], + iovecs: Vec, fd: RawFd, offset: u64, user_data: UserData, @@ -284,7 +300,24 @@ impl URingContext { sqe.user_data = user_data; sqe.flags = 0; sqe.fd = fd; - }) + })?; + self.complete_ring.add_op_data(user_data, iovecs); + Ok(()) + } + + /// See 'readv' but accepts an iterator instead of a vector if there isn't already a vector in + /// existence. + pub unsafe fn add_readv_iter( + &mut self, + iovecs: I, + fd: RawFd, + offset: u64, + user_data: UserData, + ) -> Result<()> + where + I: Iterator, + { + self.add_readv(iovecs.collect(), fd, offset, user_data) } /// Asynchronously reads from `fd` to the addresses given in `iovecs`. @@ -294,9 +327,10 @@ impl URingContext { /// transaction is complete and that completion has been returned from the `wait` function. In /// addition there must not be any references to the data pointed to by `iovecs` until the /// operation completes. Ensure that the fd remains open until the op completes as well. + /// The iovecs reference must be kept alive until the op returns. pub unsafe fn add_readv( &mut self, - iovecs: &[IoSlice], + iovecs: Vec, fd: RawFd, offset: u64, user_data: UserData, @@ -311,7 +345,9 @@ impl URingContext { sqe.user_data = user_data; sqe.flags = 0; sqe.fd = fd; - }) + })?; + self.complete_ring.add_op_data(user_data, iovecs); + Ok(()) } /// Syncs all completed operations, the ordering with in-flight async ops is not @@ -367,7 +403,7 @@ impl URingContext { pub fn add_poll_fd( &mut self, fd: RawFd, - events: WatchingEvents, + events: &WatchingEvents, user_data: UserData, ) -> Result<()> { self.prep_next_sqe(|sqe, _iovec| { @@ -389,7 +425,7 @@ impl URingContext { pub fn remove_poll_fd( &mut self, fd: RawFd, - events: WatchingEvents, + events: &WatchingEvents, user_data: UserData, ) -> Result<()> { self.prep_next_sqe(|sqe, _iovec| { @@ -524,6 +560,9 @@ struct CompleteQueueState { ring_mask: u32, cqes_offset: u32, completed: usize, + //For ops that pass in arrays of iovecs, they need to be valid for the duration of the + //operation because the kernel might read them at any time. + pending_op_addrs: BTreeMap>, } impl CompleteQueueState { @@ -541,9 +580,14 @@ impl CompleteQueueState { ring_mask, cqes_offset: params.cq_off.cqes, completed: 0, + pending_op_addrs: BTreeMap::new(), } } + fn add_op_data(&mut self, user_data: UserData, addrs: Vec) { + self.pending_op_addrs.insert(user_data, addrs); + } + fn get_cqe(&self, head: u32) -> &io_uring_cqe { unsafe { // Safe because we trust that the kernel has returned enough memory in io_uring_setup @@ -582,6 +626,9 @@ impl Iterator for CompleteQueueState { let user_data = cqe.user_data; let res = cqe.res; + // free the addrs saved for this op. + let _ = self.pending_op_addrs.remove(&user_data); + // Store the new head and ensure the reads above complete before the kernel sees the // update to head, `set_head` uses `Release` ordering let new_head = head.wrapping_add(1); @@ -637,6 +684,7 @@ impl QueuePointers { #[cfg(test)] mod tests { use std::fs::OpenOptions; + use std::io::{IoSlice, IoSliceMut}; use std::io::{Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; use std::time::Duration; @@ -677,10 +725,18 @@ mod tests { offset: u64, user_data: UserData, ) { - let iovecs = [IoSlice::new(buf)]; + let io_vecs = unsafe { + //safe to transmut from IoSlice to iovec. + vec![IoSliceMut::new(buf)] + .into_iter() + .map(|slice| std::mem::transmute::(slice)) + .collect::>() + }; let (user_data_ret, res) = unsafe { // Safe because the `wait` call waits until the kernel is done with `buf`. - uring.add_readv(&iovecs, fd, offset, user_data).unwrap(); + uring + .add_readv_iter(io_vecs.into_iter(), fd, offset, user_data) + .unwrap(); uring.wait().unwrap().next().unwrap() }; assert_eq!(user_data_ret, user_data); @@ -771,15 +827,27 @@ mod tests { const BUF_SIZE: usize = 0x2000; let mut uring = URingContext::new(queue_size).unwrap(); - let buf = [0u8; BUF_SIZE]; - let buf2 = [0u8; BUF_SIZE]; - let buf3 = [0u8; BUF_SIZE]; - let io_slices = vec![IoSlice::new(&buf), IoSlice::new(&buf2), IoSlice::new(&buf3)]; - let total_len = io_slices.iter().fold(0, |a, iovec| a + iovec.len()); + let mut buf = [0u8; BUF_SIZE]; + let mut buf2 = [0u8; BUF_SIZE]; + let mut buf3 = [0u8; BUF_SIZE]; + let io_vecs = unsafe { + //safe to transmut from IoSlice to iovec. + vec![ + IoSliceMut::new(&mut buf), + IoSliceMut::new(&mut buf2), + IoSliceMut::new(&mut buf3), + ] + .into_iter() + .map(|slice| std::mem::transmute::(slice)) + .collect::>() + }; + let total_len = io_vecs.iter().fold(0, |a, iovec| a + iovec.iov_len); let f = create_test_file(&temp_dir, total_len as u64 * 2); let (user_data_ret, res) = unsafe { // Safe because the `wait` call waits until the kernel is done with `buf`. - uring.add_readv(&io_slices, f.as_raw_fd(), 0, 55).unwrap(); + uring + .add_readv_iter(io_vecs.into_iter(), f.as_raw_fd(), 0, 55) + .unwrap(); uring.wait().unwrap().next().unwrap() }; assert_eq!(user_data_ret, 55); @@ -865,13 +933,19 @@ mod tests { let buf = [0xaau8; BUF_SIZE]; let buf2 = [0xffu8; BUF_SIZE]; let buf3 = [0x55u8; BUF_SIZE]; - let io_slices = vec![IoSlice::new(&buf), IoSlice::new(&buf2), IoSlice::new(&buf3)]; - let total_len = io_slices.iter().fold(0, |a, iovec| a + iovec.len()); + let io_vecs = unsafe { + //safe to transmut from IoSlice to iovec. + vec![IoSlice::new(&buf), IoSlice::new(&buf2), IoSlice::new(&buf3)] + .into_iter() + .map(|slice| std::mem::transmute::(slice)) + .collect::>() + }; + let total_len = io_vecs.iter().fold(0, |a, iovec| a + iovec.iov_len); let mut f = create_test_file(&temp_dir, total_len as u64 * 2); let (user_data_ret, res) = unsafe { // Safe because the `wait` call waits until the kernel is done with `buf`. uring - .add_writev(&io_slices, f.as_raw_fd(), OFFSET, 55) + .add_writev_iter(io_vecs.into_iter(), f.as_raw_fd(), OFFSET, 55) .unwrap(); uring.wait().unwrap().next().unwrap() }; @@ -951,7 +1025,7 @@ mod tests { let f = File::open(Path::new("/dev/zero")).unwrap(); let mut uring = URingContext::new(16).unwrap(); uring - .add_poll_fd(f.as_raw_fd(), WatchingEvents::empty().set_read(), 454) + .add_poll_fd(f.as_raw_fd(), &WatchingEvents::empty().set_read(), 454) .unwrap(); let (user_data, res) = uring.wait().unwrap().next().unwrap(); assert_eq!(user_data, 454 as UserData); -- cgit 1.4.1 From 55f21f743481dc44da6457757358262940b81286 Mon Sep 17 00:00:00 2001 From: Andrew Walbran Date: Fri, 5 Jun 2020 11:31:58 +0100 Subject: README: Document permissions needed for running. Change-Id: Ia86cb49ffc89ad66fe67698e05968e6d6f19f743 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2231364 Auto-Submit: Andrew Walbran Reviewed-by: Stephen Barber Commit-Queue: Stephen Barber Tested-by: Stephen Barber --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index f2d72aa..f23e208 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,11 @@ Known issues: policies [into the crosvm binary](http://crbug.com/1052126). * Devices can't be jailed if `/var/empty` doesn't exist. `sudo mkdir -p /var/empty` to work around this for now. +* You need read/write permissions for `/dev/kvm` to run tests or other crosvm + instances. Usually it's owned by the `kvm` group, so `sudo usermod -a -G kvm + $USER` and then log out and back in again to fix this. +* Some other features (networking) require `CAP_NET_ADMIN` so those usually + need to be run as root. And that's it! You should be able to `cargo build/run/test`. -- cgit 1.4.1 From 2a0ce34f31533b8e6e8c7b7ed1a55990702958ac Mon Sep 17 00:00:00 2001 From: Colin Downs-Razouk Date: Tue, 26 May 2020 08:43:18 -0700 Subject: devices: irqchip: KvmKernelIrqchip x86_64 impl Implemented get/set_pic/ioapic/pit functions for the KvmKernelIrqchip. Added respective functions on KvmVm for interacting with the underlying KVM API. Added associated tests for get/set functions. BUG=chromium:1077058 TEST=ran devices tests and added get/set function tests Change-Id: I66a29828fe2f1fbdf54d7325656a003ac09e36d0 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2219422 Reviewed-by: Udam Saini Reviewed-by: Stephen Barber Tested-by: kokoro Commit-Queue: Colin Downs-Razouk --- devices/src/irqchip/kvm/mod.rs | 7 +- devices/src/irqchip/kvm/x86_64.rs | 187 +++++++++++++++++++++++++++++++++++--- hypervisor/src/kvm/mod.rs | 28 ++++++ hypervisor/src/kvm/x86_64.rs | 124 ++++++++++++++++++++++++- hypervisor/src/x86_64.rs | 2 +- 5 files changed, 330 insertions(+), 18 deletions(-) diff --git a/devices/src/irqchip/kvm/mod.rs b/devices/src/irqchip/kvm/mod.rs index bdc9d3f..a5fd89f 100644 --- a/devices/src/irqchip/kvm/mod.rs +++ b/devices/src/irqchip/kvm/mod.rs @@ -19,15 +19,18 @@ use crate::IrqChip; /// /// This implementation will use the KVM API to create and configure the in-kernel irqchip. pub struct KvmKernelIrqChip { - _vm: KvmVm, + vm: KvmVm, vcpus: Arc>>>, } impl KvmKernelIrqChip { /// Construct a new KvmKernelIrqchip. pub fn new(vm: KvmVm, num_vcpus: usize) -> Result { + // TODO (colindr): this constructor needs aarch64 vs x86_64 implementations because we + // want to use vm.create_device instead of create_irq_chip on aarch64 + vm.create_irq_chip()?; Ok(KvmKernelIrqChip { - _vm: vm, + vm, vcpus: Arc::new(Mutex::new((0..num_vcpus).map(|_| None).collect())), }) } diff --git a/devices/src/irqchip/kvm/x86_64.rs b/devices/src/irqchip/kvm/x86_64.rs index 6f59935..acede53 100644 --- a/devices/src/irqchip/kvm/x86_64.rs +++ b/devices/src/irqchip/kvm/x86_64.rs @@ -4,30 +4,30 @@ use hypervisor::kvm::KvmVcpu; use hypervisor::{IoapicState, LapicState, PicSelect, PicState, PitState}; - +use kvm_sys::*; use sys_util::Result; use crate::{Bus, IrqChipX86_64, KvmKernelIrqChip}; impl IrqChipX86_64 for KvmKernelIrqChip { /// Get the current state of the PIC - fn get_pic_state(&self, _select: PicSelect) -> Result { - unimplemented!("get_pic_state for KvmKernelIrqChip is not yet implemented"); + fn get_pic_state(&self, select: PicSelect) -> Result { + Ok(PicState::from(&self.vm.get_pic_state(select)?)) } /// Set the current state of the PIC - fn set_pic_state(&mut self, _select: PicSelect, _state: &PicState) -> Result<()> { - unimplemented!("set_pic_state for KvmKernelIrqChip is not yet implemented"); + fn set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()> { + self.vm.set_pic_state(select, &kvm_pic_state::from(state)) } /// Get the current state of the IOAPIC fn get_ioapic_state(&self) -> Result { - unimplemented!("get_ioapic_state for KvmKernelIrqChip is not yet implemented"); + Ok(IoapicState::from(&self.vm.get_ioapic_state()?)) } /// Set the current state of the IOAPIC - fn set_ioapic_state(&mut self, _state: &IoapicState) -> Result<()> { - unimplemented!("set_ioapic_state for KvmKernelIrqChip is not yet implemented"); + fn set_ioapic_state(&mut self, state: &IoapicState) -> Result<()> { + self.vm.set_ioapic_state(&kvm_ioapic_state::from(state)) } /// Get the current state of the specified VCPU's local APIC @@ -41,17 +41,176 @@ impl IrqChipX86_64 for KvmKernelIrqChip { } /// Create a PIT (Programmable Interval Timer) for this VM. + /// The KvmKernelIrqchip creates the PIT by calling the KVM_CREATE_PIT2 KVM API. The + /// io_bus is not used in this case because KVM handles intercepting port-mapped io intended + /// for the PIT. fn create_pit(&mut self, _io_bus: &mut Bus) -> Result<()> { - unimplemented!("create_pit for KvmKernelIrqChip is not yet implemented"); + self.vm.create_pit() } - /// Retrieves the state of the PIT. + /// Retrieves the state of the PIT. Gets the pit state via the KVM API. fn get_pit(&self) -> Result { - unimplemented!("get_pit for KvmKernelIrqChip is not yet implemented"); + Ok(PitState::from(&self.vm.get_pit_state()?)) + } + + /// Sets the state of the PIT. Sets the pit state via the KVM API. + fn set_pit(&mut self, state: &PitState) -> Result<()> { + self.vm.set_pit_state(&kvm_pit_state2::from(state)) + } +} + +#[cfg(test)] +mod tests { + + use hypervisor::kvm::{Kvm, KvmVm}; + use sys_util::GuestMemory; + + use crate::irqchip::{IrqChip, IrqChipX86_64, KvmKernelIrqChip}; + use crate::Bus; + + use hypervisor::{PicSelect, Vm, VmX86_64}; + + fn get_chip() -> (KvmKernelIrqChip, KvmVm) { + let kvm = Kvm::new().expect("failed to instantiate Kvm"); + let mem = GuestMemory::new(&[]).unwrap(); + let vm = KvmVm::new(&kvm, mem).expect("failed tso instantiate vm"); + let vcpu = vm.create_vcpu(0).expect("failed to instantiate vcpu"); + + let mut chip = KvmKernelIrqChip::new(vm.try_clone().expect("failed to clone vm"), 1) + .expect("failed to instantiate KvmKernelIrqChip"); + + chip.add_vcpu(0, vcpu).expect("failed to add vcpu"); + + (chip, vm) } - /// Sets the state of the PIT. - fn set_pit(&mut self, _state: &PitState) -> Result<()> { - unimplemented!("set_pit for KvmKernelIrqChip is not yet implemented"); + #[test] + fn get_pic() { + let (chip, vm) = get_chip(); + + let state = chip + .get_pic_state(PicSelect::Primary) + .expect("could not get pic state"); + + // Default is that no irq lines are asserted + assert_eq!(state.irr, 0); + + // Assert Irq Line 0 + vm.set_irq_line(0, true).expect("could not set irq line"); + + let state = chip + .get_pic_state(PicSelect::Primary) + .expect("could not get pic state"); + + // Bit 0 should now be 1 + assert_eq!(state.irr, 1); + } + + #[test] + fn set_pic() { + let (mut chip, _) = get_chip(); + + let mut state = chip + .get_pic_state(PicSelect::Primary) + .expect("could not get pic state"); + + // set bits 0 and 1 + state.irr = 3; + + chip.set_pic_state(PicSelect::Primary, &state) + .expect("could not set the pic state"); + + let state = chip + .get_pic_state(PicSelect::Primary) + .expect("could not get pic state"); + + // Bits 1 and 0 should now be 1 + assert_eq!(state.irr, 3); + } + + #[test] + fn get_ioapic() { + let (chip, vm) = get_chip(); + + let state = chip.get_ioapic_state().expect("could not get ioapic state"); + + // Default is that no irq lines are asserted + assert_eq!(state.current_interrupt_level_bitmap, 0); + + // Default routing entries has routes 0..24 routed to vectors 0..24 + for i in 0..24 { + // when the ioapic is reset by kvm, it defaults to all zeroes except the + // interrupt mask is set to 1, which is bit 16 + assert_eq!(state.redirect_table[i].get(0, 64), 1 << 16); + } + + // Assert Irq Line 1 + vm.set_irq_line(1, true).expect("could not set irq line"); + + let state = chip.get_ioapic_state().expect("could not get ioapic state"); + + // Bit 1 should now be 1 + assert_eq!(state.current_interrupt_level_bitmap, 2); + } + + #[test] + fn set_ioapic() { + let (mut chip, _) = get_chip(); + + let mut state = chip.get_ioapic_state().expect("could not get ioapic state"); + + // set a vector in the redirect table + state.redirect_table[2].set_vector(15); + // set the irq line status on that entry + state.current_interrupt_level_bitmap = 4; + + chip.set_ioapic_state(&state) + .expect("could not set the ioapic state"); + + let state = chip.get_ioapic_state().expect("could not get ioapic state"); + + // verify that get_ioapic_state returns what we set + assert_eq!(state.redirect_table[2].get_vector(), 15); + assert_eq!(state.current_interrupt_level_bitmap, 4); + } + + #[test] + fn get_pit() { + let (mut chip, _) = get_chip(); + let mut io_bus = Bus::new(); + chip.create_pit(&mut io_bus).expect("failed to create pit"); + + let state = chip.get_pit().expect("failed to get pit state"); + + assert_eq!(state.flags, 0); + // assert reset state of pit + for i in 0..3 { + // initial count of 0 sets it to 0x10000; + assert_eq!(state.channels[i].count, 0x10000); + assert_eq!(state.channels[i].mode, 0xff); + assert_eq!(state.channels[i].gate, i != 2); + } + } + + #[test] + fn set_pit() { + let (mut chip, _) = get_chip(); + let mut io_bus = Bus::new(); + chip.create_pit(&mut io_bus).expect("failed to create pit"); + + let mut state = chip.get_pit().expect("failed to get pit state"); + + // set some values + state.channels[0].count = 500; + state.channels[0].mode = 1; + + // Setting the pit should initialize the one-shot timer + chip.set_pit(&state).expect("failed to set pit state"); + + let state = chip.get_pit().expect("failed to get pit state"); + + // check the values we set + assert_eq!(state.channels[0].count, 500); + assert_eq!(state.channels[0].mode, 1); } } diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 5fdf124..bf87e3d 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -171,6 +171,34 @@ impl KvmVm { fn create_kvm_vcpu(&self, _id: usize) -> Result { Ok(KvmVcpu {}) } + + /// Crates an in kernel interrupt controller. + /// + /// See the documentation on the KVM_CREATE_IRQCHIP ioctl. + pub fn create_irq_chip(&self) -> Result<()> { + // Safe because we know that our file is a VM fd and we verify the return result. + let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) }; + if ret == 0 { + Ok(()) + } else { + errno_result() + } + } + /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise. + pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> { + let mut irq_level = kvm_irq_level::default(); + irq_level.__bindgen_anon_1.irq = irq; + irq_level.level = if active { 1 } else { 0 }; + + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) }; + if ret == 0 { + Ok(()) + } else { + errno_result() + } + } } impl Vm for KvmVm { diff --git a/hypervisor/src/kvm/x86_64.rs b/hypervisor/src/kvm/x86_64.rs index f3fbc59..eaa34cf 100644 --- a/hypervisor/src/kvm/x86_64.rs +++ b/hypervisor/src/kvm/x86_64.rs @@ -14,7 +14,7 @@ use sys_util::{ use super::{Kvm, KvmVcpu, KvmVm}; use crate::{ ClockState, CpuId, CpuIdEntry, HypervisorX86_64, IoapicRedirectionTableEntry, IoapicState, - LapicState, PicState, PitChannelState, PitState, Regs, VcpuX86_64, VmX86_64, + LapicState, PicSelect, PicState, PitChannelState, PitState, Regs, VcpuX86_64, VmX86_64, }; type KvmCpuId = kvm::CpuId; @@ -93,6 +93,128 @@ impl KvmVm { errno_result() } } + + /// Retrieves the state of given interrupt controller by issuing KVM_GET_IRQCHIP ioctl. + /// + /// Note that this call can only succeed after a call to `Vm::create_irq_chip`. + pub fn get_pic_state(&self, id: PicSelect) -> Result { + let mut irqchip_state = kvm_irqchip::default(); + irqchip_state.chip_id = id as u32; + let ret = unsafe { + // Safe because we know our file is a VM fd, we know the kernel will only write + // correct amount of memory to our pointer, and we verify the return result. + ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state) + }; + if ret == 0 { + Ok(unsafe { + // Safe as we know that we are retrieving data related to the + // PIC (primary or secondary) and not IOAPIC. + irqchip_state.chip.pic + }) + } else { + errno_result() + } + } + + /// Sets the state of given interrupt controller by issuing KVM_SET_IRQCHIP ioctl. + /// + /// Note that this call can only succeed after a call to `Vm::create_irq_chip`. + pub fn set_pic_state(&self, id: PicSelect, state: &kvm_pic_state) -> Result<()> { + let mut irqchip_state = kvm_irqchip::default(); + irqchip_state.chip_id = id as u32; + irqchip_state.chip.pic = *state; + // Safe because we know that our file is a VM fd, we know the kernel will only read + // correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) }; + if ret == 0 { + Ok(()) + } else { + errno_result() + } + } + + /// Retrieves the state of IOAPIC by issuing KVM_GET_IRQCHIP ioctl. + /// + /// Note that this call can only succeed after a call to `Vm::create_irq_chip`. + pub fn get_ioapic_state(&self) -> Result { + let mut irqchip_state = kvm_irqchip::default(); + irqchip_state.chip_id = 2; + let ret = unsafe { + // Safe because we know our file is a VM fd, we know the kernel will only write + // correct amount of memory to our pointer, and we verify the return result. + ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state) + }; + if ret == 0 { + Ok(unsafe { + // Safe as we know that we are retrieving data related to the + // IOAPIC and not PIC. + irqchip_state.chip.ioapic + }) + } else { + errno_result() + } + } + + /// Sets the state of IOAPIC by issuing KVM_SET_IRQCHIP ioctl. + /// + /// Note that this call can only succeed after a call to `Vm::create_irq_chip`. + pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> { + let mut irqchip_state = kvm_irqchip::default(); + irqchip_state.chip_id = 2; + irqchip_state.chip.ioapic = *state; + // Safe because we know that our file is a VM fd, we know the kernel will only read + // correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) }; + if ret == 0 { + Ok(()) + } else { + errno_result() + } + } + + /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl. + /// + /// Note that this call can only succeed after a call to `Vm::create_irq_chip`. + pub fn create_pit(&self) -> Result<()> { + let pit_config = kvm_pit_config::default(); + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) }; + if ret == 0 { + Ok(()) + } else { + errno_result() + } + } + + /// Retrieves the state of PIT by issuing KVM_GET_PIT2 ioctl. + /// + /// Note that this call can only succeed after a call to `Vm::create_pit`. + pub fn get_pit_state(&self) -> Result { + // Safe because we know that our file is a VM fd, we know the kernel will only write + // correct amount of memory to our pointer, and we verify the return result. + let mut pit_state = unsafe { std::mem::zeroed() }; + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pit_state) }; + if ret == 0 { + Ok(pit_state) + } else { + errno_result() + } + } + + /// Sets the state of PIT by issuing KVM_SET_PIT2 ioctl. + /// + /// Note that this call can only succeed after a call to `Vm::create_pit`. + pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> { + // Safe because we know that our file is a VM fd, we know the kernel will only read + // correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pit_state) }; + if ret == 0 { + Ok(()) + } else { + errno_result() + } + } } impl VmX86_64 for KvmVm { diff --git a/hypervisor/src/x86_64.rs b/hypervisor/src/x86_64.rs index c311924..859ebff 100644 --- a/hypervisor/src/x86_64.rs +++ b/hypervisor/src/x86_64.rs @@ -339,6 +339,6 @@ pub struct PitChannelState { pub bcd: bool, /// Value of the gate input pin. This only applies to channel 2. pub gate: bool, - /// Guest boot nanosecond timestamp of when the count value was loaded. + /// Nanosecond timestamp of when the count value was loaded. pub count_load_time: u64, } -- cgit 1.4.1 From 7c359d617f82361d03910a07ab54e17f07a54b23 Mon Sep 17 00:00:00 2001 From: Steven Richman Date: Wed, 13 May 2020 23:05:58 -0700 Subject: hypervisor: add Vm user memory region functions The separate Vm functions for MemoryMappings and MemoryMappingArenas have been combined and now use a MappedRegion trait that the mappings implement. msync_memory_region replaces the get_mmap_arena function, which is used by VmMsyncRequest. Since Vm uses mutexes for cloning, it can't return mem region references. BUG=chromium:1077058 TEST=cargo test, cargo test -p sys_util, cargo test -p hypervisor Change-Id: If257b16ee34d07820ae7ebdb9a3a598a41df013c Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2202845 Reviewed-by: Daniel Verkamp Reviewed-by: Gurchetan Singh Tested-by: kokoro Commit-Queue: Gurchetan Singh --- devices/src/pci/vfio_pci.rs | 2 +- hypervisor/src/kvm/mod.rs | 156 +++++++++++++++++++++++++++++++++++++++++-- hypervisor/src/lib.rs | 47 ++++++++----- kvm/src/lib.rs | 2 +- sys_util/src/guest_memory.rs | 2 +- sys_util/src/mmap.rs | 140 ++++++++++++++++++++++++-------------- vm_control/src/lib.rs | 6 +- 7 files changed, 276 insertions(+), 79 deletions(-) diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs index 7b5c233..fa27dec 100644 --- a/devices/src/pci/vfio_pci.rs +++ b/devices/src/pci/vfio_pci.rs @@ -9,7 +9,7 @@ use std::u32; use kvm::Datamatch; use msg_socket::{MsgReceiver, MsgSender}; use resources::{Alloc, MmioType, SystemAllocator}; -use sys_util::{error, EventFd, MemoryMapping}; +use sys_util::{error, EventFd, MappedRegion, MemoryMapping}; use vfio_sys::*; use vm_control::{ diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index bf87e3d..abffdd5 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -15,18 +15,16 @@ use std::os::raw::{c_char, c_ulong}; use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::Arc; -use libc::{open, O_CLOEXEC, O_RDWR}; +use libc::{open, EFAULT, EINVAL, EIO, ENOENT, ENOSPC, EOVERFLOW, O_CLOEXEC, O_RDWR}; use kvm_sys::*; use sync::Mutex; use sys_util::{ errno_result, ioctl, ioctl_with_ref, ioctl_with_val, AsRawDescriptor, Error, FromRawDescriptor, - GuestMemory, RawDescriptor, Result, SafeDescriptor, + GuestAddress, GuestMemory, MappedRegion, MmapError, RawDescriptor, Result, SafeDescriptor, }; -use crate::{ - ClockState, Hypervisor, HypervisorCap, MappedRegion, RunnableVcpu, Vcpu, VcpuExit, Vm, -}; +use crate::{ClockState, Hypervisor, HypervisorCap, RunnableVcpu, Vcpu, VcpuExit, Vm}; // Wrapper around KVM_SET_USER_MEMORY_REGION ioctl, which creates, modifies, or deletes a mapping // from guest physical to host user pages. @@ -153,7 +151,7 @@ impl KvmVm { index as u32, false, false, - guest_addr.offset() as u64, + guest_addr.offset(), size as u64, host_addr as *mut u8, ) @@ -215,6 +213,75 @@ impl Vm for KvmVm { &self.guest_mem } + fn add_memory_region( + &mut self, + guest_addr: GuestAddress, + mem: Box, + read_only: bool, + log_dirty_pages: bool, + ) -> Result { + let size = mem.size() as u64; + let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?; + if self.guest_mem.range_overlap(guest_addr, end_addr) { + return Err(Error::new(ENOSPC)); + } + let mut regions = self.mem_regions.lock(); + let mut gaps = self.mem_slot_gaps.lock(); + let slot = match gaps.pop() { + Some(gap) => gap.0, + None => (regions.len() + self.guest_mem.num_regions() as usize) as u32, + }; + + // Safe because we check that the given guest address is valid and has no overlaps. We also + // know that the pointer and size are correct because the MemoryMapping interface ensures + // this. We take ownership of the memory mapping so that it won't be unmapped until the slot + // is removed. + let res = unsafe { + set_user_memory_region( + &self.vm, + slot, + read_only, + log_dirty_pages, + guest_addr.offset() as u64, + size, + mem.as_ptr(), + ) + }; + + if let Err(e) = res { + gaps.push(MemSlot(slot)); + return Err(e); + } + regions.insert(slot, mem); + Ok(slot) + } + + fn msync_memory_region(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> { + let mut regions = self.mem_regions.lock(); + let mem = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?; + + mem.msync(offset, size).map_err(|err| match err { + MmapError::InvalidAddress => Error::new(EFAULT), + MmapError::NotPageAligned => Error::new(EINVAL), + MmapError::SystemCallFailed(e) => e, + _ => Error::new(EIO), + }) + } + + fn remove_memory_region(&mut self, slot: u32) -> Result<()> { + let mut regions = self.mem_regions.lock(); + if !regions.contains_key(&slot) { + return Err(Error::new(ENOENT)); + } + // Safe because the slot is checked against the list of memory slots. + unsafe { + set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?; + } + self.mem_slot_gaps.lock().push(MemSlot(slot)); + regions.remove(&slot); + Ok(()) + } + fn get_pvclock(&self) -> Result { self.get_pvclock_arch() } @@ -303,7 +370,7 @@ impl<'a> TryFrom<&'a HypervisorCap> for KvmCap { mod tests { use super::*; use std::thread; - use sys_util::GuestAddress; + use sys_util::{GuestAddress, MemoryMapping, MemoryMappingArena}; #[test] fn new() { @@ -354,4 +421,79 @@ mod tests { let read_val: u8 = vm.get_memory().read_obj_from_addr(obj_addr).unwrap(); assert_eq!(read_val, 67u8); } + + #[test] + fn add_memory() { + let kvm = Kvm::new().unwrap(); + let gm = + GuestMemory::new(&[(GuestAddress(0), 0x1000), (GuestAddress(0x5000), 0x5000)]).unwrap(); + let mut vm = KvmVm::new(&kvm, gm).unwrap(); + let mem_size = 0x1000; + let mem = MemoryMapping::new(mem_size).unwrap(); + vm.add_memory_region(GuestAddress(0x1000), Box::new(mem), false, false) + .unwrap(); + let mem = MemoryMapping::new(mem_size).unwrap(); + vm.add_memory_region(GuestAddress(0x10000), Box::new(mem), false, false) + .unwrap(); + } + + #[test] + fn add_memory_ro() { + let kvm = Kvm::new().unwrap(); + let gm = GuestMemory::new(&[(GuestAddress(0), 0x1000)]).unwrap(); + let mut vm = KvmVm::new(&kvm, gm).unwrap(); + let mem_size = 0x1000; + let mem = MemoryMapping::new(mem_size).unwrap(); + vm.add_memory_region(GuestAddress(0x1000), Box::new(mem), true, false) + .unwrap(); + } + + #[test] + fn remove_memory() { + let kvm = Kvm::new().unwrap(); + let gm = GuestMemory::new(&[(GuestAddress(0), 0x1000)]).unwrap(); + let mut vm = KvmVm::new(&kvm, gm).unwrap(); + let mem_size = 0x1000; + let mem = MemoryMapping::new(mem_size).unwrap(); + let slot = vm + .add_memory_region(GuestAddress(0x1000), Box::new(mem), false, false) + .unwrap(); + vm.remove_memory_region(slot).unwrap(); + } + + #[test] + fn remove_invalid_memory() { + let kvm = Kvm::new().unwrap(); + let gm = GuestMemory::new(&[(GuestAddress(0), 0x1000)]).unwrap(); + let mut vm = KvmVm::new(&kvm, gm).unwrap(); + assert!(vm.remove_memory_region(0).is_err()); + } + + #[test] + fn overlap_memory() { + let kvm = Kvm::new().unwrap(); + let gm = GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap(); + let mut vm = KvmVm::new(&kvm, gm).unwrap(); + let mem_size = 0x2000; + let mem = MemoryMapping::new(mem_size).unwrap(); + assert!(vm + .add_memory_region(GuestAddress(0x2000), Box::new(mem), false, false) + .is_err()); + } + + #[test] + fn sync_memory() { + let kvm = Kvm::new().unwrap(); + let gm = + GuestMemory::new(&[(GuestAddress(0), 0x1000), (GuestAddress(0x5000), 0x5000)]).unwrap(); + let mut vm = KvmVm::new(&kvm, gm).unwrap(); + let mem_size = 0x1000; + let mem = MemoryMappingArena::new(mem_size).unwrap(); + let slot = vm + .add_memory_region(GuestAddress(0x1000), Box::new(mem), false, false) + .unwrap(); + vm.msync_memory_region(slot, mem_size, 0).unwrap(); + assert!(vm.msync_memory_region(slot, mem_size + 1, 0).is_err()); + assert!(vm.msync_memory_region(slot + 1, mem_size, 0).is_err()); + } } diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs index ee0bcaf..f098720 100644 --- a/hypervisor/src/lib.rs +++ b/hypervisor/src/lib.rs @@ -12,7 +12,7 @@ pub mod x86_64; use std::ops::{Deref, DerefMut}; -use sys_util::{GuestMemory, Result}; +use sys_util::{GuestAddress, GuestMemory, MappedRegion, Result}; #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] pub use crate::aarch64::*; @@ -34,6 +34,35 @@ pub trait Vm: Send + Sized { /// Gets the guest-mapped memory for the Vm. fn get_memory(&self) -> &GuestMemory; + /// Inserts the given `MappedRegion` into the VM's address space at `guest_addr`. + /// + /// The slot that was assigned the memory mapping is returned on success. The slot can be given + /// to `Vm::remove_memory_region` to remove the memory from the VM's address space and take back + /// ownership of `mem_region`. + /// + /// Note that memory inserted into the VM's address space must not overlap with any other memory + /// slot's region. + /// + /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to + /// write will trigger a mmio VM exit, leaving the memory untouched. + /// + /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to + /// by the guest with `get_dirty_log`. + fn add_memory_region( + &mut self, + guest_addr: GuestAddress, + mem_region: Box, + read_only: bool, + log_dirty_pages: bool, + ) -> Result; + + /// Does a synchronous msync of the memory mapped at `slot`, syncing `size` bytes starting at + /// `offset` from the start of the region. `offset` must be page aligned. + fn msync_memory_region(&mut self, slot: u32, offset: usize, size: usize) -> Result<()>; + + /// Removes and drops the `UserMemoryRegion` that was previously added at the given slot. + fn remove_memory_region(&mut self, slot: u32) -> Result<()>; + /// Retrieves the current timestamp of the paravirtual clock as seen by the current guest. /// Only works on VMs that support `VmCap::PvClock`. fn get_pvclock(&self) -> Result; @@ -69,22 +98,6 @@ pub trait RunnableVcpu: Deref::Vcpu> + DerefMut fn run(&self) -> Result; } -/// A memory region in the current process that can be mapped into the guest's memory. -/// -/// Safe when implementers guarantee `ptr`..`ptr+size` is an mmaped region owned by this object that -/// can't be unmapped during the `MappedRegion`'s lifetime. -pub unsafe trait MappedRegion: Send + Sync { - /// Returns a pointer to the beginning of the memory region. Should only be - /// used for passing this region to ioctls for setting guest memory. - fn as_ptr(&self) -> *mut u8; - - /// Returns the size of the memory region in bytes. - fn size(&self) -> usize; - - /// Flushes changes to this memory region to the backing file. - fn msync(&self) -> Result<()>; -} - /// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called. #[derive(Debug)] pub enum VcpuExit { diff --git a/kvm/src/lib.rs b/kvm/src/lib.rs index 1b06b39..3695f47 100644 --- a/kvm/src/lib.rs +++ b/kvm/src/lib.rs @@ -28,7 +28,7 @@ use msg_socket::MsgOnSocket; use sys_util::{ block_signal, ioctl, ioctl_with_mut_ptr, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, pagesize, signal, unblock_signal, warn, Error, EventFd, GuestAddress, - GuestMemory, MemoryMapping, MemoryMappingArena, Result, SIGRTMIN, + GuestMemory, MappedRegion, MemoryMapping, MemoryMappingArena, Result, SIGRTMIN, }; pub use crate::cap::*; diff --git a/sys_util/src/guest_memory.rs b/sys_util/src/guest_memory.rs index 60b775a..b13b8cc 100644 --- a/sys_util/src/guest_memory.rs +++ b/sys_util/src/guest_memory.rs @@ -13,7 +13,7 @@ use std::result; use std::sync::Arc; use crate::guest_address::GuestAddress; -use crate::mmap::{self, MemoryMapping}; +use crate::mmap::{self, MappedRegion, MemoryMapping}; use crate::shm::{MemfdSeals, SharedMemory}; use crate::{errno, pagesize}; use data_model::volatile_memory::*; diff --git a/sys_util/src/mmap.rs b/sys_util/src/mmap.rs index 64ffe17..0a67d88 100644 --- a/sys_util/src/mmap.rs +++ b/sys_util/src/mmap.rs @@ -105,6 +105,58 @@ impl Into for Protection { } } +/// Validates that `offset`..`offset+range_size` lies within the bounds of a memory mapping of +/// `mmap_size` bytes. Also checks for any overflow. +fn validate_includes_range(mmap_size: usize, offset: usize, range_size: usize) -> Result<()> { + // Ensure offset + size doesn't overflow + let end_offset = offset + .checked_add(range_size) + .ok_or(Error::InvalidAddress)?; + // Ensure offset + size are within the mapping bounds + if end_offset <= mmap_size { + Ok(()) + } else { + Err(Error::InvalidAddress) + } +} + +/// A range of memory that can be msynced, for abstracting over different types of memory mappings. +/// +/// Safe when implementers guarantee `ptr`..`ptr+size` is an mmaped region owned by this object that +/// can't be unmapped during the `MappedRegion`'s lifetime. +pub unsafe trait MappedRegion: Send + Sync { + /// Returns a pointer to the beginning of the memory region. Should only be + /// used for passing this region to ioctls for setting guest memory. + fn as_ptr(&self) -> *mut u8; + + /// Returns the size of the memory region in bytes. + fn size(&self) -> usize; +} + +impl dyn MappedRegion { + /// Calls msync with MS_SYNC on a mapping of `size` bytes starting at `offset` from the start of + /// the region. `offset`..`offset+size` must be contained within the `MappedRegion`. + pub fn msync(&self, offset: usize, size: usize) -> Result<()> { + validate_includes_range(self.size(), offset, size)?; + + // Safe because the MemoryMapping/MemoryMappingArena interface ensures our pointer and size + // are correct, and we've validated that `offset`..`offset+size` is in the range owned by + // this `MappedRegion`. + let ret = unsafe { + libc::msync( + (self.as_ptr() as usize + offset) as *mut libc::c_void, + size, + libc::MS_SYNC, + ) + }; + if ret != -1 { + Ok(()) + } else { + Err(Error::SystemCallFailed(errno::Error::last())) + } + } +} + /// Wraps an anonymous shared memory mapping in the current process. #[derive(Debug)] pub struct MemoryMapping { @@ -314,17 +366,6 @@ impl MemoryMapping { }) } - /// Returns a pointer to the beginning of the memory region. Should only be - /// used for passing this region to ioctls for setting guest memory. - pub fn as_ptr(&self) -> *mut u8 { - self.addr - } - - /// Returns the size of the memory region in bytes. - pub fn size(&self) -> usize { - self.size - } - /// Calls msync with MS_SYNC on the mapping. pub fn msync(&self) -> Result<()> { // This is safe since we use the exact address and length of a known @@ -607,6 +648,18 @@ impl MemoryMapping { } } +// Safe because the pointer and size point to a memory range owned by this MemoryMapping that won't +// be unmapped until it's Dropped. +unsafe impl MappedRegion for MemoryMapping { + fn as_ptr(&self) -> *mut u8 { + self.addr + } + + fn size(&self) -> usize { + self.size + } +} + impl VolatileMemory for MemoryMapping { fn get_slice(&self, offset: usize, count: usize) -> VolatileMemoryResult { let mem_end = calc_offset(offset, count)?; @@ -732,7 +785,11 @@ impl MemoryMappingArena { prot: Protection, fd: Option<(&dyn AsRawFd, u64)>, ) -> Result<()> { - self.validate_range(offset, size)?; + // Ensure offset is page-aligned + if offset % pagesize() != 0 { + return Err(Error::NotPageAligned); + } + validate_includes_range(self.size(), offset, size)?; // This is safe since the range has been validated. let mmap = unsafe { @@ -762,50 +819,18 @@ impl MemoryMappingArena { pub fn remove(&mut self, offset: usize, size: usize) -> Result<()> { self.try_add(offset, size, Protection::read(), None) } +} - /// Calls msync with MS_SYNC on a mapping of `size` bytes starting at `offset` from the start of - /// the arena. `offset` must be page aligned. - pub fn msync(&self, offset: usize, size: usize) -> Result<()> { - self.validate_range(offset, size)?; - - // Safe because we've validated that this memory range is owned by this `MemoryMappingArena`. - let ret = - unsafe { libc::msync(self.addr as *mut libc::c_void, self.size(), libc::MS_SYNC) }; - if ret == -1 { - return Err(Error::SystemCallFailed(errno::Error::last())); - } - Ok(()) - } - - /// Returns a pointer to the beginning of the memory region. Should only be - /// used for passing this region to ioctls for setting guest memory. - pub fn as_ptr(&self) -> *mut u8 { +// Safe because the pointer and size point to a memory range owned by this MemoryMappingArena that +// won't be unmapped until it's Dropped. +unsafe impl MappedRegion for MemoryMappingArena { + fn as_ptr(&self) -> *mut u8 { self.addr } - /// Returns the size of the memory region in bytes. - pub fn size(&self) -> usize { + fn size(&self) -> usize { self.size } - - /// Validates `offset` and `size`. - /// Checks that offset..offset+size doesn't overlap with existing mappings. - /// Also ensures correct alignment, and checks for any overflow. - /// Note: offset..offset+size is considered valid if it _perfectly_ overlaps - /// with single other region. - fn validate_range(&self, offset: usize, size: usize) -> Result<()> { - // Ensure offset is page-aligned - if offset % pagesize() != 0 { - return Err(Error::NotPageAligned); - } - // Ensure offset + size doesn't overflow - let end_offset = offset.checked_add(size).ok_or(Error::InvalidAddress)?; - // Ensure offset + size are within the arena bounds - if end_offset > self.size { - return Err(Error::InvalidAddress); - } - Ok(()) - } } impl From for MemoryMappingArena { @@ -1021,4 +1046,19 @@ mod tests { m.remove(0, ps - 1) .expect("failed to remove unaligned mapping"); } + + #[test] + fn arena_msync() { + let size = 0x40000; + let m = MemoryMappingArena::new(size).unwrap(); + let ps = pagesize(); + MappedRegion::msync(&m, 0, ps).unwrap(); + MappedRegion::msync(&m, 0, size).unwrap(); + MappedRegion::msync(&m, ps, size - ps).unwrap(); + let res = MappedRegion::msync(&m, ps, size).unwrap_err(); + match res { + Error::InvalidAddress => {} + e => panic!("unexpected error: {}", e), + } + } } diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs index 81a11d3..1d06e4b 100644 --- a/vm_control/src/lib.rs +++ b/vm_control/src/lib.rs @@ -21,7 +21,9 @@ use libc::{EINVAL, EIO, ENODEV}; use kvm::{IrqRoute, IrqSource, Vm}; use msg_socket::{MsgError, MsgOnSocket, MsgReceiver, MsgResult, MsgSender, MsgSocket}; use resources::{Alloc, GpuMemoryDesc, MmioType, SystemAllocator}; -use sys_util::{error, Error as SysError, EventFd, GuestAddress, MemoryMapping, MmapError, Result}; +use sys_util::{ + error, Error as SysError, EventFd, GuestAddress, MappedRegion, MemoryMapping, MmapError, Result, +}; /// A file descriptor either borrowed or owned by this. #[derive(Debug)] @@ -481,7 +483,7 @@ impl VmMsyncRequest { match *self { MsyncArena { slot, offset, size } => { if let Some(arena) = vm.get_mmap_arena(slot) { - match arena.msync(offset, size) { + match MappedRegion::msync(arena, offset, size) { Ok(()) => VmMsyncResponse::Ok, Err(e) => match e { MmapError::SystemCallFailed(errno) => VmMsyncResponse::Err(errno), -- cgit 1.4.1 From 97d6359febdabc2f44d5ab404a0cd41e65172163 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 3 Jun 2020 17:40:12 +0900 Subject: seccomp: add policy file video_device on ARM. BUG=b:151399776 BUG=b:151394062 TEST=Video device is properly probed with policy enabled on a guest kernel with VIRTIO_VIDEO enabled. Change-Id: Ia29afa0ab3eb969291c046d8657cd28e88d54b96 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2230418 Reviewed-by: Keiichi Watanabe Reviewed-by: Chirantan Ekbote Tested-by: Alexandre Courbot Commit-Queue: Alexandre Courbot --- seccomp/arm/video_device.policy | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 seccomp/arm/video_device.policy diff --git a/seccomp/arm/video_device.policy b/seccomp/arm/video_device.policy new file mode 100644 index 0000000..f8a722d --- /dev/null +++ b/seccomp/arm/video_device.policy @@ -0,0 +1,25 @@ +# Copyright 2020 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +@include /usr/share/policy/crosvm/common_device.policy + +# Syscalls specific to video devices. +clock_getres: 1 +clock_gettime: 1 +connect: 1 +fcntl64: arg1 == F_GETFL || arg1 == F_SETFL || arg1 == F_DUPFD_CLOEXEC || arg1 == F_GETFD || arg1 == F_SETFD +getegid32: 1 +geteuid32: 1 +getgid32: 1 +getresgid32: 1 +getresuid32: 1 +getsockname: 1 +getuid32: 1 +# ioctl: arg1 == DRM_IOCTL_* +ioctl: arg1 & 0x6400 +openat: 1 +send: 1 +setpriority: 1 +socket: arg0 == AF_UNIX +stat64: 1 -- cgit 1.4.1 From 4ffb3d06bdadcfcb79ee2b7bf445ac09da34c218 Mon Sep 17 00:00:00 2001 From: Keiichi Watanabe Date: Wed, 13 May 2020 19:43:02 +0900 Subject: devices: video: dec: Support arbitrary buffers to be mapped as resources Support a case where a guest client who may use arbitrary numbers of buffers. (e.g. C2V4L2Component with default pool in ARCVM) Such a client is valid as long as it uses at most 32 buffers at the same time. More specifically, this CL allows the guest to call ResourceCreate for an output resource_id which was already processed by the host. Such ResourceCreate calls will be handled as reassignment of DMAbuf to a FrameBufferId. BUG=b:157702336 TEST=Play a YouTube video on ARCVM w/ C2V4L2Component using default pool Change-Id: Ie9c457867abd91b6b7a17a5bca4a1a1e9f53c1ae Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2198327 Reviewed-by: Alexandre Courbot Tested-by: Keiichi Watanabe Commit-Queue: Keiichi Watanabe --- devices/src/virtio/video/decoder/mod.rs | 85 +++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 26 deletions(-) diff --git a/devices/src/virtio/video/decoder/mod.rs b/devices/src/virtio/video/decoder/mod.rs index 3516cad..1dfd35c 100644 --- a/devices/src/virtio/video/decoder/mod.rs +++ b/devices/src/virtio/video/decoder/mod.rs @@ -5,7 +5,7 @@ //! Implementation of a virtio video decoder device backed by LibVDA. use std::collections::btree_map::Entry; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::convert::TryInto; use std::fs::File; use std::os::unix::io::{AsRawFd, IntoRawFd}; @@ -42,6 +42,8 @@ type FrameBufferId = i32; type ResourceHandle = u32; type Timestamp = u64; +const OUTPUT_BUFFER_COUNT: usize = 32; + // Represents queue types of pending Clear commands if exist. #[derive(Default)] struct PendingClearCmds { @@ -64,9 +66,14 @@ struct Context { res_id_to_res_handle: BTreeMap, // OutputResourceId <-> FrameBufferId + // TODO: Encapsulate the following two maps as a type of a bijective map. res_id_to_frame_buf_id: BTreeMap, frame_buf_id_to_res_id: BTreeMap, + // Stores free FrameBufferIds which `use_output_buffer()` or `reuse_output_buffer()` can be + // called with. + available_frame_buf_ids: BTreeSet, + keep_resources: Vec, // Stores queue types of pending Clear commands if exist. @@ -127,12 +134,31 @@ impl Context { self.res_id_to_res_handle.insert(resource_id, handle); } - fn register_queued_frame_buffer(&mut self, resource_id: OutputResourceId) -> FrameBufferId { - // Generate a new FrameBufferId - let id = self.res_id_to_frame_buf_id.len() as FrameBufferId; + fn register_queued_frame_buffer( + &mut self, + resource_id: OutputResourceId, + ) -> VideoResult { + // Use the first free frame buffer id. + let id = *self.available_frame_buf_ids.iter().next().ok_or_else(|| { + error!( + "no frame buffer ID is available for resource_id {}", + resource_id + ); + VideoError::InvalidOperation + })?; + self.available_frame_buf_ids.remove(&id); + + // Invalidate old entries for `resource_id` and `id` from two maps to keep them bijective. + if let Some(old_frame_buf_id) = self.res_id_to_frame_buf_id.remove(&resource_id) { + self.frame_buf_id_to_res_id.remove(&old_frame_buf_id); + } + if let Some(old_res_id) = self.frame_buf_id_to_res_id.remove(&id) { + self.res_id_to_frame_buf_id.remove(&old_res_id); + } + self.res_id_to_frame_buf_id.insert(resource_id, id); self.frame_buf_id_to_res_id.insert(id, resource_id); - id + Ok(id) } fn reset(&mut self) { @@ -200,6 +226,9 @@ impl Context { right: i32, bottom: i32, ) -> Option { + // `buffer_id` becomes available for another frame. + self.available_frame_buf_ids.insert(buffer_id); + let plane_size = ((right - left) * (bottom - top)) as u32; for fmt in self.out_params.plane_formats.iter_mut() { fmt.plane_size = plane_size; @@ -239,6 +268,10 @@ impl Context { } else if self.pending_clear_cmds.output { self.pending_clear_cmds.output = false; + // Reinitialize `available_frame_buf_ids`. + self.available_frame_buf_ids.clear(); + self.available_frame_buf_ids = (0..OUTPUT_BUFFER_COUNT as i32).collect(); + Some(QueueType::Output) } else { error!("unexpected ResetResponse"); @@ -426,8 +459,6 @@ impl<'a> Decoder<'a> { // first time. let mut ctx = self.contexts.get_mut(&stream_id)?; if !ctx.set_output_buffer_count { - const OUTPUT_BUFFER_COUNT: usize = 32; - // Set the buffer count to the maximum value. // TODO(b/1518105): This is a hack due to the lack of way of telling a number of // frame buffers explictly in virtio-video v3 RFC. Once we have the way, @@ -436,20 +467,18 @@ impl<'a> Decoder<'a> { .get(&stream_id)? .set_output_buffer_count(OUTPUT_BUFFER_COUNT) .map_err(VideoError::VdaError)?; + + // FrameBufferId must be in the range of 0.. ((# of buffers) - 1). + ctx.available_frame_buf_ids = (0..OUTPUT_BUFFER_COUNT as i32).collect(); + ctx.set_output_buffer_count = true; } - // We assume ResourceCreate is not called to an output resource that is already - // imported to Chrome for now. - // TODO(keiichiw): We need to support this case for a guest client who may use - // arbitrary numbers of buffers. (e.g. C2V4L2Component in ARCVM) - // Such a client is valid as long as it uses at most 32 buffers at the same time. + // If the given resource_id was associated with an old frame_buf_id, + // dissociate them. if let Some(frame_buf_id) = ctx.res_id_to_frame_buf_id.get(&resource_id) { - error!( - "resource {} has already been imported to Chrome as a frame buffer {}", - resource_id, frame_buf_id - ); - return Err(VideoError::InvalidOperation); + ctx.frame_buf_id_to_res_id.remove(&frame_buf_id); + ctx.res_id_to_frame_buf_id.remove(&resource_id); } Ok(()) @@ -545,14 +574,18 @@ impl<'a> Decoder<'a> { // In case a given resource has been imported to VDA, call // `session.reuse_output_buffer()` and return a response. // Otherwise, `session.use_output_buffer()` will be called below. - match ctx.res_id_to_frame_buf_id.get(&resource_id) { - Some(buffer_id) => { - session - .reuse_output_buffer(*buffer_id) - .map_err(VideoError::VdaError)?; - return Ok(()); - } - None => (), + if let Some(buffer_id) = ctx.res_id_to_frame_buf_id.get(&resource_id) { + if !ctx.available_frame_buf_ids.remove(&buffer_id) { + error!( + "resource_id {} is associated with VDA's frame_buffer id {}, which is in use or out of range.", + resource_id, buffer_id + ); + return Err(VideoError::InvalidOperation); + } + session + .reuse_output_buffer(*buffer_id) + .map_err(VideoError::VdaError)?; + return Ok(()); }; let resource_info = ctx.get_resource_info(resource_bridge, resource_id)?; @@ -579,7 +612,7 @@ impl<'a> Decoder<'a> { let buffer_id = self .contexts .get_mut(&stream_id)? - .register_queued_frame_buffer(resource_id); + .register_queued_frame_buffer(resource_id)?; session .use_output_buffer(buffer_id as i32, libvda::PixelFormat::NV12, fd, &planes) -- cgit 1.4.1 From 173fe62df2b82f4d09a36066200f0a1727bd1d22 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Thu, 21 May 2020 18:05:06 -0700 Subject: kvm: use MappedRegion trait - Reduces code duplication between MMIO and mmap arenas - Makes adding future types easier - Makes upcoming deprecation of kvm crate easier - Use BTreeMap instead of HashMap since it's more efficient BUG=chromium:924405 TEST=compile and test Change-Id: I520abed0926489e64aac046e0dc0cfeb72fae7b2 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2216446 Tested-by: Gurchetan Singh Tested-by: kokoro Commit-Queue: Gurchetan Singh Reviewed-by: Steven Richman Reviewed-by: Daniel Verkamp Auto-Submit: Gurchetan Singh --- Cargo.lock | 1 + arch/src/pstore.rs | 9 +- hypervisor/src/kvm/mod.rs | 6 +- kvm/Cargo.toml | 1 + kvm/src/lib.rs | 209 ++++++++++++------------------------------ kvm/tests/dirty_log.rs | 8 +- kvm/tests/read_only_memory.rs | 11 ++- src/linux.rs | 4 +- src/plugin/mod.rs | 2 +- src/plugin/process.rs | 3 +- vm_control/src/lib.rs | 30 ++---- 11 files changed, 98 insertions(+), 186 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a9a5c4..47e937b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,6 +390,7 @@ dependencies = [ "kvm_sys 0.1.0", "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", "msg_socket 0.1.0", + "sync 0.1.0", "sys_util 0.1.0", ] diff --git a/arch/src/pstore.rs b/arch/src/pstore.rs index a06ea1b..a6c5e61 100644 --- a/arch/src/pstore.rs +++ b/arch/src/pstore.rs @@ -64,8 +64,13 @@ pub fn create_memory_region( let memory_mapping = MemoryMapping::from_fd(&file, pstore.size as usize).map_err(Error::MmapError)?; - vm.add_mmio_memory(GuestAddress(address), memory_mapping, false, false) - .map_err(Error::SysUtilError)?; + vm.add_memory_region( + GuestAddress(address), + Box::new(memory_mapping), + false, + false, + ) + .map_err(Error::SysUtilError)?; Ok(RamoopsRegion { address, diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index abffdd5..c738cfa 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -8,7 +8,7 @@ mod aarch64; mod x86_64; use std::cmp::Ordering; -use std::collections::{BinaryHeap, HashMap}; +use std::collections::{BTreeMap, BinaryHeap}; use std::convert::TryFrom; use std::ops::{Deref, DerefMut}; use std::os::raw::{c_char, c_ulong}; @@ -128,7 +128,7 @@ impl PartialOrd for MemSlot { pub struct KvmVm { vm: SafeDescriptor, guest_mem: GuestMemory, - mem_regions: Arc>>>, + mem_regions: Arc>>>, mem_slot_gaps: Arc>>, } @@ -161,7 +161,7 @@ impl KvmVm { Ok(KvmVm { vm: vm_descriptor, guest_mem, - mem_regions: Arc::new(Mutex::new(HashMap::new())), + mem_regions: Arc::new(Mutex::new(BTreeMap::new())), mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())), }) } diff --git a/kvm/Cargo.toml b/kvm/Cargo.toml index f784ae7..9a6feee 100644 --- a/kvm/Cargo.toml +++ b/kvm/Cargo.toml @@ -10,3 +10,4 @@ kvm_sys = { path = "../kvm_sys" } libc = "*" msg_socket = { path = "../msg_socket" } sys_util = { path = "../sys_util" } +sync = { path = "../sync" } diff --git a/kvm/src/lib.rs b/kvm/src/lib.rs index 3695f47..a05bde7 100644 --- a/kvm/src/lib.rs +++ b/kvm/src/lib.rs @@ -8,18 +8,20 @@ mod cap; use std::cell::RefCell; use std::cmp::{min, Ordering}; -use std::collections::{BinaryHeap, HashMap}; +use std::collections::{BTreeMap, BinaryHeap}; use std::fs::File; use std::mem::size_of; use std::ops::{Deref, DerefMut}; use std::os::raw::*; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::ptr::copy_nonoverlapping; +use std::sync::Arc; +use sync::Mutex; use data_model::vec_with_array_field; use libc::sigset_t; -use libc::{open, EBUSY, EINVAL, ENOENT, ENOSPC, EOVERFLOW, O_CLOEXEC, O_RDWR}; +use libc::{open, EBUSY, EFAULT, EINVAL, EIO, ENOENT, ENOSPC, EOVERFLOW, O_CLOEXEC, O_RDWR}; use kvm_sys::*; @@ -28,7 +30,7 @@ use msg_socket::MsgOnSocket; use sys_util::{ block_signal, ioctl, ioctl_with_mut_ptr, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, pagesize, signal, unblock_signal, warn, Error, EventFd, GuestAddress, - GuestMemory, MappedRegion, MemoryMapping, MemoryMappingArena, Result, SIGRTMIN, + GuestMemory, MappedRegion, MemoryMapping, MmapError, Result, SIGRTMIN, }; pub use crate::cap::*; @@ -307,9 +309,8 @@ impl PartialOrd for MemSlot { pub struct Vm { vm: File, guest_mem: GuestMemory, - mmio_memory: HashMap, - mmap_arenas: HashMap, - mem_slot_gaps: BinaryHeap, + mem_regions: Arc>>>, + mem_slot_gaps: Arc>>, #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] routes: Vec, } @@ -341,9 +342,8 @@ impl Vm { Ok(Vm { vm: vm_file, guest_mem, - mmio_memory: HashMap::new(), - mmap_arenas: HashMap::new(), - mem_slot_gaps: BinaryHeap::new(), + mem_regions: Arc::new(Mutex::new(BTreeMap::new())), + mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())), #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] routes: kvm_default_irq_routing_table(), }) @@ -352,49 +352,6 @@ impl Vm { } } - // Helper method for `set_user_memory_region` that tracks available slots. - unsafe fn set_user_memory_region( - &mut self, - read_only: bool, - log_dirty_pages: bool, - guest_addr: u64, - memory_size: u64, - userspace_addr: *mut u8, - ) -> Result { - let slot = match self.mem_slot_gaps.pop() { - Some(gap) => gap.0, - None => { - (self.mmio_memory.len() - + self.guest_mem.num_regions() as usize - + self.mmap_arenas.len()) as u32 - } - }; - - let res = set_user_memory_region( - &self.vm, - slot, - read_only, - log_dirty_pages, - guest_addr, - memory_size, - userspace_addr, - ); - match res { - Ok(_) => Ok(slot), - Err(e) => { - self.mem_slot_gaps.push(MemSlot(slot)); - Err(e) - } - } - } - - // Helper method for `set_user_memory_region` that tracks available slots. - unsafe fn remove_user_memory_region(&mut self, slot: u32) -> Result<()> { - set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?; - self.mem_slot_gaps.push(MemSlot(slot)); - Ok(()) - } - /// Checks if a particular `Cap` is available. /// /// This is distinct from the `Kvm` version of this method because the some extensions depend on @@ -406,10 +363,10 @@ impl Vm { unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) == 1 } } - /// Inserts the given `MemoryMapping` into the VM's address space at `guest_addr`. + /// Inserts the given `mem` into the VM's address space at `guest_addr`. /// - /// The slot that was assigned the mmio memory mapping is returned on success. The slot can be - /// given to `Vm::remove_mmio_memory` to remove the memory from the VM's address space and + /// The slot that was assigned the kvm memory mapping is returned on success. The slot can be + /// given to `Vm::remove_memory_region` to remove the memory from the VM's address space and /// take back ownership of `mem`. /// /// Note that memory inserted into the VM's address space must not overlap with any other memory @@ -420,10 +377,10 @@ impl Vm { /// /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to /// by the guest with `get_dirty_log`. - pub fn add_mmio_memory( + pub fn add_memory_region( &mut self, guest_addr: GuestAddress, - mem: MemoryMapping, + mem: Box, read_only: bool, log_dirty_pages: bool, ) -> Result { @@ -432,105 +389,64 @@ impl Vm { if self.guest_mem.range_overlap(guest_addr, end_addr) { return Err(Error::new(ENOSPC)); } + let mut regions = self.mem_regions.lock(); + let mut gaps = self.mem_slot_gaps.lock(); + let slot = match gaps.pop() { + Some(gap) => gap.0, + None => (regions.len() + self.guest_mem.num_regions() as usize) as u32, + }; // Safe because we check that the given guest address is valid and has no overlaps. We also // know that the pointer and size are correct because the MemoryMapping interface ensures // this. We take ownership of the memory mapping so that it won't be unmapped until the slot // is removed. - let slot = unsafe { - self.set_user_memory_region( + let res = unsafe { + set_user_memory_region( + &self.vm, + slot, read_only, log_dirty_pages, guest_addr.offset() as u64, size, mem.as_ptr(), - )? + ) }; - self.mmio_memory.insert(slot, mem); + if let Err(e) = res { + gaps.push(MemSlot(slot)); + return Err(e); + } + regions.insert(slot, mem); Ok(slot) } - /// Removes mmio memory that was previously added at the given slot. + /// Removes memory that was previously added at the given slot. /// /// Ownership of the host memory mapping associated with the given slot is returned on success. - pub fn remove_mmio_memory(&mut self, slot: u32) -> Result { - if self.mmio_memory.contains_key(&slot) { - // Safe because the slot is checked against the list of mmio memory slots. - unsafe { - self.remove_user_memory_region(slot)?; - } - // Safe to unwrap since map is checked to contain key - Ok(self.mmio_memory.remove(&slot).unwrap()) - } else { - Err(Error::new(ENOENT)) + pub fn remove_memory_region(&mut self, slot: u32) -> Result<()> { + let mut regions = self.mem_regions.lock(); + if !regions.contains_key(&slot) { + return Err(Error::new(ENOENT)); } - } - - /// Inserts the given `MemoryMappingArena` into the VM's address space at `guest_addr`. - /// - /// The slot that was assigned the mmio memory mapping is returned on success. The slot can be - /// given to `Vm::remove_mmap_arena` to remove the memory from the VM's address space and - /// take back ownership of `mmap_arena`. - /// - /// Note that memory inserted into the VM's address space must not overlap with any other memory - /// slot's region. - /// - /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to - /// write will trigger a mmio VM exit, leaving the memory untouched. - /// - /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to - /// by the guest with `get_dirty_log`. - pub fn add_mmap_arena( - &mut self, - guest_addr: GuestAddress, - mmap_arena: MemoryMappingArena, - read_only: bool, - log_dirty_pages: bool, - ) -> Result { - let size = mmap_arena.size() as u64; - let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?; - if self.guest_mem.range_overlap(guest_addr, end_addr) { - return Err(Error::new(ENOSPC)); + // Safe because the slot is checked against the list of memory slots. + unsafe { + set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?; } - - // Safe because we check that the given guest address is valid and has no overlaps. We also - // know that the pointer and size are correct because the MemoryMapping interface ensures - // this. We take ownership of the memory mapping so that it won't be unmapped until the slot - // is removed. - let slot = unsafe { - self.set_user_memory_region( - read_only, - log_dirty_pages, - guest_addr.offset() as u64, - size, - mmap_arena.as_ptr(), - )? - }; - self.mmap_arenas.insert(slot, mmap_arena); - - Ok(slot) + self.mem_slot_gaps.lock().push(MemSlot(slot)); + regions.remove(&slot); + Ok(()) } - /// Removes memory map arena that was previously added at the given slot. - /// - /// Ownership of the host memory mapping associated with the given slot is returned on success. - pub fn remove_mmap_arena(&mut self, slot: u32) -> Result { - if self.mmap_arenas.contains_key(&slot) { - // Safe because the slot is checked against the list of mmio memory slots. - unsafe { - self.remove_user_memory_region(slot)?; - } - // Safe to unwrap since map is checked to contain key - Ok(self.mmap_arenas.remove(&slot).unwrap()) - } else { - Err(Error::new(ENOENT)) - } - } + pub fn mysnc_memory_region(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> { + let mut regions = self.mem_regions.lock(); + let mem = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?; - /// Get a mutable reference to the memory map arena added at the given slot. - pub fn get_mmap_arena(&mut self, slot: u32) -> Option<&mut MemoryMappingArena> { - self.mmap_arenas.get_mut(&slot) + mem.msync(offset, size).map_err(|err| match err { + MmapError::InvalidAddress => Error::new(EFAULT), + MmapError::NotPageAligned => Error::new(EINVAL), + MmapError::SystemCallFailed(e) => e, + _ => Error::new(EIO), + }) } /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at @@ -540,10 +456,10 @@ impl Vm { /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must /// be 2 bytes or greater. pub fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> { - match self.mmio_memory.get(&slot) { - Some(mmap) => { + match self.mem_regions.lock().get(&slot) { + Some(mem) => { // Ensures that there are as many bytes in dirty_log as there are pages in the mmap. - if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() { + if dirty_log_bitmap_size(mem.size()) > dirty_log.len() { return Err(Error::new(EINVAL)); } let mut dirty_log_kvm = kvm_dirty_log { @@ -2167,10 +2083,10 @@ mod tests { let mut vm = Vm::new(&kvm, gm).unwrap(); let mem_size = 0x1000; let mem = MemoryMapping::new(mem_size).unwrap(); - vm.add_mmio_memory(GuestAddress(0x1000), mem, false, false) + vm.add_memory_region(GuestAddress(0x1000), Box::new(mem), false, false) .unwrap(); let mem = MemoryMapping::new(mem_size).unwrap(); - vm.add_mmio_memory(GuestAddress(0x10000), mem, false, false) + vm.add_memory_region(GuestAddress(0x10000), Box::new(mem), false, false) .unwrap(); } @@ -2181,24 +2097,21 @@ mod tests { let mut vm = Vm::new(&kvm, gm).unwrap(); let mem_size = 0x1000; let mem = MemoryMapping::new(mem_size).unwrap(); - vm.add_mmio_memory(GuestAddress(0x1000), mem, true, false) + vm.add_memory_region(GuestAddress(0x1000), Box::new(mem), true, false) .unwrap(); } #[test] - fn remove_memory() { + fn remove_memory_region() { let kvm = Kvm::new().unwrap(); let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x1000)]).unwrap(); let mut vm = Vm::new(&kvm, gm).unwrap(); let mem_size = 0x1000; let mem = MemoryMapping::new(mem_size).unwrap(); - let mem_ptr = mem.as_ptr(); let slot = vm - .add_mmio_memory(GuestAddress(0x1000), mem, false, false) + .add_memory_region(GuestAddress(0x1000), Box::new(mem), false, false) .unwrap(); - let mem = vm.remove_mmio_memory(slot).unwrap(); - assert_eq!(mem.size(), mem_size); - assert_eq!(mem.as_ptr(), mem_ptr); + vm.remove_memory_region(slot).unwrap(); } #[test] @@ -2206,7 +2119,7 @@ mod tests { let kvm = Kvm::new().unwrap(); let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x1000)]).unwrap(); let mut vm = Vm::new(&kvm, gm).unwrap(); - assert!(vm.remove_mmio_memory(0).is_err()); + assert!(vm.remove_memory_region(0).is_err()); } #[test] @@ -2217,7 +2130,7 @@ mod tests { let mem_size = 0x2000; let mem = MemoryMapping::new(mem_size).unwrap(); assert!(vm - .add_mmio_memory(GuestAddress(0x2000), mem, false, false) + .add_memory_region(GuestAddress(0x2000), Box::new(mem), false, false) .is_err()); } diff --git a/kvm/tests/dirty_log.rs b/kvm/tests/dirty_log.rs index 1efe135..57d9866 100644 --- a/kvm/tests/dirty_log.rs +++ b/kvm/tests/dirty_log.rs @@ -43,10 +43,12 @@ fn test_run() { vcpu_regs.rbx = 0x12; vcpu.set_regs(&vcpu_regs).expect("set regs failed"); let slot = vm - .add_mmio_memory( + .add_memory_region( GuestAddress(0), - MemoryMapping::from_fd(&mem, mem_size as usize) - .expect("failed to create memory mapping"), + Box::new( + MemoryMapping::from_fd(&mem, mem_size as usize) + .expect("failed to create memory mapping"), + ), false, true, ) diff --git a/kvm/tests/read_only_memory.rs b/kvm/tests/read_only_memory.rs index d8c0e1d..36ba1b2 100644 --- a/kvm/tests/read_only_memory.rs +++ b/kvm/tests/read_only_memory.rs @@ -45,9 +45,12 @@ fn test_run() { vcpu_regs.rax = 0x66; vcpu_regs.rbx = 0; vcpu.set_regs(&vcpu_regs).expect("set regs failed"); - vm.add_mmio_memory( + vm.add_memory_region( GuestAddress(0), - MemoryMapping::from_fd(&mem, mem_size as usize).expect("failed to create memory mapping"), + Box::new( + MemoryMapping::from_fd(&mem, mem_size as usize) + .expect("failed to create memory mapping"), + ), false, false, ) @@ -63,9 +66,9 @@ fn test_run() { mmap_ro .write_obj(vcpu_regs.rax as u8, 0) .expect("failed writing data to ro memory"); - vm.add_mmio_memory( + vm.add_memory_region( GuestAddress(vcpu_sregs.es.base), - MemoryMapping::from_fd(&mem_ro, 0x1000).expect("failed to create memory mapping"), + Box::new(MemoryMapping::from_fd(&mem_ro, 0x1000).expect("failed to create memory mapping")), true, false, ) diff --git a/src/linux.rs b/src/linux.rs index fb463c2..687aae4 100644 --- a/src/linux.rs +++ b/src/linux.rs @@ -1012,9 +1012,9 @@ fn create_pmem_device( .map_err(Error::AllocatePmemDeviceAddress)?; let slot = vm - .add_mmap_arena( + .add_memory_region( GuestAddress(mapping_address), - arena, + Box::new(arena), /* read_only = */ disk.read_only, /* log_dirty_pages = */ false, ) diff --git a/src/plugin/mod.rs b/src/plugin/mod.rs index 470d5f0..28a43f7 100644 --- a/src/plugin/mod.rs +++ b/src/plugin/mod.rs @@ -384,7 +384,7 @@ impl PluginObject { 8 => vm.unregister_ioevent(&evt, addr, Datamatch::U64(Some(datamatch as u64))), _ => Err(SysError::new(EINVAL)), }, - PluginObject::Memory { slot, .. } => vm.remove_mmio_memory(slot).and(Ok(())), + PluginObject::Memory { slot, .. } => vm.remove_memory_region(slot).and(Ok(())), PluginObject::IrqEvent { irq_id, evt } => vm.unregister_irqfd(&evt, irq_id), } } diff --git a/src/plugin/process.rs b/src/plugin/process.rs index 688aa85..f48c1d0 100644 --- a/src/plugin/process.rs +++ b/src/plugin/process.rs @@ -361,7 +361,8 @@ impl Process { } let mem = MemoryMapping::from_fd_offset(&shm, length as usize, offset) .map_err(mmap_to_sys_err)?; - let slot = vm.add_mmio_memory(GuestAddress(start), mem, read_only, dirty_log)?; + let slot = + vm.add_memory_region(GuestAddress(start), Box::new(mem), read_only, dirty_log)?; entry.insert(PluginObject::Memory { slot, length: length as usize, diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs index 1d06e4b..f7514c2 100644 --- a/vm_control/src/lib.rs +++ b/vm_control/src/lib.rs @@ -21,9 +21,7 @@ use libc::{EINVAL, EIO, ENODEV}; use kvm::{IrqRoute, IrqSource, Vm}; use msg_socket::{MsgError, MsgOnSocket, MsgReceiver, MsgResult, MsgSender, MsgSocket}; use resources::{Alloc, GpuMemoryDesc, MmioType, SystemAllocator}; -use sys_util::{ - error, Error as SysError, EventFd, GuestAddress, MappedRegion, MemoryMapping, MmapError, Result, -}; +use sys_util::{error, Error as SysError, EventFd, GuestAddress, MemoryMapping, MmapError, Result}; /// A file descriptor either borrowed or owned by this. #[derive(Debug)] @@ -307,7 +305,7 @@ impl VmMemoryRequest { Err(e) => VmMemoryResponse::Err(e), } } - UnregisterMemory(slot) => match vm.remove_mmio_memory(slot) { + UnregisterMemory(slot) => match vm.remove_memory_region(slot) { Ok(_) => VmMemoryResponse::Ok, Err(e) => VmMemoryResponse::Err(e), }, @@ -349,7 +347,7 @@ impl VmMemoryRequest { Ok(v) => v, Err(_e) => return VmMemoryResponse::Err(SysError::new(EINVAL)), }; - match vm.add_mmio_memory(GuestAddress(gpa), mmap, false, false) { + match vm.add_memory_region(GuestAddress(gpa), Box::new(mmap), false, false) { Ok(_) => VmMemoryResponse::Ok, Err(e) => VmMemoryResponse::Err(e), } @@ -481,19 +479,10 @@ impl VmMsyncRequest { pub fn execute(&self, vm: &mut Vm) -> VmMsyncResponse { use self::VmMsyncRequest::*; match *self { - MsyncArena { slot, offset, size } => { - if let Some(arena) = vm.get_mmap_arena(slot) { - match MappedRegion::msync(arena, offset, size) { - Ok(()) => VmMsyncResponse::Ok, - Err(e) => match e { - MmapError::SystemCallFailed(errno) => VmMsyncResponse::Err(errno), - _ => VmMsyncResponse::Err(SysError::new(EINVAL)), - }, - } - } else { - VmMsyncResponse::Err(SysError::new(EINVAL)) - } - } + MsyncArena { slot, offset, size } => match vm.mysnc_memory_region(slot, offset, size) { + Ok(()) => VmMsyncResponse::Ok, + Err(e) => VmMsyncResponse::Err(e), + }, } } } @@ -598,10 +587,7 @@ fn register_memory( _ => return Err(SysError::new(EINVAL)), }; - let slot = match vm.add_mmio_memory(GuestAddress(addr), mmap, false, false) { - Ok(v) => v, - Err(e) => return Err(e), - }; + let slot = vm.add_memory_region(GuestAddress(addr), Box::new(mmap), false, false)?; Ok((addr >> 12, slot)) } -- cgit 1.4.1 From 014b351f588c85577520898eb73e72bee973b7f2 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Mon, 4 May 2020 09:39:43 -0700 Subject: resources: add address_from_pci_offset function Refactor current code and add tests. BUG=chromium:924405 TEST=compile and test Change-Id: I9476f3a4ffd8ae85fc95d6889ada6b056613bbfa Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2216447 Reviewed-by: Daniel Verkamp Commit-Queue: Gurchetan Singh Tested-by: Gurchetan Singh Tested-by: kokoro Auto-Submit: Gurchetan Singh --- resources/src/address_allocator.rs | 82 ++++++++++++++++++++++++++++++++++++++ resources/src/lib.rs | 4 ++ vm_control/src/lib.rs | 50 +++++------------------ 3 files changed, 96 insertions(+), 40 deletions(-) diff --git a/resources/src/address_allocator.rs b/resources/src/address_allocator.rs index 88f652a..dce904f 100644 --- a/resources/src/address_allocator.rs +++ b/resources/src/address_allocator.rs @@ -215,6 +215,27 @@ impl AddressAllocator { Ok(()) } + + /// Returns an address from associated PCI `alloc` given an allocation offset and size. + pub fn address_from_pci_offset(&self, alloc: Alloc, offset: u64, size: u64) -> Result { + match alloc { + Alloc::PciBar { .. } => (), + _ => return Err(Error::InvalidAlloc(alloc)), + }; + + match self.allocs.get(&alloc) { + Some((start_addr, length, _)) => { + let address = start_addr.checked_add(offset).ok_or(Error::OutOfBounds)?; + let range = *start_addr..*start_addr + *length; + let end = address.checked_add(size).ok_or(Error::OutOfBounds)?; + match (range.contains(&address), range.contains(&end)) { + (true, true) => Ok(address), + _ => return Err(Error::OutOfBounds), + } + } + None => return Err(Error::InvalidAlloc(alloc)), + } + } } #[cfg(test)] @@ -422,4 +443,65 @@ mod tests { Ok(0x1000) ); } + + #[test] + fn allocate_and_verify_pci_offset() { + let mut pool = AddressAllocator::new(0x1000, 0x10000, None).unwrap(); + let pci_bar0 = Alloc::PciBar { + bus: 1, + dev: 2, + func: 0, + bar: 0, + }; + let pci_bar1 = Alloc::PciBar { + bus: 1, + dev: 2, + func: 0, + bar: 1, + }; + let pci_bar2 = Alloc::PciBar { + bus: 1, + dev: 2, + func: 0, + bar: 2, + }; + let anon = Alloc::Anon(1); + + assert_eq!( + pool.allocate(0x800, pci_bar0, String::from("bar0")), + Ok(0x1000) + ); + assert_eq!( + pool.allocate(0x800, pci_bar1, String::from("bar1")), + Ok(0x1800) + ); + assert_eq!(pool.allocate(0x800, anon, String::from("anon")), Ok(0x2000)); + + assert_eq!( + pool.address_from_pci_offset(pci_bar0, 0x600, 0x100), + Ok(0x1600) + ); + assert_eq!( + pool.address_from_pci_offset(pci_bar1, 0x600, 0x100), + Ok(0x1E00) + ); + assert_eq!( + pool.address_from_pci_offset(pci_bar0, 0x7FE, 0x001), + Ok(0x17FE) + ); + assert_eq!( + pool.address_from_pci_offset(pci_bar0, 0x7FF, 0x001), + Err(Error::OutOfBounds) + ); + + assert_eq!( + pool.address_from_pci_offset(pci_bar2, 0x7FF, 0x001), + Err(Error::InvalidAlloc(pci_bar2)) + ); + + assert_eq!( + pool.address_from_pci_offset(anon, 0x600, 0x100), + Err(Error::InvalidAlloc(anon)) + ); + } } diff --git a/resources/src/lib.rs b/resources/src/lib.rs index 4da5d22..95e4b98 100644 --- a/resources/src/lib.rs +++ b/resources/src/lib.rs @@ -46,10 +46,12 @@ pub enum Error { BadAlignment, CreateGpuAllocator(GpuAllocatorError), ExistingAlloc(Alloc), + InvalidAlloc(Alloc), MissingHighMMIOAddresses, MissingLowMMIOAddresses, NoIoAllocator, OutOfSpace, + OutOfBounds, PoolOverflow { base: u64, size: u64 }, PoolSizeZero, RegionOverlap { base: u64, size: u64 }, @@ -66,10 +68,12 @@ impl Display for Error { BadAlignment => write!(f, "Pool alignment must be a power of 2"), CreateGpuAllocator(e) => write!(f, "Failed to create GPU allocator: {:?}", e), ExistingAlloc(tag) => write!(f, "Alloc already exists: {:?}", tag), + InvalidAlloc(tag) => write!(f, "Invalid Alloc: {:?}", tag), MissingHighMMIOAddresses => write!(f, "High MMIO address range not specified"), MissingLowMMIOAddresses => write!(f, "Low MMIO address range not specified"), NoIoAllocator => write!(f, "No IO address range specified"), OutOfSpace => write!(f, "Out of space"), + OutOfBounds => write!(f, "Out of bounds"), PoolOverflow { base, size } => write!(f, "base={} + size={} overflows", base, size), PoolSizeZero => write!(f, "Pool cannot have size of 0"), RegionOverlap { base, size } => { diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs index f7514c2..5336f3b 100644 --- a/vm_control/src/lib.rs +++ b/vm_control/src/lib.rs @@ -535,7 +535,7 @@ fn register_memory( allocator: &mut SystemAllocator, fd: &dyn AsRawFd, size: usize, - allocation: Option<(Alloc, u64)>, + pci_allocation: Option<(Alloc, u64)>, ) -> Result<(u64, u32)> { let mmap = match MemoryMapping::from_fd(fd, size) { Ok(v) => v, @@ -543,48 +543,18 @@ fn register_memory( _ => return Err(SysError::new(EINVAL)), }; - let addr = match allocation { - Some(( - Alloc::PciBar { - bus, - dev, - func, - bar, - }, - offset, - )) => { - match allocator - .mmio_allocator(MmioType::High) - .get(&Alloc::PciBar { - bus, - dev, - func, - bar, - }) { - Some((start_addr, length, _)) => { - let address = *start_addr + offset; - let range = *start_addr..*start_addr + *length; - let end = address + (size as u64); - match (range.contains(&address), range.contains(&end)) { - (true, true) => address, - _ => return Err(SysError::new(EINVAL)), - } - } - None => return Err(SysError::new(EINVAL)), - } - } + let addr = match pci_allocation { + Some(pci_allocation) => allocator + .mmio_allocator(MmioType::High) + .address_from_pci_offset(pci_allocation.0, pci_allocation.1, size as u64) + .map_err(|_e| SysError::new(EINVAL))?, None => { let alloc = allocator.get_anon_alloc(); - match allocator.mmio_allocator(MmioType::High).allocate( - size as u64, - alloc, - "vmcontrol_register_memory".to_string(), - ) { - Ok(a) => a, - _ => return Err(SysError::new(EINVAL)), - } + allocator + .mmio_allocator(MmioType::High) + .allocate(size as u64, alloc, "vmcontrol_register_memory".to_string()) + .map_err(|_e| SysError::new(EINVAL))? } - _ => return Err(SysError::new(EINVAL)), }; let slot = vm.add_memory_region(GuestAddress(addr), Box::new(mmap), false, false)?; -- cgit 1.4.1 From d42d3fec7a9535b664b89d30fd48c90feda59957 Mon Sep 17 00:00:00 2001 From: Dylan Reid Date: Thu, 11 Jun 2020 14:40:57 -0700 Subject: io_uring: update for mmap api change A recent change to sys_util moved the definition of 'as_ptr' for an mmap to a new trait, include that trait here. Change-Id: Ib48113738fdace50e2dc7f72a5107dd95db867f9 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2241590 Reviewed-by: Daniel Verkamp Reviewed-by: Stephen Barber Tested-by: Dylan Reid Tested-by: kokoro Commit-Queue: Dylan Reid --- io_uring/src/uring.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/src/uring.rs b/io_uring/src/uring.rs index 8a69872..2a78753 100644 --- a/io_uring/src/uring.rs +++ b/io_uring/src/uring.rs @@ -9,7 +9,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::ptr::null_mut; use std::sync::atomic::{AtomicU32, Ordering}; -use sys_util::{MemoryMapping, WatchingEvents}; +use sys_util::{MappedRegion, MemoryMapping, WatchingEvents}; use crate::bindings::*; use crate::syscalls::*; -- cgit 1.4.1