summary refs log tree commit diff
diff options
context:
space:
mode:
author Daniel Verkamp <dverkamp@chromium.org> 2019-04-05 09:58:48 -0700
committer chrome-bot <chrome-bot@chromium.org> 2019-04-09 06:20:04 -0700
commit 107edb3eec98a707118ae9a4a804a256e53892a0 (patch)
tree 7f33cebb1a004e8380648ba4d7a67392f8051a21
parent 1f9ae42c73c020ca77c7c0fbe2e09be3b90fe573 (diff)
download crosvm-107edb3eec98a707118ae9a4a804a256e53892a0.tar
crosvm-107edb3eec98a707118ae9a4a804a256e53892a0.tar.gz
crosvm-107edb3eec98a707118ae9a4a804a256e53892a0.tar.bz2
crosvm-107edb3eec98a707118ae9a4a804a256e53892a0.tar.lz
crosvm-107edb3eec98a707118ae9a4a804a256e53892a0.tar.xz
crosvm-107edb3eec98a707118ae9a4a804a256e53892a0.tar.zst
crosvm-107edb3eec98a707118ae9a4a804a256e53892a0.zip
main: add --cpu-affinity option to pin VCPUs
This allows setting the affinity of the VCPU threads to specific host
CPUs.  Note that each individual VCPU thread has its affinity set to the
full set of CPUs specified, so the host kernel may still reschedule VCPU
threads on whichever host CPUs it sees fit (within the specified set).

BUG=chromium:909793
TEST=build_test

Change-Id: I09b893901caf91368b64f5329a6e9f39027fef23
Signed-off-by: Daniel Verkamp <dverkamp@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/1554865
Commit-Ready: ChromeOS CL Exonerator Bot <chromiumos-cl-exonerator@appspot.gserviceaccount.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chirantan Ekbote <chirantan@chromium.org>
Reviewed-by: Dylan Reid <dgreid@chromium.org>
-rw-r--r-- aarch64/src/lib.rs 3
-rw-r--r-- arch/src/lib.rs 2
-rw-r--r-- src/argument.rs 1
-rw-r--r-- src/linux.rs 15
-rw-r--r-- src/main.rs 115
-rw-r--r-- sys_util/src/affinity.rs 64
-rw-r--r-- sys_util/src/lib.rs 2
-rw-r--r-- x86_64/src/lib.rs 3
8 files changed, 202 insertions, 3 deletions
diff --git a/aarch64/src/lib.rs b/aarch64/src/lib.rs
index d5b080f..e8cabe4 100644
--- a/aarch64/src/lib.rs
+++ b/aarch64/src/lib.rs
@@ -225,6 +225,8 @@ impl arch::LinuxArch for AArch64 {
             vcpus.push(vcpu);
         }
 
+        let vcpu_affinity = components.vcpu_affinity;
+
         let irq_chip = Self::create_irq_chip(&vm)?;
         let mut cmdline = Self::get_base_linux_cmdline();
 
@@ -284,6 +286,7 @@ impl arch::LinuxArch for AArch64 {
             stdio_serial,
             exit_evt,
             vcpus,
+            vcpu_affinity,
             irq_chip,
             io_bus,
             mmio_bus,
diff --git a/arch/src/lib.rs b/arch/src/lib.rs
index 9cc55e1..1e55caf 100644
--- a/arch/src/lib.rs
+++ b/arch/src/lib.rs
@@ -38,6 +38,7 @@ use sys_util::{syslog, EventFd, GuestAddress, GuestMemory, GuestMemoryError};
 pub struct VmComponents {
     pub memory_mb: u64,
     pub vcpu_count: u32,
+    pub vcpu_affinity: Vec<usize>,
     pub kernel_image: File,
     pub android_fstab: Option<File>,
     pub initrd_image: Option<File>,
@@ -53,6 +54,7 @@ pub struct RunnableLinuxVm {
     pub stdio_serial: Arc<Mutex<Serial>>,
     pub exit_evt: EventFd,
     pub vcpus: Vec<Vcpu>,
+    pub vcpu_affinity: Vec<usize>,
     pub irq_chip: Option<File>,
     pub io_bus: Bus,
     pub mmio_bus: Bus,
diff --git a/src/argument.rs b/src/argument.rs
index 43ae424..6b4cfe9 100644
--- a/src/argument.rs
+++ b/src/argument.rs
@@ -44,6 +44,7 @@ use std::fmt::{self, Display};
 use std::result;
 
 /// An error with argument parsing.
+#[derive(Debug)]
 pub enum Error {
     /// There was a syntax error with the argument.
     Syntax(String),
diff --git a/src/linux.rs b/src/linux.rs
index 7960dc1..a7b8945 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -37,9 +37,9 @@ use sync::{Condvar, Mutex};
 use sys_util::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
 use sys_util::{
     self, block_signal, clear_signal, drop_capabilities, flock, get_blocked_signals, get_group_id,
-    get_user_id, getegid, geteuid, register_signal_handler, validate_raw_fd, EventFd,
-    FlockOperation, GuestMemory, Killable, PollContext, PollToken, SignalFd, Terminal, TimerFd,
-    SIGRTMIN,
+    get_user_id, getegid, geteuid, register_signal_handler, set_cpu_affinity, validate_raw_fd,
+    EventFd, FlockOperation, GuestMemory, Killable, PollContext, PollToken, SignalFd, Terminal,
+    TimerFd, SIGRTMIN,
 };
 #[cfg(feature = "gpu-forward")]
 use sys_util::{GuestAddress, MemoryMapping, Protection};
@@ -925,6 +925,7 @@ impl VcpuRunMode {
 fn run_vcpu(
     vcpu: Vcpu,
     cpu_id: u32,
+    vcpu_affinity: Vec<usize>,
     start_barrier: Arc<Barrier>,
     io_bus: devices::Bus,
     mmio_bus: devices::Bus,
@@ -935,6 +936,12 @@ fn run_vcpu(
     thread::Builder::new()
         .name(format!("crosvm_vcpu{}", cpu_id))
         .spawn(move || {
+            if vcpu_affinity.len() != 0 {
+                if let Err(e) = set_cpu_affinity(vcpu_affinity) {
+                    error!("Failed to set CPU affinity: {}", e);
+                }
+            }
+
             let mut sig_ok = true;
             match get_blocked_signals() {
                 Ok(mut v) => {
@@ -1090,6 +1097,7 @@ pub fn run_config(cfg: Config) -> Result<()> {
     let components = VmComponents {
         memory_mb: (cfg.memory.unwrap_or(256) << 20) as u64,
         vcpu_count: cfg.vcpu_count.unwrap_or(1),
+        vcpu_affinity: cfg.vcpu_affinity.clone(),
         kernel_image: File::open(&cfg.kernel_path)
             .map_err(|e| Error::OpenKernel(cfg.kernel_path.clone(), e))?,
         android_fstab: cfg
@@ -1305,6 +1313,7 @@ fn run_control(
         let handle = run_vcpu(
             vcpu,
             cpu_id as u32,
+            linux.vcpu_affinity.clone(),
             vcpu_thread_barrier.clone(),
             linux.io_bus.clone(),
             linux.mmio_bus.clone(),
diff --git a/src/main.rs b/src/main.rs
index d48b166..2439b79 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -104,6 +104,7 @@ impl TouchDeviceOption {
 
 pub struct Config {
     vcpu_count: Option<u32>,
+    vcpu_affinity: Vec<usize>,
     memory: Option<usize>,
     kernel_path: PathBuf,
     android_fstab: Option<PathBuf>,
@@ -142,6 +143,7 @@ impl Default for Config {
     fn default() -> Config {
         Config {
             vcpu_count: None,
+            vcpu_affinity: Vec::new(),
             memory: None,
             kernel_path: PathBuf::default(),
             android_fstab: None,
@@ -206,6 +208,48 @@ fn wait_all_children() -> bool {
     false
 }
 
+/// Parse a comma-separated list of CPU numbers and inclusive ranges (e.g. "0,1-3,5") into a Vec.
+fn parse_cpu_set(s: &str) -> argument::Result<Vec<usize>> {
+    let mut cpuset = Vec::new();
+    for part in s.split(',') {
+        let range: Vec<&str> = part.split('-').collect(); // "a-b" -> ["a", "b"]; "a" -> ["a"]
+        if range.len() == 0 || range.len() > 2 {
+            return Err(argument::Error::InvalidValue {
+                value: part.to_owned(),
+                expected: "invalid list syntax",
+            });
+        }
+        let first_cpu: usize = range[0]
+            .parse()
+            .map_err(|_| argument::Error::InvalidValue {
+                value: part.to_owned(),
+                expected: "CPU index must be a non-negative integer",
+            })?;
+        let last_cpu: usize = if range.len() == 2 {
+            range[1]
+                .parse()
+                .map_err(|_| argument::Error::InvalidValue {
+                    value: part.to_owned(),
+                    expected: "CPU index must be a non-negative integer",
+                })?
+        } else {
+            first_cpu // a lone CPU number is the one-element range first_cpu..=first_cpu
+        };
+
+        if last_cpu < first_cpu {
+            return Err(argument::Error::InvalidValue {
+                value: part.to_owned(),
+                expected: "CPU ranges must be from low to high",
+            });
+        }
+
+        for cpu in first_cpu..=last_cpu { // both ends of the range are included
+            cpuset.push(cpu);
+        }
+    }
+    Ok(cpuset)
+}
+
 fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::Result<()> {
     match name {
         "" => {
@@ -266,6 +310,14 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::
                         })?,
                 )
         }
+        "cpu-affinity" => {
+            if cfg.vcpu_affinity.len() != 0 {
+                return Err(argument::Error::TooManyArguments(
+                    "`cpu-affinity` already given".to_owned(),
+                ));
+            }
+            cfg.vcpu_affinity = parse_cpu_set(value.unwrap())?;
+        }
         "mem" => {
             if cfg.memory.is_some() {
                 return Err(argument::Error::TooManyArguments(
@@ -659,6 +711,7 @@ fn run_vm(args: std::env::Args) -> std::result::Result<(), ()> {
                                 "PARAMS",
                                 "Extra kernel or plugin command line arguments. Can be given more than once."),
           Argument::short_value('c', "cpus", "N", "Number of VCPUs. (default: 1)"),
+          Argument::value("cpu-affinity", "CPUSET", "Comma-separated list of CPUs or CPU ranges to run VCPUs on. (e.g. 0,1-3,5) (default: no mask)"),
           Argument::short_value('m',
                                 "mem",
                                 "N",
@@ -1187,3 +1240,65 @@ fn crosvm_main() -> std::result::Result<(), ()> {
 fn main() {
     std::process::exit(if crosvm_main().is_ok() { 0 } else { 1 });
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_cpu_set_single() {
+        assert_eq!(parse_cpu_set("123").expect("parse failed"), vec![123]);
+    }
+
+    #[test]
+    fn parse_cpu_set_list() {
+        assert_eq!(
+            parse_cpu_set("0,1,2,3").expect("parse failed"),
+            vec![0, 1, 2, 3]
+        );
+    }
+
+    #[test]
+    fn parse_cpu_set_range() {
+        assert_eq!(
+            parse_cpu_set("0-3").expect("parse failed"),
+            vec![0, 1, 2, 3]
+        );
+    }
+
+    #[test]
+    fn parse_cpu_set_list_of_ranges() {
+        assert_eq!(
+            parse_cpu_set("3-4,7-9,18").expect("parse failed"),
+            vec![3, 4, 7, 8, 9, 18]
+        );
+    }
+
+    #[test]
+    fn parse_cpu_set_repeated() {
+        // For now, allow duplicates - they will be handled gracefully by the vec to cpu_set_t conversion.
+        assert_eq!(parse_cpu_set("1,1,1").expect("parse failed"), vec![1, 1, 1]);
+    }
+
+    #[test]
+    fn parse_cpu_set_negative() {
+        // Negative CPU numbers are not allowed.
+        parse_cpu_set("-3").expect_err("parse should have failed");
+    }
+
+    #[test]
+    fn parse_cpu_set_reverse_range() {
+        // Ranges must be from low to high.
+        parse_cpu_set("5-2").expect_err("parse should have failed");
+    }
+
+    #[test]
+    fn parse_cpu_set_open_range() {
+        parse_cpu_set("3-").expect_err("parse should have failed"); // upper bound is empty
+    }
+
+    #[test]
+    fn parse_cpu_set_extra_comma() {
+        parse_cpu_set("0,1,2,").expect_err("parse should have failed"); // last element is empty
+    }
+}
diff --git a/sys_util/src/affinity.rs b/sys_util/src/affinity.rs
new file mode 100644
index 0000000..d166562
--- /dev/null
+++ b/sys_util/src/affinity.rs
@@ -0,0 +1,64 @@
+// Copyright 2019 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//! Wrappers for CPU affinity functions.
+
+use std::iter::FromIterator;
+use std::mem;
+
+use libc::{cpu_set_t, sched_setaffinity, CPU_SET, CPU_SETSIZE, CPU_ZERO, EINVAL};
+
+use crate::{errno_result, Error, Result};
+
+// Newtype around cpu_set_t: FromIterator and cpu_set_t are both foreign to this crate, so
+// the trait can only be implemented on a local wrapper type.
+struct CpuSet(cpu_set_t);
+
+impl FromIterator<usize> for CpuSet {
+    fn from_iter<I: IntoIterator<Item = usize>>(cpus: I) -> Self {
+        // cpu_set_t is a C struct and can be safely initialized with zeroed memory.
+        let mut cpuset: cpu_set_t = unsafe { mem::zeroed() };
+        // Safe because we pass a valid cpuset pointer.
+        unsafe { CPU_ZERO(&mut cpuset) };
+        for cpu in cpus.into_iter() {
+            // Safe because cpuset is a valid pointer; set_cpu_affinity rejects cpu >= CPU_SETSIZE.
+            unsafe { CPU_SET(cpu, &mut cpuset) };
+        }
+        CpuSet(cpuset)
+    }
+}
+
+/// Set the calling thread's CPU affinity to `cpus`; EINVAL if any index is >= CPU_SETSIZE.
+///
+/// # Examples
+///
+/// Set the calling thread's CPU affinity so it will run on only CPUs
+/// 0, 1, 5, and 6.
+///
+/// ```
+/// # use sys_util::set_cpu_affinity;
+///   set_cpu_affinity(vec![0, 1, 5, 6]).unwrap();
+/// ```
+pub fn set_cpu_affinity<I: IntoIterator<Item = usize>>(cpus: I) -> Result<()> {
+    let CpuSet(cpuset) = cpus
+        .into_iter()
+        .map(|cpu| {
+            if cpu < CPU_SETSIZE as usize {
+                Ok(cpu)
+            } else {
+                Err(Error::new(EINVAL))
+            }
+        })
+        .collect::<Result<CpuSet>>()?; // stops at the first out-of-range index
+
+    // Safe because we pass 0 for the current thread, and cpuset is a valid pointer and only
+    // used for the duration of this call.
+    let res = unsafe { sched_setaffinity(0, mem::size_of_val(&cpuset), &cpuset) };
+
+    if res != 0 {
+        errno_result()
+    } else {
+        Ok(())
+    }
+}
diff --git a/sys_util/src/lib.rs b/sys_util/src/lib.rs
index 5f7827e..d62ec48 100644
--- a/sys_util/src/lib.rs
+++ b/sys_util/src/lib.rs
@@ -12,6 +12,7 @@ extern crate syscall_defines;
 extern crate poll_token_derive;
 extern crate sync;
 
+pub mod affinity;
 #[macro_use]
 pub mod handle_eintr;
 #[macro_use]
@@ -44,6 +45,7 @@ mod terminal;
 mod timerfd;
 mod write_zeroes;
 
+pub use crate::affinity::*;
 pub use crate::capabilities::drop_capabilities;
 pub use crate::clock::{Clock, FakeClock};
 use crate::errno::errno_result;
diff --git a/x86_64/src/lib.rs b/x86_64/src/lib.rs
index 640d886..de12a10 100644
--- a/x86_64/src/lib.rs
+++ b/x86_64/src/lib.rs
@@ -329,6 +329,8 @@ impl arch::LinuxArch for X8664arch {
             vcpus.push(vcpu);
         }
 
+        let vcpu_affinity = components.vcpu_affinity;
+
         let irq_chip = Self::create_irq_chip(&vm)?;
         let mut cmdline = Self::get_base_linux_cmdline();
 
@@ -376,6 +378,7 @@ impl arch::LinuxArch for X8664arch {
             stdio_serial,
             exit_evt,
             vcpus,
+            vcpu_affinity,
             irq_chip,
             io_bus,
             mmio_bus,