summary refs log tree commit diff
diff options
context:
space:
mode:
authorMatt Delco <delco@chromium.org>2019-04-16 18:37:55 -0700
committerchrome-bot <chrome-bot@chromium.org>2019-04-24 15:51:11 -0700
commit2ec62db5f77cd52fe934e87d3be3c832251f4748 (patch)
tree210e85049bc1b92a43986a8f8c76240ee0225a7d
parentf82d6320e7192858553bb0e1b5464363d29a631a (diff)
downloadcrosvm-2ec62db5f77cd52fe934e87d3be3c832251f4748.tar
crosvm-2ec62db5f77cd52fe934e87d3be3c832251f4748.tar.gz
crosvm-2ec62db5f77cd52fe934e87d3be3c832251f4748.tar.bz2
crosvm-2ec62db5f77cd52fe934e87d3be3c832251f4748.tar.lz
crosvm-2ec62db5f77cd52fe934e87d3be3c832251f4748.tar.xz
crosvm-2ec62db5f77cd52fe934e87d3be3c832251f4748.tar.zst
crosvm-2ec62db5f77cd52fe934e87d3be3c832251f4748.zip
crosvm: reduce excess chatter with plugin
This change helps to improve performance in plugin communications by
removing unnecessary communication exchange.

The existing protocol basically requires the plugin to send a request
msg and wait for a reply msg.  Prior to this change a plugin had to send
a wait request before it got a wait reply (which typically contains an IO
event notification). Similarly, when the plugin sends a resume request
there's also a resume reply that's sent.

The reply to the resume message serves no worthwhile purpose and can be
removed. In the common case there's also no need for the plugin to send
a wait request message--the prior operation was a resume so both sides
know that the only next legal operation is a wait.  Therefore, crosvm
can send a wait reply message without waiting for the plugin's request.

Another way to look at the situation is that a resume request message is
now answered by a wait reply message, and the overall message exchange
pattern looks less like http and more like async I/O.

The plugin's first call to wait is the one time that a wait request is
sent.  This in turn will receive a wait-init reply.

TEST=Ran my diagnostic plugin and confirmed that it still passes (after
working around an 8-byte limitation in crosvm).  Ran my benchmarking
plugin and observed the time it takes to complete go down by 16.5%.
BUG=None

Change-Id: I9c93ba1d3a8f7814ca952f3dc7239d48675192e2
Signed-off-by: Matt Delco <delco@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/1571066
Tested-by: kokoro <noreply+kokoro@google.com>
Tested-by: Matt Delco <delco@google.com>
Reviewed-by: Zach Reizner <zachr@chromium.org>
-rw-r--r--crosvm_plugin/crosvm.h16
-rw-r--r--crosvm_plugin/src/lib.rs26
-rw-r--r--src/plugin/mod.rs2
-rw-r--r--src/plugin/vcpu.rs257
4 files changed, 175 insertions, 126 deletions
diff --git a/crosvm_plugin/crosvm.h b/crosvm_plugin/crosvm.h
index 81c8ade..d7a036c 100644
--- a/crosvm_plugin/crosvm.h
+++ b/crosvm_plugin/crosvm.h
@@ -235,7 +235,7 @@ int crosvm_get_pic_state(struct crosvm *, bool __primary,
 
 /* Sets the state of interrupt controller in a VM. */
 int crosvm_set_pic_state(struct crosvm *, bool __primary,
-                         struct kvm_pic_state *__pic_state);
+                         const struct kvm_pic_state *__pic_state);
 
 /* Gets the state of IOAPIC in a VM. */
 int crosvm_get_ioapic_state(struct crosvm *,
@@ -243,7 +243,7 @@ int crosvm_get_ioapic_state(struct crosvm *,
 
 /* Sets the state of IOAPIC in a VM. */
 int crosvm_set_ioapic_state(struct crosvm *,
-                            struct kvm_ioapic_state *__ioapic_state);
+                            const struct kvm_ioapic_state *__ioapic_state);
 
 /* Gets the state of interrupt controller in a VM. */
 int crosvm_get_pit_state(struct crosvm *, struct kvm_pit_state2 *__pit_state);
@@ -280,7 +280,7 @@ int crosvm_pause_vcpus(struct crosvm*, uint64_t __cpu_mask, void* __user);
 int crosvm_start(struct crosvm*);
 
 /*
- * Registers an eventfd that is triggered asynchronously on write in |__space|
+ * Allocates an eventfd that is triggered asynchronously on write in |__space|
  * at the given |__addr|.
  *
  * If |__datamatch| is non-NULL, it must be contain |__length| bytes that will
@@ -288,8 +288,8 @@ int crosvm_start(struct crosvm*);
  * the eventfd if equal. If datamatch is NULL all writes to the address will
  * trigger the eventfd.
  *
- * On successful registration, returns a non-negative eventfd owned by the
- * caller.
+ * On successful allocation, returns a crosvm_io.  Obtain the actual fd
+ * by passing this result to crosvm_io_event_fd().
  */
 int crosvm_create_io_event(struct crosvm*, uint32_t __space, uint64_t __addr,
                            uint32_t __len, const uint8_t* __datamatch,
@@ -423,6 +423,10 @@ struct crosvm_vcpu_event {
        * than 8 bytes, such as by AVX-512 instructions, multiple vcpu access
        * events are generated serially to cover each 8 byte fragment of the
        * access.
+       *
+       * Larger I/O accesses are possible.  "rep in" can generate I/Os larger
+       * than 8 bytes, though such accesses can also be split into multiple
+       * events.  Currently kvm doesn't seem to batch "rep out" I/Os.
        */
       uint32_t length;
 
@@ -435,7 +439,7 @@ struct crosvm_vcpu_event {
       uint8_t _reserved[3];
     } io_access;
 
-    /* CROSVM_VCPU_EVENT_KIND_USER */
+    /* CROSVM_VCPU_EVENT_KIND_PAUSED */
     void *user;
 
     uint8_t _reserved[64];
diff --git a/crosvm_plugin/src/lib.rs b/crosvm_plugin/src/lib.rs
index a334505..d1bb665 100644
--- a/crosvm_plugin/src/lib.rs
+++ b/crosvm_plugin/src/lib.rs
@@ -920,6 +920,7 @@ pub struct crosvm_vcpu_event {
 
 pub struct crosvm_vcpu {
     socket: UnixDatagram,
+    send_init: bool,
     request_buffer: Vec<u8>,
     response_buffer: Vec<u8>,
     resume_data: Vec<u8>,
@@ -929,13 +930,13 @@ impl crosvm_vcpu {
     fn new(socket: UnixDatagram) -> crosvm_vcpu {
         crosvm_vcpu {
             socket,
+            send_init: true,
             request_buffer: Vec::new(),
             response_buffer: vec![0; MAX_DATAGRAM_SIZE],
             resume_data: Vec::new(),
         }
     }
-
-    fn vcpu_transaction(&mut self, request: &VcpuRequest) -> result::Result<VcpuResponse, c_int> {
+    fn vcpu_send(&mut self, request: &VcpuRequest) -> result::Result<(), c_int> {
         self.request_buffer.clear();
         request
             .write_to_vec(&mut self.request_buffer)
@@ -943,7 +944,10 @@ impl crosvm_vcpu {
         self.socket
             .send(self.request_buffer.as_slice())
             .map_err(|e| -e.raw_os_error().unwrap_or(EINVAL))?;
+        Ok(())
+    }
 
+    fn vcpu_recv(&mut self) -> result::Result<VcpuResponse, c_int> {
         let msg_size = self
             .socket
             .recv(&mut self.response_buffer)
@@ -957,10 +961,20 @@ impl crosvm_vcpu {
         Ok(response)
     }
 
+    fn vcpu_transaction(&mut self, request: &VcpuRequest) -> result::Result<VcpuResponse, c_int> {
+        self.vcpu_send(request)?;
+        let response: VcpuResponse = self.vcpu_recv()?;
+        Ok(response)
+    }
+
     fn wait(&mut self, event: &mut crosvm_vcpu_event) -> result::Result<(), c_int> {
-        let mut r = VcpuRequest::new();
-        r.mut_wait();
-        let mut response: VcpuResponse = self.vcpu_transaction(&r)?;
+        if self.send_init {
+            self.send_init = false;
+            let mut r = VcpuRequest::new();
+            r.mut_wait();
+            self.vcpu_send(&r)?;
+        }
+        let mut response: VcpuResponse = self.vcpu_recv()?;
         if !response.has_wait() {
             return Err(EPROTO);
         }
@@ -997,7 +1011,7 @@ impl crosvm_vcpu {
         let resume: &mut VcpuRequest_Resume = r.mut_resume();
         swap(&mut resume.data, &mut self.resume_data);
 
-        self.vcpu_transaction(&r)?;
+        self.vcpu_send(&r)?;
         Ok(())
     }
 
diff --git a/src/plugin/mod.rs b/src/plugin/mod.rs
index d0809c6..128b434 100644
--- a/src/plugin/mod.rs
+++ b/src/plugin/mod.rs
@@ -364,7 +364,7 @@ pub fn run_vcpus(
                             match run_res {
                                 Ok(run) => match run {
                                     VcpuExit::IoIn { port, mut size } => {
-                                        let mut data = [0; 8];
+                                        let mut data = [0; 256];
                                         if size > data.len() {
                                             error!("unsupported IoIn size of {} bytes", size);
                                             size = data.len();
diff --git a/src/plugin/vcpu.rs b/src/plugin/vcpu.rs
index d19a5f8..17cd875 100644
--- a/src/plugin/vcpu.rs
+++ b/src/plugin/vcpu.rs
@@ -389,133 +389,164 @@ impl PluginVcpu {
     }
 
     fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> {
+        let mut wait_reason = self.wait_reason.take();
+        let mut do_recv = true;
         let mut resume_data = None;
-        let mut request_buffer = self.request_buffer.borrow_mut();
-        request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0);
+        let mut response = VcpuResponse::new();
+
+        // Typically a response is sent for every request received.  The odd (yet common)
+        // case is when a resume request is received.  This function will skip sending
+        // a resume reply, and instead we'll go run the VM and then later reply with a wait
+        // response message.  This code block handles checking if a wait reason is pending (where
+        // the wait reason isn't the first-time init [first time init needs to first
+        // receive a wait request from the plugin]) to send it as a reply before doing a recv()
+        // for the next request.  Note that if a wait reply is pending then this function
+        // will send the reply and do nothing else--the expectation is that handle_until_resume()
+        // is the only caller of this function, so the function will immediately get called again
+        // and this second call will no longer see a pending wait reason and do a recv() for the
+        // next message.
+        if let Some(reason) = wait_reason {
+            if reason.has_init() {
+                wait_reason = Some(reason);
+            } else {
+                response.set_wait(reason);
+                do_recv = false;
+                wait_reason = None;
+            }
+        }
 
-        let msg_size = self
-            .connection
-            .recv(&mut request_buffer)
-            .map_err(io_to_sys_err)?;
+        if do_recv {
+            let mut request_buffer = self.request_buffer.borrow_mut();
+            request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0);
 
-        let mut request = protobuf::parse_from_bytes::<VcpuRequest>(&request_buffer[..msg_size])
-            .map_err(proto_to_sys_err)?;
+            let msg_size = self
+                .connection
+                .recv(&mut request_buffer)
+                .map_err(io_to_sys_err)?;
 
-        let wait_reason = self.wait_reason.take();
+            let mut request =
+                protobuf::parse_from_bytes::<VcpuRequest>(&request_buffer[..msg_size])
+                    .map_err(proto_to_sys_err)?;
 
-        let mut response = VcpuResponse::new();
-        let res = if request.has_wait() {
-            match wait_reason {
-                Some(wait_reason) => {
-                    response.set_wait(wait_reason);
-                    Ok(())
+            let res = if request.has_wait() {
+                match wait_reason {
+                    Some(wait_reason) => {
+                        response.set_wait(wait_reason);
+                        Ok(())
+                    }
+                    None => Err(SysError::new(EPROTO)),
                 }
-                None => Err(SysError::new(EPROTO)),
-            }
-        } else if wait_reason.is_some() {
-            // Any request other than getting the wait_reason while there is one pending is invalid.
-            self.wait_reason.set(wait_reason);
-            Err(SysError::new(EPROTO))
-        } else if request.has_resume() {
-            response.mut_resume();
-            resume_data = Some(request.take_resume().take_data());
-            Ok(())
-        } else if request.has_get_state() {
-            let response_state = response.mut_get_state();
-            match get_vcpu_state(vcpu, request.get_get_state().set) {
-                Ok(state) => {
-                    response_state.state = state;
-                    Ok(())
+            } else if wait_reason.is_some() {
+                // Any request other than getting the wait_reason while there is one pending is invalid.
+                self.wait_reason.set(wait_reason);
+                Err(SysError::new(EPROTO))
+            } else if request.has_resume() {
+                response.mut_resume();
+                resume_data = Some(request.take_resume().take_data());
+                Ok(())
+            } else if request.has_get_state() {
+                let response_state = response.mut_get_state();
+                match get_vcpu_state(vcpu, request.get_get_state().set) {
+                    Ok(state) => {
+                        response_state.state = state;
+                        Ok(())
+                    }
+                    Err(e) => Err(e),
                 }
-                Err(e) => Err(e),
-            }
-        } else if request.has_set_state() {
-            response.mut_set_state();
-            let set_state = request.get_set_state();
-            set_vcpu_state(vcpu, set_state.set, set_state.get_state())
-        } else if request.has_get_msrs() {
-            let entry_data = &mut response.mut_get_msrs().entry_data;
-            let entry_indices = &request.get_get_msrs().entry_indices;
-            let mut msr_entries = Vec::with_capacity(entry_indices.len());
-            for &index in entry_indices {
-                msr_entries.push(kvm_msr_entry {
-                    index,
-                    ..Default::default()
-                });
-            }
-            match vcpu.get_msrs(&mut msr_entries) {
-                Ok(()) => {
-                    for msr_entry in msr_entries {
-                        entry_data.push(msr_entry.data);
+            } else if request.has_set_state() {
+                response.mut_set_state();
+                let set_state = request.get_set_state();
+                set_vcpu_state(vcpu, set_state.set, set_state.get_state())
+            } else if request.has_get_msrs() {
+                let entry_data = &mut response.mut_get_msrs().entry_data;
+                let entry_indices = &request.get_get_msrs().entry_indices;
+                let mut msr_entries = Vec::with_capacity(entry_indices.len());
+                for &index in entry_indices {
+                    msr_entries.push(kvm_msr_entry {
+                        index,
+                        ..Default::default()
+                    });
+                }
+                match vcpu.get_msrs(&mut msr_entries) {
+                    Ok(()) => {
+                        for msr_entry in msr_entries {
+                            entry_data.push(msr_entry.data);
+                        }
+                        Ok(())
                     }
-                    Ok(())
+                    Err(e) => Err(e),
                 }
-                Err(e) => Err(e),
-            }
-        } else if request.has_set_msrs() {
-            const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
-            const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
-            const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
-            const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>();
-            const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY);
-
-            response.mut_set_msrs();
-            let request_entries = &request.get_set_msrs().entries;
-            let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY;
-            let layout = Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
-            let mut allocation = LayoutAllocation::zeroed(layout);
-
-            // Safe to obtain an exclusive reference because there are no other
-            // references to the allocation yet and all-zero is a valid bit
-            // pattern.
-            let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() };
-
-            unsafe {
-                // Mapping the unsized array to a slice is unsafe becase the length isn't known.
-                // Providing the length used to create the struct guarantees the entire slice is
-                // valid.
-                let kvm_msr_entries: &mut [kvm_msr_entry] =
-                    kvm_msrs.entries.as_mut_slice(request_entries.len());
-                for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) {
-                    msr_entry.index = entry.index;
-                    msr_entry.data = entry.data;
+            } else if request.has_set_msrs() {
+                const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
+                const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
+                const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
+                const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>();
+                const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY);
+
+                response.mut_set_msrs();
+                let request_entries = &request.get_set_msrs().entries;
+
+                let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY;
+                let layout =
+                    Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
+                let mut allocation = LayoutAllocation::zeroed(layout);
+
+                // Safe to obtain an exclusive reference because there are no other
+                // references to the allocation yet and all-zero is a valid bit
+                // pattern.
+                let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() };
+
+                unsafe {
+                    // Mapping the unsized array to a slice is unsafe becase the length isn't known.
+                    // Providing the length used to create the struct guarantees the entire slice is
+                    // valid.
+                    let kvm_msr_entries: &mut [kvm_msr_entry] =
+                        kvm_msrs.entries.as_mut_slice(request_entries.len());
+                    for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) {
+                        msr_entry.index = entry.index;
+                        msr_entry.data = entry.data;
+                    }
                 }
-            }
-            kvm_msrs.nmsrs = request_entries.len() as u32;
-            vcpu.set_msrs(&kvm_msrs)
-        } else if request.has_set_cpuid() {
-            response.mut_set_cpuid();
-            let request_entries = &request.get_set_cpuid().entries;
-            let mut cpuid = CpuId::new(request_entries.len());
-            let cpuid_entries = cpuid.mut_entries_slice();
-            for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) {
-                cpuid_entry.function = request_entry.function;
-                if request_entry.has_index {
-                    cpuid_entry.index = request_entry.index;
-                    cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                kvm_msrs.nmsrs = request_entries.len() as u32;
+                vcpu.set_msrs(&kvm_msrs)
+            } else if request.has_set_cpuid() {
+                response.mut_set_cpuid();
+                let request_entries = &request.get_set_cpuid().entries;
+                let mut cpuid = CpuId::new(request_entries.len());
+                let cpuid_entries = cpuid.mut_entries_slice();
+                for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) {
+                    cpuid_entry.function = request_entry.function;
+                    if request_entry.has_index {
+                        cpuid_entry.index = request_entry.index;
+                        cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                    }
+                    cpuid_entry.eax = request_entry.eax;
+                    cpuid_entry.ebx = request_entry.ebx;
+                    cpuid_entry.ecx = request_entry.ecx;
+                    cpuid_entry.edx = request_entry.edx;
                 }
-                cpuid_entry.eax = request_entry.eax;
-                cpuid_entry.ebx = request_entry.ebx;
-                cpuid_entry.ecx = request_entry.ecx;
-                cpuid_entry.edx = request_entry.edx;
-            }
-            vcpu.set_cpuid2(&cpuid)
-        } else {
-            Err(SysError::new(ENOTTY))
-        };
+                vcpu.set_cpuid2(&cpuid)
+            } else {
+                Err(SysError::new(ENOTTY))
+            };
 
-        if let Err(e) = res {
-            response.errno = e.errno();
+            if let Err(e) = res {
+                response.errno = e.errno();
+            }
         }
 
-        let mut response_buffer = self.response_buffer.borrow_mut();
-        response_buffer.clear();
-        response
-            .write_to_vec(&mut response_buffer)
-            .map_err(proto_to_sys_err)?;
-        self.connection
-            .send(&response_buffer[..])
-            .map_err(io_to_sys_err)?;
+        // Send the response, except if it's a resume response (in which case
+        // we'll go run the VM and afterwards send a wait response message).
+        if !response.has_resume() {
+            let mut response_buffer = self.response_buffer.borrow_mut();
+            response_buffer.clear();
+            response
+                .write_to_vec(&mut response_buffer)
+                .map_err(proto_to_sys_err)?;
+            self.connection
+                .send(&response_buffer[..])
+                .map_err(io_to_sys_err)?;
+        }
 
         Ok(resume_data)
     }