diff options
-rw-r--r-- | crosvm_plugin/crosvm.h | 16 | ||||
-rw-r--r-- | crosvm_plugin/src/lib.rs | 26 | ||||
-rw-r--r-- | src/plugin/mod.rs | 2 | ||||
-rw-r--r-- | src/plugin/vcpu.rs | 257 |
4 files changed, 175 insertions, 126 deletions
diff --git a/crosvm_plugin/crosvm.h b/crosvm_plugin/crosvm.h index 81c8ade..d7a036c 100644 --- a/crosvm_plugin/crosvm.h +++ b/crosvm_plugin/crosvm.h @@ -235,7 +235,7 @@ int crosvm_get_pic_state(struct crosvm *, bool __primary, /* Sets the state of interrupt controller in a VM. */ int crosvm_set_pic_state(struct crosvm *, bool __primary, - struct kvm_pic_state *__pic_state); + const struct kvm_pic_state *__pic_state); /* Gets the state of IOAPIC in a VM. */ int crosvm_get_ioapic_state(struct crosvm *, @@ -243,7 +243,7 @@ int crosvm_get_ioapic_state(struct crosvm *, /* Sets the state of IOAPIC in a VM. */ int crosvm_set_ioapic_state(struct crosvm *, - struct kvm_ioapic_state *__ioapic_state); + const struct kvm_ioapic_state *__ioapic_state); /* Gets the state of interrupt controller in a VM. */ int crosvm_get_pit_state(struct crosvm *, struct kvm_pit_state2 *__pit_state); @@ -280,7 +280,7 @@ int crosvm_pause_vcpus(struct crosvm*, uint64_t __cpu_mask, void* __user); int crosvm_start(struct crosvm*); /* - * Registers an eventfd that is triggered asynchronously on write in |__space| + * Allocates an eventfd that is triggered asynchronously on write in |__space| * at the given |__addr|. * * If |__datamatch| is non-NULL, it must be contain |__length| bytes that will @@ -288,8 +288,8 @@ int crosvm_start(struct crosvm*); * the eventfd if equal. If datamatch is NULL all writes to the address will * trigger the eventfd. * - * On successful registration, returns a non-negative eventfd owned by the - * caller. + * On successful allocation, returns a crosvm_io. Obtain the actual fd + * by passing this result to crosvm_io_event_fd(). */ int crosvm_create_io_event(struct crosvm*, uint32_t __space, uint64_t __addr, uint32_t __len, const uint8_t* __datamatch, @@ -423,6 +423,10 @@ struct crosvm_vcpu_event { * than 8 bytes, such as by AVX-512 instructions, multiple vcpu access * events are generated serially to cover each 8 byte fragment of the * access. + * + * Larger I/O accesses are possible. "rep in" can generate I/Os larger + * than 8 bytes, though such accesses can also be split into multiple + * events. Currently kvm doesn't seem to batch "rep out" I/Os. */ uint32_t length; @@ -435,7 +439,7 @@ struct crosvm_vcpu_event { uint8_t _reserved[3]; } io_access; - /* CROSVM_VCPU_EVENT_KIND_USER */ + /* CROSVM_VCPU_EVENT_KIND_PAUSED */ void *user; uint8_t _reserved[64]; diff --git a/crosvm_plugin/src/lib.rs b/crosvm_plugin/src/lib.rs index a334505..d1bb665 100644 --- a/crosvm_plugin/src/lib.rs +++ b/crosvm_plugin/src/lib.rs @@ -920,6 +920,7 @@ pub struct crosvm_vcpu_event { pub struct crosvm_vcpu { socket: UnixDatagram, + send_init: bool, request_buffer: Vec<u8>, response_buffer: Vec<u8>, resume_data: Vec<u8>, @@ -929,13 +930,13 @@ impl crosvm_vcpu { fn new(socket: UnixDatagram) -> crosvm_vcpu { crosvm_vcpu { socket, + send_init: true, request_buffer: Vec::new(), response_buffer: vec![0; MAX_DATAGRAM_SIZE], resume_data: Vec::new(), } } - - fn vcpu_transaction(&mut self, request: &VcpuRequest) -> result::Result<VcpuResponse, c_int> { + fn vcpu_send(&mut self, request: &VcpuRequest) -> result::Result<(), c_int> { self.request_buffer.clear(); request .write_to_vec(&mut self.request_buffer) @@ -943,7 +944,10 @@ impl crosvm_vcpu { self.socket .send(self.request_buffer.as_slice()) .map_err(|e| -e.raw_os_error().unwrap_or(EINVAL))?; + Ok(()) + } + fn vcpu_recv(&mut self) -> result::Result<VcpuResponse, c_int> { let msg_size = self .socket .recv(&mut self.response_buffer) @@ -957,10 +961,20 @@ impl crosvm_vcpu { Ok(response) } + fn vcpu_transaction(&mut self, request: &VcpuRequest) -> result::Result<VcpuResponse, c_int> { + self.vcpu_send(request)?; + let response: VcpuResponse = self.vcpu_recv()?; + Ok(response) + } + fn wait(&mut self, event: &mut crosvm_vcpu_event) -> result::Result<(), c_int> { - let mut r = VcpuRequest::new(); - r.mut_wait(); - let mut response: VcpuResponse = self.vcpu_transaction(&r)?; + if self.send_init { + self.send_init = false; + let mut r = VcpuRequest::new(); + r.mut_wait(); + self.vcpu_send(&r)?; + } + let mut response: VcpuResponse = self.vcpu_recv()?; if !response.has_wait() { return Err(EPROTO); } @@ -997,7 +1011,7 @@ impl crosvm_vcpu { let resume: &mut VcpuRequest_Resume = r.mut_resume(); swap(&mut resume.data, &mut self.resume_data); - self.vcpu_transaction(&r)?; + self.vcpu_send(&r)?; Ok(()) } diff --git a/src/plugin/mod.rs b/src/plugin/mod.rs index d0809c6..128b434 100644 --- a/src/plugin/mod.rs +++ b/src/plugin/mod.rs @@ -364,7 +364,7 @@ pub fn run_vcpus( match run_res { Ok(run) => match run { VcpuExit::IoIn { port, mut size } => { - let mut data = [0; 8]; + let mut data = [0; 256]; if size > data.len() { error!("unsupported IoIn size of {} bytes", size); size = data.len(); diff --git a/src/plugin/vcpu.rs b/src/plugin/vcpu.rs index d19a5f8..17cd875 100644 --- a/src/plugin/vcpu.rs +++ b/src/plugin/vcpu.rs @@ -389,133 +389,164 @@ impl PluginVcpu { } fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> { + let mut wait_reason = self.wait_reason.take(); + let mut do_recv = true; let mut resume_data = None; - let mut request_buffer = self.request_buffer.borrow_mut(); - request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0); + let mut response = VcpuResponse::new(); + + // Typically a response is sent for every request received. The odd (yet common) + // case is when a resume request is received. This function will skip sending + // a resume reply, and instead we'll go run the VM and then later reply with a wait + // response message. This code block handles checking if a wait reason is pending (where + // the wait reason isn't the first-time init [first time init needs to first + // receive a wait request from the plugin]) to send it as a reply before doing a recv() + // for the next request. Note that if a wait reply is pending then this function + // will send the reply and do nothing else--the expectation is that handle_until_resume() + // is the only caller of this function, so the function will immediately get called again + // and this second call will no longer see a pending wait reason and do a recv() for the + // next message. + if let Some(reason) = wait_reason { + if reason.has_init() { + wait_reason = Some(reason); + } else { + response.set_wait(reason); + do_recv = false; + wait_reason = None; + } + } - let msg_size = self - .connection - .recv(&mut request_buffer) - .map_err(io_to_sys_err)?; + if do_recv { + let mut request_buffer = self.request_buffer.borrow_mut(); + request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0); - let mut request = protobuf::parse_from_bytes::<VcpuRequest>(&request_buffer[..msg_size]) - .map_err(proto_to_sys_err)?; + let msg_size = self + .connection + .recv(&mut request_buffer) + .map_err(io_to_sys_err)?; - let wait_reason = self.wait_reason.take(); + let mut request = + protobuf::parse_from_bytes::<VcpuRequest>(&request_buffer[..msg_size]) + .map_err(proto_to_sys_err)?; - let mut response = VcpuResponse::new(); - let res = if request.has_wait() { - match wait_reason { - Some(wait_reason) => { - response.set_wait(wait_reason); - Ok(()) + let res = if request.has_wait() { + match wait_reason { + Some(wait_reason) => { + response.set_wait(wait_reason); + Ok(()) + } + None => Err(SysError::new(EPROTO)), } - None => Err(SysError::new(EPROTO)), - } - } else if wait_reason.is_some() { - // Any request other than getting the wait_reason while there is one pending is invalid. - self.wait_reason.set(wait_reason); - Err(SysError::new(EPROTO)) - } else if request.has_resume() { - response.mut_resume(); - resume_data = Some(request.take_resume().take_data()); - Ok(()) - } else if request.has_get_state() { - let response_state = response.mut_get_state(); - match get_vcpu_state(vcpu, request.get_get_state().set) { - Ok(state) => { - response_state.state = state; - Ok(()) + } else if wait_reason.is_some() { + // Any request other than getting the wait_reason while there is one pending is invalid. + self.wait_reason.set(wait_reason); + Err(SysError::new(EPROTO)) + } else if request.has_resume() { + response.mut_resume(); + resume_data = Some(request.take_resume().take_data()); + Ok(()) + } else if request.has_get_state() { + let response_state = response.mut_get_state(); + match get_vcpu_state(vcpu, request.get_get_state().set) { + Ok(state) => { + response_state.state = state; + Ok(()) + } + Err(e) => Err(e), } - Err(e) => Err(e), - } - } else if request.has_set_state() { - response.mut_set_state(); - let set_state = request.get_set_state(); - set_vcpu_state(vcpu, set_state.set, set_state.get_state()) - } else if request.has_get_msrs() { - let entry_data = &mut response.mut_get_msrs().entry_data; - let entry_indices = &request.get_get_msrs().entry_indices; - let mut msr_entries = Vec::with_capacity(entry_indices.len()); - for &index in entry_indices { - msr_entries.push(kvm_msr_entry { - index, - ..Default::default() - }); - } - match vcpu.get_msrs(&mut msr_entries) { - Ok(()) => { - for msr_entry in msr_entries { - entry_data.push(msr_entry.data); + } else if request.has_set_state() { + response.mut_set_state(); + let set_state = request.get_set_state(); + set_vcpu_state(vcpu, set_state.set, set_state.get_state()) + } else if request.has_get_msrs() { + let entry_data = &mut response.mut_get_msrs().entry_data; + let entry_indices = &request.get_get_msrs().entry_indices; + let mut msr_entries = Vec::with_capacity(entry_indices.len()); + for &index in entry_indices { + msr_entries.push(kvm_msr_entry { + index, + ..Default::default() + }); + } + match vcpu.get_msrs(&mut msr_entries) { + Ok(()) => { + for msr_entry in msr_entries { + entry_data.push(msr_entry.data); + } + Ok(()) } - Ok(()) + Err(e) => Err(e), } - Err(e) => Err(e), - } - } else if request.has_set_msrs() { - const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>(); - const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>(); - const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>(); - const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>(); - const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY); - - response.mut_set_msrs(); - let request_entries = &request.get_set_msrs().entries; - let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY; - let layout = Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout"); - let mut allocation = LayoutAllocation::zeroed(layout); - - // Safe to obtain an exclusive reference because there are no other - // references to the allocation yet and all-zero is a valid bit - // pattern. - let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() }; - - unsafe { - // Mapping the unsized array to a slice is unsafe becase the length isn't known. - // Providing the length used to create the struct guarantees the entire slice is - // valid. - let kvm_msr_entries: &mut [kvm_msr_entry] = - kvm_msrs.entries.as_mut_slice(request_entries.len()); - for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) { - msr_entry.index = entry.index; - msr_entry.data = entry.data; + } else if request.has_set_msrs() { + const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>(); + const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>(); + const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>(); + const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>(); + const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY); + + response.mut_set_msrs(); + let request_entries = &request.get_set_msrs().entries; + + let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY; + let layout = + Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout"); + let mut allocation = LayoutAllocation::zeroed(layout); + + // Safe to obtain an exclusive reference because there are no other + // references to the allocation yet and all-zero is a valid bit + // pattern. + let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() }; + + unsafe { + // Mapping the unsized array to a slice is unsafe becase the length isn't known. + // Providing the length used to create the struct guarantees the entire slice is + // valid. + let kvm_msr_entries: &mut [kvm_msr_entry] = + kvm_msrs.entries.as_mut_slice(request_entries.len()); + for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) { + msr_entry.index = entry.index; + msr_entry.data = entry.data; + } } - } - kvm_msrs.nmsrs = request_entries.len() as u32; - vcpu.set_msrs(&kvm_msrs) - } else if request.has_set_cpuid() { - response.mut_set_cpuid(); - let request_entries = &request.get_set_cpuid().entries; - let mut cpuid = CpuId::new(request_entries.len()); - let cpuid_entries = cpuid.mut_entries_slice(); - for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) { - cpuid_entry.function = request_entry.function; - if request_entry.has_index { - cpuid_entry.index = request_entry.index; - cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + kvm_msrs.nmsrs = request_entries.len() as u32; + vcpu.set_msrs(&kvm_msrs) + } else if request.has_set_cpuid() { + response.mut_set_cpuid(); + let request_entries = &request.get_set_cpuid().entries; + let mut cpuid = CpuId::new(request_entries.len()); + let cpuid_entries = cpuid.mut_entries_slice(); + for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) { + cpuid_entry.function = request_entry.function; + if request_entry.has_index { + cpuid_entry.index = request_entry.index; + cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + } + cpuid_entry.eax = request_entry.eax; + cpuid_entry.ebx = request_entry.ebx; + cpuid_entry.ecx = request_entry.ecx; + cpuid_entry.edx = request_entry.edx; } - cpuid_entry.eax = request_entry.eax; - cpuid_entry.ebx = request_entry.ebx; - cpuid_entry.ecx = request_entry.ecx; - cpuid_entry.edx = request_entry.edx; - } - vcpu.set_cpuid2(&cpuid) - } else { - Err(SysError::new(ENOTTY)) - }; + vcpu.set_cpuid2(&cpuid) + } else { + Err(SysError::new(ENOTTY)) + }; - if let Err(e) = res { - response.errno = e.errno(); + if let Err(e) = res { + response.errno = e.errno(); + } } - let mut response_buffer = self.response_buffer.borrow_mut(); - response_buffer.clear(); - response - .write_to_vec(&mut response_buffer) - .map_err(proto_to_sys_err)?; - self.connection - .send(&response_buffer[..]) - .map_err(io_to_sys_err)?; + // Send the response, except if it's a resume response (in which case + // we'll go run the VM and afterwards send a wait response message). + if !response.has_resume() { + let mut response_buffer = self.response_buffer.borrow_mut(); + response_buffer.clear(); + response + .write_to_vec(&mut response_buffer) + .map_err(proto_to_sys_err)?; + self.connection + .send(&response_buffer[..]) + .map_err(io_to_sys_err)?; + } Ok(resume_data) } |