summary refs log tree commit diff
path: root/src/linux.rs
diff options
context:
space:
mode:
authorAlyssa Ross <hi@alyssa.is>2020-06-13 08:17:27 +0000
committerAlyssa Ross <hi@alyssa.is>2020-06-15 08:33:57 +0000
commit5bd2e7f606b726b598fdc26fe26a9b4029fab6d3 (patch)
tree2f12d905d51580d78b75af45b48a3588e2497d45 /src/linux.rs
parent89059ee485b2eefdecc1e55daa14bcb857e6d2f1 (diff)
downloadcrosvm-5bd2e7f606b726b598fdc26fe26a9b4029fab6d3.tar
crosvm-5bd2e7f606b726b598fdc26fe26a9b4029fab6d3.tar.gz
crosvm-5bd2e7f606b726b598fdc26fe26a9b4029fab6d3.tar.bz2
crosvm-5bd2e7f606b726b598fdc26fe26a9b4029fab6d3.tar.lz
crosvm-5bd2e7f606b726b598fdc26fe26a9b4029fab6d3.tar.xz
crosvm-5bd2e7f606b726b598fdc26fe26a9b4029fab6d3.tar.zst
crosvm-5bd2e7f606b726b598fdc26fe26a9b4029fab6d3.zip
crosvm: fix deadlock on early VmRequest
If a DiskCommand was received on the crosvm socket before the
virtio-block device was activated, the Token::VmRequest case in the
main event loop would forward the request to the block device socket,
and then wait syncronously for a response.  That response would never
come because the device hadn't been activated, and it would never be
activated because the event loop would never continue, and therefore
never be able to respond to the event that causes the device to be
activated.  crosvm would therefore just hang forever, waiting for a
response that would never come.

This patch fixes this deadlock by keeping track of whether devices
that send a response in this way have been activated yet.  If they
have already been activated, messages are sent and responses are
received as normal.  If they have not been activated, messages are
instead put into a per-device queue.  Once the device is activated,
queued messages are processed all at once, and then the device is
marked as ready, and the queue is dropped.  Future messages are
processed immediately as they come in, with no further queueing.

A device indicates that it is ready by sending a message on its
socket.  The main crosvm event loop can then poll the socket, to be
notified when the device is ready.  This poll event will only trigger
once -- once it has been received, it is removed from the poll
context.

Currently, the only device type that responds to external control
messages AND needs to be activated by an event is the block device.
The balloon device does not respond to messages, and the xhci
controller device is activated up front.  The code is nevertheless
structured so that it should be very easy to drop another kind of
device in to the queuing system, should that be required.

Message-Id: <20200614114344.22642-3-hi@alyssa.is>
Notes
Reviewed-by: Cole Helbling <cole.e.helbling@outlook.com>
Diffstat (limited to 'src/linux.rs')
-rw-r--r--src/linux.rs201
1 files changed, 167 insertions, 34 deletions
diff --git a/src/linux.rs b/src/linux.rs
index ec2067c..ccd7d88 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -3,6 +3,7 @@
 // found in the LICENSE file.
 
 use std::cmp::max;
+use std::collections::{BTreeMap, VecDeque};
 use std::convert::TryFrom;
 use std::error::Error as StdError;
 use std::ffi::CStr;
@@ -32,12 +33,12 @@ use acpi_tables::sdt::SDT;
 use devices::virtio::EventDevice;
 use devices::virtio::{self, Console, VirtioDevice};
 use devices::{
-    self, Ac97Backend, Ac97Dev, HostBackendDeviceProvider, PciDevice, VfioContainer, VfioDevice,
-    VfioPciDevice, VirtioPciDevice, XhciController,
+    self, Ac97Backend, Ac97Dev, Bus, HostBackendDeviceProvider, PciDevice, VfioContainer,
+    VfioDevice, VfioPciDevice, VirtioPciDevice, XhciController,
 };
 use io_jail::{self, Minijail};
 use kvm::*;
-use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
+use msg_socket::{MsgError, MsgReceiver, MsgResult, MsgSender, MsgSocket};
 use net_util::{Error as NetError, MacAddress, Tap};
 use remain::sorted;
 use resources::{Alloc, MmioType, SystemAllocator};
@@ -45,7 +46,7 @@ use sync::{Condvar, Mutex};
 use sys_util::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
 
 use sys_util::{
-    self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
+    self, block_signal, clear_signal, debug, drop_capabilities, error, flock, get_blocked_signals,
     get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
     set_cpu_affinity, validate_raw_fd, warn, EventFd, FlockOperation, GuestAddress, GuestMemory,
     Killable, MemoryMappingArena, PollContext, PollToken, Protection, ScopedEvent, SignalFd,
@@ -57,7 +58,7 @@ use vm_control::{
     DiskControlResult, UsbControlSocket, VmControlResponseSocket, VmIrqRequest, VmIrqResponse,
     VmIrqResponseSocket, VmMemoryControlRequestSocket, VmMemoryControlResponseSocket,
     VmMemoryRequest, VmMemoryResponse, VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse,
-    VmMsyncResponseSocket, VmRunMode,
+    VmMsyncResponseSocket, VmRequest, VmRunMode,
 };
 
 use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
@@ -1667,6 +1668,45 @@ fn file_to_i64<P: AsRef<Path>>(path: P) -> io::Result<i64> {
         .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
 }
 
+/// Returns a boolean indicating whether the VM should be exited.
+fn do_vm_request(
+    request: VmRequest,
+    device_socket: Option<&UnixSeqpacket>,
+    control_socket: &VmControlResponseSocket,
+    run_mode_arc: &Arc<VcpuRunMode>,
+    vcpu_handles: &mut Vec<JoinHandle<()>>,
+    io_bus: &mut Bus,
+) -> MsgResult<bool> {
+    let mut run_mode_opt = None;
+    let response = request.execute(&mut run_mode_opt, device_socket);
+    control_socket.send(&response)?;
+    if let Some(run_mode) = run_mode_opt {
+        info!("control socket changed run mode to {}", run_mode);
+        match run_mode {
+            VmRunMode::Exiting => Ok(true),
+            VmRunMode::Running => {
+                if let VmRunMode::Suspending = *run_mode_arc.mtx.lock() {
+                    io_bus.notify_resume();
+                }
+                run_mode_arc.set_and_notify(VmRunMode::Running);
+                for handle in vcpu_handles {
+                    let _ = handle.kill(SIGRTMIN() + 0);
+                }
+                Ok(false)
+            }
+            other => {
+                run_mode_arc.set_and_notify(other);
+                for handle in vcpu_handles {
+                    let _ = handle.kill(SIGRTMIN() + 0);
+                }
+                Ok(false)
+            }
+        }
+    } else {
+        Ok(false)
+    }
+}
+
 pub fn run_config(cfg: Config) -> Result<()> {
     if cfg.sandbox {
         // Printing something to the syslog before entering minijail so that libc's syslogger has a
@@ -1818,7 +1858,7 @@ fn run_control(
 ) -> Result<()> {
     const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
 
-    #[derive(PollToken)]
+    #[derive(Debug, PollToken)]
     enum Token {
         Exit,
         Suspend,
@@ -1828,6 +1868,7 @@ fn run_control(
         BalloonResult,
         VmControlServer,
         VmControl { index: usize },
+        DeviceReady { sock_fd: RawFd },
     }
 
     stdin()
@@ -1912,6 +1953,38 @@ fn run_control(
     }
     vcpu_thread_barrier.wait();
 
+    struct QueuedDeviceReq {
+        request: VmRequest,
+        control_sock_index: usize,
+    }
+
+    /// A device can either be waiting, or ready.
+    ///
+    /// If it's waiting, we queue up events to send to it once it's ready.  We can't just send the
+    /// requests straight away and let them sit on the socket until the device comes up, because we
+    /// want to syncronously wait for a response after sending.  If the device hasn't been activated
+    /// when we do this, we'd sit waiting for a response forever and never activate the device.
+    enum DeviceStatus<'a> {
+        Waiting(&'a UnixSeqpacket, VecDeque<QueuedDeviceReq>),
+        Ready,
+    }
+
+    let mut queued_device_reqs = BTreeMap::<RawFd, DeviceStatus>::new();
+
+    // Each socket will send a ready message when it's ready to receive requests.  Add the sockets
+    // to the event loop, so that when they become ready, we can process their queues.  After the
+    // initial queue is processed, the device becomes ready, and the socket is removed from the
+    // event loop.
+    for socket in disk_host_sockets.iter().map(AsRef::as_ref) {
+        let token = Token::DeviceReady {
+            sock_fd: socket.as_raw_fd(),
+        };
+        poll_ctx.add(socket, token).map_err(Error::PollContextAdd)?;
+
+        let status = DeviceStatus::Waiting(socket, VecDeque::new());
+        queued_device_reqs.insert(socket.as_raw_fd(), status);
+    }
+
     let mut ioapic_delayed = Vec::<usize>::default();
     'poll: loop {
         let events = {
@@ -2110,40 +2183,44 @@ fn run_control(
                         match socket {
                             TaggedControlSocket::Vm(socket) => match socket.recv() {
                                 Ok(request) => {
-                                    let mut run_mode_opt = None;
-                                    let response = request.execute(
-                                        &mut run_mode_opt,
+                                    let device_socket = request.socket(
                                         &balloon_host_socket,
-                                        disk_host_sockets,
+                                        &disk_host_sockets,
                                         &usb_control_socket,
                                     );
-                                    if let Err(e) = socket.send(&response) {
-                                        error!("failed to send VmResponse: {}", e);
-                                    }
-                                    if let Some(run_mode) = run_mode_opt {
-                                        info!("control socket changed run mode to {}", run_mode);
-                                        match run_mode {
-                                            VmRunMode::Exiting => {
-                                                break 'poll;
-                                            }
-                                            VmRunMode::Running => {
-                                                if let VmRunMode::Suspending =
-                                                    *run_mode_arc.mtx.lock()
-                                                {
-                                                    linux.io_bus.notify_resume();
-                                                }
-                                                run_mode_arc.set_and_notify(VmRunMode::Running);
-                                                for handle in &vcpu_handles {
-                                                    let _ = handle.kill(SIGRTMIN() + 0);
-                                                }
-                                            }
-                                            other => {
-                                                run_mode_arc.set_and_notify(other);
-                                                for handle in &vcpu_handles {
-                                                    let _ = handle.kill(SIGRTMIN() + 0);
+
+                                    match device_socket.map(|s| {
+                                        queued_device_reqs.get_mut(&s.as_raw_fd()).unwrap()
+                                    }) {
+                                        None | Some(DeviceStatus::Ready) => {
+                                            match do_vm_request(
+                                                request,
+                                                device_socket,
+                                                socket,
+                                                &run_mode_arc,
+                                                &mut vcpu_handles,
+                                                &mut linux.io_bus,
+                                            ) {
+                                                Ok(true) => break 'poll,
+                                                Ok(false) => {}
+                                                Err(e) => {
+                                                    error!("failed to handle VmRequest: {}", e);
+                                                    continue 'poll;
                                                 }
                                             }
                                         }
+
+                                        Some(DeviceStatus::Waiting(_, queue)) => {
+                                            debug!(
+                                                "Device {} not yet ready. Queueing request.",
+                                                device_socket.unwrap().as_raw_fd()
+                                            );
+
+                                            queue.push_back(QueuedDeviceReq {
+                                                request,
+                                                control_sock_index: index,
+                                            });
+                                        }
                                     }
                                 }
                                 Err(e) => {
@@ -2204,6 +2281,61 @@ fn run_control(
                         }
                     }
                 }
+                Token::DeviceReady { sock_fd } => {
+                    if let Some(DeviceStatus::Waiting(device_sock, mut queue)) =
+                        queued_device_reqs.remove(&sock_fd)
+                    {
+                        debug!(
+                            "Device {} is ready. Processing its queue of {} item(s).",
+                            sock_fd,
+                            queue.len()
+                        );
+
+                        // Deal with the message sent to indicate readiness.  Its contents don't matter.
+                        if let Err(e) = device_sock.recv(&mut []) {
+                            error!(
+                                "Failed to recv ready notification from device socket: {}",
+                                e
+                            );
+                            continue 'poll;
+                        }
+
+                        while let Some(QueuedDeviceReq {
+                            request,
+                            control_sock_index,
+                        }) = queue.pop_front()
+                        {
+                            let control_sock = match control_sockets.get(control_sock_index) {
+                                Some(TaggedControlSocket::Vm(s)) => s,
+                                _ => unreachable!(),
+                            };
+
+                            match do_vm_request(
+                                request,
+                                Some(device_sock),
+                                control_sock,
+                                &run_mode_arc,
+                                &mut vcpu_handles,
+                                &mut linux.io_bus,
+                            ) {
+                                Ok(true) => break 'poll,
+                                Ok(false) => {}
+                                Err(e) => {
+                                    error!("failed to handle VmRequest: {}", e);
+                                    continue 'poll;
+                                }
+                            }
+                        }
+
+                        if let Err(e) = poll_ctx.delete(device_sock) {
+                            error!("failed to delete poll token: {}", e);
+                        }
+                    } else {
+                        error!("Received ready notification for a device that isn't waiting");
+                    }
+
+                    queued_device_reqs.insert(sock_fd, DeviceStatus::Ready);
+                }
             }
         }
 
@@ -2229,6 +2361,7 @@ fn run_control(
                         _ => {}
                     }
                 }
+                Token::DeviceReady { .. } => {}
             }
         }