diff options
author | Alyssa Ross <hi@alyssa.is> | 2022-02-15 12:42:28 +0000 |
---|---|---|
committer | Alyssa Ross <hi@alyssa.is> | 2022-02-15 12:42:28 +0000 |
commit | bbaff80fb27c00f1e9a6d083f2f5914bd79990a4 (patch) | |
tree | 1c8797e9b0ab29d102a035a1ac98c3996f9f1e1d /host | |
parent | 00d04c9a83991ab7d89c58a1e6b127e9b3131fe3 (diff) | |
download | spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.gz spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.bz2 spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.lz spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.xz spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.zst spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.zip |
host/start-vm: clean up net after VM shuts down
Diffstat (limited to 'host')
-rw-r--r-- | host/start-vm/ch.h | 12 | ||||
-rw-r--r-- | host/start-vm/ch.rs | 126 | ||||
-rw-r--r-- | host/start-vm/net-util.c | 54 | ||||
-rw-r--r-- | host/start-vm/net-util.h | 3 | ||||
-rw-r--r-- | host/start-vm/net.c | 230 | ||||
-rw-r--r-- | host/start-vm/start-vm.rs | 1 |
6 files changed, 303 insertions, 123 deletions
diff --git a/host/start-vm/ch.h b/host/start-vm/ch.h new file mode 100644 index 0000000..661208d --- /dev/null +++ b/host/start-vm/ch.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is> + +#include <stdint.h> + +struct ch_device; + +int ch_add_net(const char *vm_name, int tap, const uint8_t mac[6], + struct ch_device **out); +int ch_remove_device(const char *vm_name, struct ch_device *); + +void ch_device_free(struct ch_device *); diff --git a/host/start-vm/ch.rs b/host/start-vm/ch.rs new file mode 100644 index 0000000..04da710 --- /dev/null +++ b/host/start-vm/ch.rs @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is> + +use std::ffi::{CStr, OsStr, OsString}; +use std::num::NonZeroI32; +use std::os::raw::{c_char, c_int}; +use std::os::unix::prelude::*; +use std::process::{Command, Stdio}; + +use crate::format_mac; + +// Trivially safe. +const EPERM: NonZeroI32 = unsafe { NonZeroI32::new_unchecked(1) }; +const EPROTO: NonZeroI32 = unsafe { NonZeroI32::new_unchecked(71) }; + +fn command(vm_name: &OsStr, s: impl AsRef<OsStr>) -> Command { + let mut api_socket_path = OsString::from("/run/service/ext-"); + api_socket_path.push(vm_name); + api_socket_path.push("-vmm/env/cloud-hypervisor.sock"); + + let mut command = Command::new("ch-remote"); + command.stdin(Stdio::null()); + command.arg("--api-socket"); + command.arg(api_socket_path); + command.arg(s); + command +} + +pub fn add_net(vm_name: &OsStr, tap: RawFd, mac: &str) -> Result<OsString, NonZeroI32> { + let mut ch_remote = command(vm_name, "add-net") + .arg(format!("fd={},mac={}", tap, mac)) + .stdout(Stdio::piped()) + .spawn() + .or(Err(EPERM))?; + + let jq_out = match Command::new("jq") + .args(&["-j", ".id"]) + .stdin(ch_remote.stdout.take().unwrap()) + .stderr(Stdio::inherit()) + .output() + { + Ok(o) => o, + Err(_) => { + // Try not to leave a zombie. 
+ let _ = ch_remote.kill(); + let _ = ch_remote.wait(); + return Err(EPERM); + } + }; + + if let Ok(ch_remote_status) = ch_remote.wait() { + if ch_remote_status.success() && jq_out.status.success() { + return Ok(OsString::from_vec(jq_out.stdout)); + } + } + + Err(EPROTO) +} + +pub fn remove_device(vm_name: &OsStr, device_id: &OsStr) -> Result<(), NonZeroI32> { + let ch_remote = command(vm_name, "remove-device") + .arg(device_id) + .status() + .or(Err(EPERM))?; + + if ch_remote.success() { + Ok(()) + } else { + Err(EPROTO) + } +} + +/// # Safety +/// +/// - `vm_name` must point to a valid C string. +/// - `tap` must be a file descriptor describing a tap device. +/// - `mac` must be a valid pointer. +#[export_name = "ch_add_net"] +unsafe extern "C" fn add_net_c( + vm_name: *const c_char, + tap: RawFd, + mac: *const [u8; 6], + id: *mut *mut OsString, +) -> c_int { + let vm_name = CStr::from_ptr(vm_name); + let mac = format_mac(&*mac); + + match add_net(OsStr::from_bytes(vm_name.to_bytes()), tap, &mac) { + Err(e) => e.get(), + Ok(id_str) => { + if !id.is_null() { + let token = Box::into_raw(Box::new(id_str)); + *id = token; + } + 0 + } + } +} + +/// # Safety +/// +/// - `vm_name` must point to a valid C string. +/// - `device_id` must be a device ID obtained by calling `add_net_c`. This +/// function frees it, so the pointer is no longer valid afterwards. +#[export_name = "ch_remove_device"] +unsafe extern "C" fn remove_device_c(vm_name: *const c_char, device_id: *mut OsString) -> c_int { + let vm_name = CStr::from_ptr(vm_name); + let device_id = Box::from_raw(device_id); + + if let Err(e) = remove_device(OsStr::from_bytes(vm_name.to_bytes()), device_id.as_ref()) { + e.get() + } else { + 0 + } +} + +/// # Safety +/// +/// `id` must be a device ID obtained by calling `add_net_c`. After +/// calling `device_free`, the pointer is no longer valid. 
+#[export_name = "ch_device_free"] +unsafe extern "C" fn device_free(id: *mut OsString) { + if !id.is_null() { + drop(Box::from_raw(id)) + } +} diff --git a/host/start-vm/net-util.c b/host/start-vm/net-util.c index 70d9385..44f8e8a 100644 --- a/host/start-vm/net-util.c +++ b/host/start-vm/net-util.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: EUPL-1.2 // SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is> +#include <err.h> #include <fcntl.h> #include <net/if.h> #include <string.h> @@ -32,6 +33,39 @@ out: return r; } +int if_rename(const char *name, const char *newname) +{ + int fd, r; + struct ifreq ifr; + + strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); + strncpy(ifr.ifr_newname, newname, sizeof ifr.ifr_newname); + + if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)) == -1) + return -1; + r = ioctl(fd, SIOCSIFNAME, &ifr); + close(fd); + return r; +} + +int if_down(const char *name) +{ + struct ifreq ifr; + int fd, r = -1; + + if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)) == -1) + return -1; + + strncpy(ifr.ifr_name, name, IFNAMSIZ); + if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) + goto out; + ifr.ifr_flags &= ~IFF_UP; + r = ioctl(fd, SIOCSIFFLAGS, &ifr); +out: + close(fd); + return r; +} + int bridge_add(const char *name) { int fd, r; @@ -59,10 +93,30 @@ int bridge_add_if(const char *brname, const char *ifname) return r; } +int bridge_remove_if(const char *brname, const char *ifname) +{ + struct ifreq ifr; + int fd, r; + + strncpy(ifr.ifr_name, brname, IFNAMSIZ); + if (!(ifr.ifr_ifindex = if_nametoindex(ifname))) + return -1; + + if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)) == -1) + return -1; + + r = ioctl(fd, SIOCBRDELIF, &ifr); + close(fd); + return r; +} + int bridge_delete(const char *name) { int fd, r; + if (if_down(name) == -1) + warn("setting %s down", name); + if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)) == -1) return -1; diff --git a/host/start-vm/net-util.h b/host/start-vm/net-util.h index 79ee903..5ca0a64 100644 
--- a/host/start-vm/net-util.h +++ b/host/start-vm/net-util.h @@ -2,9 +2,12 @@ // SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is> int if_up(const char *name); +int if_rename(const char *name, const char *newname); +int if_down(const char *name); int bridge_add(const char *name); int bridge_add_if(const char *brname, const char *ifname); +int bridge_remove_if(const char *brname, const char *ifname); int bridge_delete(const char *name); int tap_open(const char *name, int flags); diff --git a/host/start-vm/net.c b/host/start-vm/net.c index 9a094aa..41bf1c2 100644 --- a/host/start-vm/net.c +++ b/host/start-vm/net.c @@ -1,19 +1,19 @@ // SPDX-License-Identifier: EUPL-1.2 // SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is> +#include "ch.h" #include "net-util.h" +#include <assert.h> +#include <err.h> #include <errno.h> -#include <inttypes.h> #include <net/if.h> +#include <poll.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> +#include <stdnoreturn.h> #include <unistd.h> -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/uio.h> #include <sys/un.h> #include <linux/if_tun.h> @@ -27,119 +27,6 @@ int format_mac(char s[static MAC_STR_LEN + 1], const uint8_t mac[6]) mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } -static int dial_un(const char *sun_path) -{ - struct sockaddr_un addr = { 0 }; - int fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); - if (fd == -1) - return -1; - - addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, sun_path, sizeof addr.sun_path); - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" - // Safe because if the last byte of addr.sun_path is non-zero, - // sun_path must be at least one byte longer. 
- if (addr.sun_path[sizeof addr.sun_path - 1] && - sun_path[sizeof addr.sun_path]) { -#pragma GCC diagnostic pop - errno = E2BIG; - goto fail; - } - - if (connect(fd, (struct sockaddr *)&addr, sizeof addr) == -1) - goto fail; - - return fd; -fail: - close(fd); - return -1; -} - -static int sendv_with_fd(int sock, const struct iovec iov[], size_t iovlen, - int fd, int flags) -{ - struct msghdr msg = { 0 }; - struct cmsghdr *cmsg; - union { - char buf[CMSG_SPACE(sizeof fd)]; - struct cmsghdr _align; - } u; - - msg.msg_iov = (struct iovec *)iov; - msg.msg_iovlen = iovlen; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof u.buf; - - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof fd); - memcpy(CMSG_DATA(cmsg), &fd, sizeof fd); - - return sendmsg(sock, &msg, flags); -} - -static int ch_add_net(const char *vm_name, int tap, const uint8_t mac[6]) -{ - char mac_s[MAC_STR_LEN + 1]; - char path[sizeof ((struct sockaddr_un *)0)->sun_path] = { 0 }; - int sock = -1; - uint16_t status = 0; - FILE *f = NULL; - static const char buf1[] = - "PUT /api/v1/vm.add-net HTTP/1.1\r\n" - "Host: localhost\r\n" - "Content-Type: application/json\r\n" - "Content-Length: 27\r\n" - "\r\n" - "{\"mac\":\""; - static const char buf2[] = "\"}"; - - if (format_mac(mac_s, mac) == -1) - return -1; - - struct iovec iov[] = { - { .iov_base = (void *)buf1, .iov_len = sizeof buf1 - 1 }, - { .iov_base = (void *)mac_s, .iov_len = MAC_STR_LEN }, - { .iov_base = (void *)buf2, .iov_len = sizeof buf2 - 1 }, - }; - - if (snprintf(path, sizeof path, - "/run/service/ext-%s-vmm/env/cloud-hypervisor.sock", - vm_name) >= (ssize_t)sizeof path) { - errno = E2BIG; - return -1; - } - - if ((sock = dial_un(path)) == -1) - goto out; - - if (sendv_with_fd(sock, iov, sizeof iov / sizeof *iov, tap, 0) == -1) - goto out; - - f = fdopen(sock, "r"); - sock = -1; // now owned by f - if (!f) - goto out; - - if (fscanf(f, "%*s %" SCNu16, &status) 
!= 1) - status = 0; - - if (status < 200 || status >= 300) { - fputs("Failed cloud-hypervisor API request:\n", stderr); - fflush(stderr); - writev(STDERR_FILENO, iov, sizeof iov / sizeof *iov); - fputs("\n", stderr); - } -out: - close(sock); - if (f) - fclose(f); - return (200 <= status && status < 300) - 1; -} - static int setup_tap(const char *bridge_name, const char *tap_prefix) { int fd; @@ -170,15 +57,104 @@ static int client_net_setup(const char *bridge_name) } static int router_net_setup(const char *bridge_name, const char *router_vm_name, - const uint8_t mac[6]) + const uint8_t mac[6], struct ch_device **out) { - int r, fd = setup_tap(bridge_name, "router"); + int e, fd = setup_tap(bridge_name, "router"); if (fd == -1) return -1; - r = ch_add_net(router_vm_name, fd, mac); + e = ch_add_net(router_vm_name, fd, mac, out); close(fd); - return r; + if (!e) + return 0; + errno = e; + return -1; +} + +static int router_net_cleanup(pid_t pid, const char *vm_name, + struct ch_device *vm_net_device) +{ + int e; + char name[IFNAMSIZ], newname[IFNAMSIZ], brname[IFNAMSIZ]; + + if ((e = ch_remove_device(vm_name, vm_net_device))) { + errno = e; + return -1; + } + + // Work around cloud-hypervisor not closing taps it's no + // longer using by freeing up the name. + // + // We assume ≤16-bit pids. + snprintf(name, sizeof name, "router%d", pid); + snprintf(newname, sizeof newname, "_dead%d", pid); + snprintf(brname, sizeof brname, "br%d", pid); + + if (bridge_remove_if(brname, name) == -1) + warn("removing %s from %s", name, brname); + + if (if_down(name) == -1) + return -1; + return if_rename(name, newname); +} + +static int bridge_cleanup(pid_t pid) +{ + char name[IFNAMSIZ]; + snprintf(name, sizeof name, "br%d", pid); + return bridge_delete(name); +} + +static noreturn void exit_listener_main(int fd, pid_t pid, + const char *router_vm_name, + struct ch_device *router_vm_net_device) +{ + // Wait for the other end of the pipe to be closed. 
+ int status = EXIT_SUCCESS; + struct pollfd pollfd = { .fd = fd, .events = 0, .revents = 0 }; + while (poll(&pollfd, 1, -1) == -1) { + if (errno == EINTR || errno == EWOULDBLOCK) + continue; + + err(1, "poll"); + } + assert(pollfd.revents == POLLERR); + + if (router_net_cleanup(pid, router_vm_name, + router_vm_net_device) == -1) { + warn("cleaning up router tap"); + status = EXIT_FAILURE; + } + if (bridge_cleanup(pid) == -1) { + warn("cleaning up bridge"); + status = EXIT_FAILURE; + } + + exit(status); +} + +static int exit_listener_setup(const char *router_vm_name, + struct ch_device *router_vm_net_device) +{ + pid_t pid = getpid(); + int fd[2]; + + if (pipe(fd) == -1) + return -1; + + switch (fork()) { + case -1: + close(fd[0]); + close(fd[1]); + return -1; + case 0: + close(fd[0]); + exit_listener_main(fd[1], pid, router_vm_name, + router_vm_net_device); + default: + close(fd[1]); + return 0; + } } struct net_config { @@ -188,6 +164,7 @@ struct net_config { struct net_config net_setup(const char *router_vm_name) { + struct ch_device *router_vm_net_device = NULL; struct net_config r = { .fd = -1, .mac = { 0 } }; char bridge_name[IFNAMSIZ]; pid_t pid = getpid(); @@ -208,9 +185,15 @@ struct net_config net_setup(const char *router_vm_name) if ((r.fd = client_net_setup(bridge_name)) == -1) goto fail_bridge; - if (router_net_setup(bridge_name, router_vm_name, router_mac) == -1) + if (router_net_setup(bridge_name, router_vm_name, router_mac, + &router_vm_net_device) == -1) goto fail_bridge; + // Set up a process that will listen for this process dying, + // and remove the interface from the netvm, and delete the + // bridge. 
+ exit_listener_setup(router_vm_name, router_vm_net_device); + goto out; fail_bridge: @@ -218,5 +201,6 @@ fail_bridge: close(r.fd); r.fd = -1; out: + ch_device_free(router_vm_net_device); return r; } diff --git a/host/start-vm/start-vm.rs b/host/start-vm/start-vm.rs index 7dba702..a68a669 100644 --- a/host/start-vm/start-vm.rs +++ b/host/start-vm/start-vm.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: EUPL-1.2 // SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is> +mod ch; mod modprobe; mod net; |