summary refs log tree commit diff
path: root/host
diff options
context:
space:
mode:
author Alyssa Ross <hi@alyssa.is> 2022-02-15 12:42:28 +0000
committer Alyssa Ross <hi@alyssa.is> 2022-02-15 12:42:28 +0000
commit bbaff80fb27c00f1e9a6d083f2f5914bd79990a4 (patch)
tree 1c8797e9b0ab29d102a035a1ac98c3996f9f1e1d /host
parent 00d04c9a83991ab7d89c58a1e6b127e9b3131fe3 (diff)
download spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar
spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.gz
spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.bz2
spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.lz
spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.xz
spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.tar.zst
spectrum-bbaff80fb27c00f1e9a6d083f2f5914bd79990a4.zip
host/start-vm: clean up net after VM shuts down
Diffstat (limited to 'host')
-rw-r--r-- host/start-vm/ch.h 12
-rw-r--r-- host/start-vm/ch.rs 126
-rw-r--r-- host/start-vm/net-util.c 54
-rw-r--r-- host/start-vm/net-util.h 3
-rw-r--r-- host/start-vm/net.c 230
-rw-r--r-- host/start-vm/start-vm.rs 1
6 files changed, 303 insertions, 123 deletions
diff --git a/host/start-vm/ch.h b/host/start-vm/ch.h
new file mode 100644
index 0000000..661208d
--- /dev/null
+++ b/host/start-vm/ch.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is>
+
+#include <stdint.h>
+
+struct ch_device; // opaque handle, only ever used through pointers
+
+int ch_add_net(const char *vm_name, int tap, const uint8_t mac[6],
+	       struct ch_device **out); // returns 0, or an errno value on failure
+int ch_remove_device(const char *vm_name, struct ch_device *); // frees the handle
+
+void ch_device_free(struct ch_device *); // NULL is accepted
diff --git a/host/start-vm/ch.rs b/host/start-vm/ch.rs
new file mode 100644
index 0000000..04da710
--- /dev/null
+++ b/host/start-vm/ch.rs
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is>
+
+use std::ffi::{CStr, OsStr, OsString};
+use std::num::NonZeroI32;
+use std::os::raw::{c_char, c_int};
+use std::os::unix::prelude::*;
+use std::process::{Command, Stdio};
+
+use crate::format_mac;
+
+// Trivially safe.
+const EPERM: NonZeroI32 = unsafe { NonZeroI32::new_unchecked(1) };
+const EPROTO: NonZeroI32 = unsafe { NonZeroI32::new_unchecked(71) };
+
+fn command(vm_name: &OsStr, s: impl AsRef<OsStr>) -> Command {
+    let mut api_socket_path = OsString::from("/run/service/ext-"); // OsString: vm_name may be non-UTF-8
+    api_socket_path.push(vm_name);
+    api_socket_path.push("-vmm/env/cloud-hypervisor.sock");
+
+    let mut command = Command::new("ch-remote"); // not spawned here; the caller runs it
+    command.stdin(Stdio::null());
+    command.arg("--api-socket");
+    command.arg(api_socket_path);
+    command.arg(s); // `s` is the ch-remote subcommand, e.g. "add-net"
+    command
+}
+
+pub fn add_net(vm_name: &OsStr, tap: RawFd, mac: &str) -> Result<OsString, NonZeroI32> {
+    let mut ch_remote = command(vm_name, "add-net")
+        .arg(format!("fd={},mac={}", tap, mac))
+        .stdout(Stdio::piped()) // the response is piped into jq below
+        .spawn()
+        .or(Err(EPERM))?; // any spawn failure is reported as EPERM
+
+    let jq_out = match Command::new("jq")
+        .args(&["-j", ".id"]) // print the "id" field raw, without a trailing newline
+        .stdin(ch_remote.stdout.take().unwrap()) // cannot panic: stdout was set to piped above
+        .stderr(Stdio::inherit())
+        .output()
+    {
+        Ok(o) => o,
+        Err(_) => {
+            // Try not to leave a zombie.
+            let _ = ch_remote.kill();
+            let _ = ch_remote.wait();
+            return Err(EPERM);
+        }
+    };
+
+    if let Ok(ch_remote_status) = ch_remote.wait() {
+        if ch_remote_status.success() && jq_out.status.success() {
+            return Ok(OsString::from_vec(jq_out.stdout)); // jq's stdout is the device ID
+        }
+    }
+
+    Err(EPROTO) // both helpers ran, but one failed (or the wait itself failed)
+}
+
+pub fn remove_device(vm_name: &OsStr, device_id: &OsStr) -> Result<(), NonZeroI32> {
+    let ch_remote = command(vm_name, "remove-device")
+        .arg(device_id)
+        .status() // runs ch-remote to completion and yields its exit status
+        .or(Err(EPERM))?; // ch-remote could not be run at all
+
+    if ch_remote.success() {
+        Ok(())
+    } else {
+        Err(EPROTO) // ch-remote ran but exited unsuccessfully
+    }
+}
+
+/// # Safety
+///
+/// - `vm_name` must point to a valid C string.
+/// - `tap` must be a file descriptor describing a tap device.
+/// - `mac` must be a valid pointer; `id` must be null or valid for writes.
+#[export_name = "ch_add_net"]
+unsafe extern "C" fn add_net_c(
+    vm_name: *const c_char,
+    tap: RawFd,
+    mac: *const [u8; 6],
+    id: *mut *mut OsString,
+) -> c_int {
+    let vm_name = CStr::from_ptr(vm_name);
+    let mac = format_mac(&*mac);
+
+    match add_net(OsStr::from_bytes(vm_name.to_bytes()), tap, &mac) {
+        Err(e) => e.get(), // errno-style code; `*id` is left untouched
+        Ok(id_str) => {
+            if !id.is_null() {
+                let token = Box::into_raw(Box::new(id_str)); // ownership passes to the caller
+                *id = token;
+            }
+            0
+        }
+    }
+}
+
+/// # Safety
+///
+/// - `vm_name` must point to a valid C string.
+/// - `device_id` must be a device ID obtained by calling `add_net_c`.  This
+///   function takes ownership and frees it, so it is invalid afterwards.
+#[export_name = "ch_remove_device"]
+unsafe extern "C" fn remove_device_c(vm_name: *const c_char, device_id: *mut OsString) -> c_int {
+    let vm_name = CStr::from_ptr(vm_name);
+    let device_id = Box::from_raw(device_id); // dropped (freed) when this function returns
+
+    if let Err(e) = remove_device(OsStr::from_bytes(vm_name.to_bytes()), device_id.as_ref()) {
+        e.get()
+    } else {
+        0
+    }
+}
+
+/// # Safety
+///
+/// `id` must be null, or a device ID obtained by calling `add_net_c` that has
+/// not been freed.  After calling `device_free`, the pointer is no longer valid.
+#[export_name = "ch_device_free"]
+unsafe extern "C" fn device_free(id: *mut OsString) {
+    if !id.is_null() {
+        drop(Box::from_raw(id))
+    }
+}
diff --git a/host/start-vm/net-util.c b/host/start-vm/net-util.c
index 70d9385..44f8e8a 100644
--- a/host/start-vm/net-util.c
+++ b/host/start-vm/net-util.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: EUPL-1.2
 // SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is>
 
+#include <err.h>
 #include <fcntl.h>
 #include <net/if.h>
 #include <string.h>
@@ -32,6 +33,39 @@ out:
 	return r;
 }
 
+int if_rename(const char *name, const char *newname)
+{
+	int fd, r; // fd is a throwaway socket used only to issue the ioctl
+	struct ifreq ifr;
+
+	strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); // NOTE(review): no NUL if name fills the buffer
+	strncpy(ifr.ifr_newname, newname, sizeof ifr.ifr_newname);
+
+	if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)) == -1)
+		return -1;
+	r = ioctl(fd, SIOCSIFNAME, &ifr); // rename interface `name` to `newname`
+	close(fd);
+	return r; // the ioctl's result
+}
+
+int if_down(const char *name)
+{
+	struct ifreq ifr;
+	int fd, r = -1; // r stays -1 unless SIOCSIFFLAGS is reached
+
+	if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)) == -1)
+		return -1;
+
+	strncpy(ifr.ifr_name, name, IFNAMSIZ);
+	if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) // fetch the current flags first
+		goto out;
+	ifr.ifr_flags &= ~IFF_UP; // clear only IFF_UP, keep the other flags
+	r = ioctl(fd, SIOCSIFFLAGS, &ifr);
+out:
+	close(fd);
+	return r;
+}
+
 int bridge_add(const char *name)
 {
 	int fd, r;
@@ -59,10 +93,30 @@ int bridge_add_if(const char *brname, const char *ifname)
 	return r;
 }
 
+int bridge_remove_if(const char *brname, const char *ifname)
+{
+	struct ifreq ifr;
+	int fd, r;
+
+	strncpy(ifr.ifr_name, brname, IFNAMSIZ); // the bridge is the ioctl target
+	if (!(ifr.ifr_ifindex = if_nametoindex(ifname)))
+		return -1; // if_nametoindex returned 0: ifname could not be resolved
+
+	if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)) == -1)
+		return -1;
+
+	r = ioctl(fd, SIOCBRDELIF, &ifr); // detach ifname from brname
+	close(fd);
+	return r;
+}
+
 int bridge_delete(const char *name)
 {
 	int fd, r;
 
+	if (if_down(name) == -1)
+		warn("setting %s down", name);
+
 	if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)) == -1)
 		return -1;
 
diff --git a/host/start-vm/net-util.h b/host/start-vm/net-util.h
index 79ee903..5ca0a64 100644
--- a/host/start-vm/net-util.h
+++ b/host/start-vm/net-util.h
@@ -2,9 +2,12 @@
 // SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is>
 
 int if_up(const char *name);
+int if_rename(const char *name, const char *newname);
+int if_down(const char *name);
 
 int bridge_add(const char *name);
 int bridge_add_if(const char *brname, const char *ifname);
+int bridge_remove_if(const char *brname, const char *ifname);
 int bridge_delete(const char *name);
 
 int tap_open(const char *name, int flags);
diff --git a/host/start-vm/net.c b/host/start-vm/net.c
index 9a094aa..41bf1c2 100644
--- a/host/start-vm/net.c
+++ b/host/start-vm/net.c
@@ -1,19 +1,19 @@
 // SPDX-License-Identifier: EUPL-1.2
 // SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is>
 
+#include "ch.h"
 #include "net-util.h"
 
+#include <assert.h>
+#include <err.h>
 #include <errno.h>
-#include <inttypes.h>
 #include <net/if.h>
+#include <poll.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
+#include <stdnoreturn.h>
 #include <unistd.h>
 
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/uio.h>
 #include <sys/un.h>
 
 #include <linux/if_tun.h>
@@ -27,119 +27,6 @@ int format_mac(char s[static MAC_STR_LEN + 1], const uint8_t mac[6])
 			mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
 }
 
-static int dial_un(const char *sun_path)
-{
-	struct sockaddr_un addr = { 0 };
-	int fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
-	if (fd == -1)
-		return -1;
-
-	addr.sun_family = AF_UNIX;
-	strncpy(addr.sun_path, sun_path, sizeof addr.sun_path);
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Warray-bounds"
-	// Safe because if the last byte of addr.sun_path is non-zero,
-	// sun_path must be at least one byte longer.
-	if (addr.sun_path[sizeof addr.sun_path - 1] &&
-	    sun_path[sizeof addr.sun_path]) {
-#pragma GCC diagnostic pop
-		errno = E2BIG;
-		goto fail;
-	}
-
-	if (connect(fd, (struct sockaddr *)&addr, sizeof addr) == -1)
-		goto fail;
-
-	return fd;
-fail:
-	close(fd);
-	return -1;
-}
-
-static int sendv_with_fd(int sock, const struct iovec iov[], size_t iovlen,
-			 int fd, int flags)
-{
-	struct msghdr msg = { 0 };
-	struct cmsghdr *cmsg;
-	union {
-		char buf[CMSG_SPACE(sizeof fd)];
-		struct cmsghdr _align;
-	} u;
-
-	msg.msg_iov = (struct iovec *)iov;
-	msg.msg_iovlen = iovlen;
-	msg.msg_control = u.buf;
-	msg.msg_controllen = sizeof u.buf;
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	cmsg->cmsg_level = SOL_SOCKET;
-	cmsg->cmsg_type = SCM_RIGHTS;
-	cmsg->cmsg_len = CMSG_LEN(sizeof fd);
-	memcpy(CMSG_DATA(cmsg), &fd, sizeof fd);
-
-	return sendmsg(sock, &msg, flags);
-}
-
-static int ch_add_net(const char *vm_name, int tap, const uint8_t mac[6])
-{
-	char mac_s[MAC_STR_LEN + 1];
-	char path[sizeof ((struct sockaddr_un *)0)->sun_path] = { 0 };
-	int sock = -1;
-	uint16_t status = 0;
-	FILE *f = NULL;
-	static const char buf1[] =
-		"PUT /api/v1/vm.add-net HTTP/1.1\r\n"
-		"Host: localhost\r\n"
-		"Content-Type: application/json\r\n"
-		"Content-Length: 27\r\n"
-		"\r\n"
-		"{\"mac\":\"";
-	static const char buf2[] = "\"}";
-
-	if (format_mac(mac_s, mac) == -1)
-		return -1;
-
-	struct iovec iov[] = {
-		{ .iov_base = (void *)buf1, .iov_len = sizeof buf1 - 1 },
-		{ .iov_base = (void *)mac_s, .iov_len = MAC_STR_LEN },
-		{ .iov_base = (void *)buf2, .iov_len = sizeof buf2 - 1 },
-	};
-
-	if (snprintf(path, sizeof path,
-		     "/run/service/ext-%s-vmm/env/cloud-hypervisor.sock",
-		     vm_name) >= (ssize_t)sizeof path) {
-		errno = E2BIG;
-		return -1;
-	}
-
-	if ((sock = dial_un(path)) == -1)
-		goto out;
-
-	if (sendv_with_fd(sock, iov, sizeof iov / sizeof *iov, tap, 0) == -1)
-		goto out;
-
-	f = fdopen(sock, "r");
-	sock = -1; // now owned by f
-	if (!f)
-		goto out;
-
-	if (fscanf(f, "%*s %" SCNu16, &status) != 1)
-		status = 0;
-
-	if (status < 200 || status >= 300) {
-		fputs("Failed cloud-hypervisor API request:\n", stderr);
-		fflush(stderr);
-		writev(STDERR_FILENO, iov, sizeof iov / sizeof *iov);
-		fputs("\n", stderr);
-	}
-out:
-	close(sock);
-	if (f)
-		fclose(f);
-	return (200 <= status && status < 300) - 1;
-}
-
 static int setup_tap(const char *bridge_name, const char *tap_prefix)
 {
 	int fd;
@@ -170,15 +57,104 @@ static int client_net_setup(const char *bridge_name)
 }
 
 static int router_net_setup(const char *bridge_name, const char *router_vm_name,
-			    const uint8_t mac[6])
+			    const uint8_t mac[6], struct ch_device **out)
 {
-	int r, fd = setup_tap(bridge_name, "router");
+	int e, fd = setup_tap(bridge_name, "router");
 	if (fd == -1)
 		return -1;
 
-	r = ch_add_net(router_vm_name, fd, mac);
+	e = ch_add_net(router_vm_name, fd, mac, out);
 	close(fd);
-	return r;
+	if (!e)
+		return 0;
+	errno = e;
+	return -1;
+}
+
+static int router_net_cleanup(pid_t pid, const char *vm_name,
+			      struct ch_device *vm_net_device)
+{
+	int e;
+	char name[IFNAMSIZ], newname[IFNAMSIZ], brname[IFNAMSIZ];
+
+	if ((e = ch_remove_device(vm_name, vm_net_device))) {
+		errno = e; // ch_remove_device returns an errno value rather than -1
+		return -1;
+	}
+
+	// Work around cloud-hypervisor not closing taps it's no
+	// longer using by freeing up the name.
+	//
+	// We assume ≤16-bit pids.
+	snprintf(name, sizeof name, "router%d", pid);
+	snprintf(newname, sizeof newname, "_dead%d", pid);
+	snprintf(brname, sizeof brname, "br%d", pid);
+
+	if (bridge_remove_if(brname, name) == -1)
+		warn("removing %s from %s", name, brname); // non-fatal: keep going
+
+	if (if_down(name) == -1)
+		return -1;
+	return if_rename(name, newname); // frees up `name` for reuse
+}
+
+static int bridge_cleanup(pid_t pid)
+{
+	char name[IFNAMSIZ];
+	snprintf(name, sizeof name, "br%d", pid); // same "br<pid>" name router_net_cleanup builds
+	return bridge_delete(name);
+}
+
+static noreturn void exit_listener_main(int fd, pid_t pid,
+					const char *router_vm_name,
+					struct ch_device *router_vm_net_device)
+{
+	// Wait for the other end of the pipe to be closed.
+	int status = EXIT_SUCCESS;
+	struct pollfd pollfd = { .fd = fd, .events = 0, .revents = 0 };
+	while (poll(&pollfd, 1, -1) == -1) { // timeout -1: block indefinitely
+		if (errno == EINTR || errno == EWOULDBLOCK)
+			continue;
+
+		err(1, "poll");
+	}
+	assert(pollfd.revents == POLLERR); // a pipe's write end reports POLLERR once the read end closes
+
+	if (router_net_cleanup(pid, router_vm_name,
+			       router_vm_net_device) == -1) {
+		warn("cleaning up router tap");
+		status = EXIT_FAILURE;
+	}
+	if (bridge_cleanup(pid) == -1) { // attempted even if the step above failed
+		warn("cleaning up bridge");
+		status = EXIT_FAILURE;
+	}
+
+	exit(status); // EXIT_FAILURE if either cleanup step failed
+}
+
+static int exit_listener_setup(const char *router_vm_name,
+			       struct ch_device *router_vm_net_device)
+{
+	pid_t pid = getpid(); // taken before fork(), so the child gets the parent's pid
+	int fd[2];
+
+	if (pipe(fd) == -1)
+		return -1;
+
+	switch (fork()) {
+	case -1:
+		close(fd[0]);
+		close(fd[1]);
+		return -1;
+	case 0:
+		close(fd[0]); // the child keeps only the write end, to poll on
+		exit_listener_main(fd[1], pid, router_vm_name,
+				   router_vm_net_device); // noreturn, so no fall-through
+	default:
+		close(fd[1]); // fd[0] is not closed here; the child waits for it to close
+		return 0;
+	}
+}
 
 struct net_config {
@@ -188,6 +164,7 @@ struct net_config {
 
 struct net_config net_setup(const char *router_vm_name)
 {
+	struct ch_device *router_vm_net_device = NULL;
 	struct net_config r = { .fd = -1, .mac = { 0 } };
 	char bridge_name[IFNAMSIZ];
 	pid_t pid = getpid();
@@ -208,9 +185,15 @@ struct net_config net_setup(const char *router_vm_name)
 	if ((r.fd = client_net_setup(bridge_name)) == -1)
 		goto fail_bridge;
 
-	if (router_net_setup(bridge_name, router_vm_name, router_mac) == -1)
+	if (router_net_setup(bridge_name, router_vm_name, router_mac,
+			     &router_vm_net_device) == -1)
 		goto fail_bridge;
 
+	// Set up a process that will listen for this process dying,
+	// and remove the interface from the netvm, and delete the
+	// bridge.
+	exit_listener_setup(router_vm_name, router_vm_net_device);
+
 	goto out;
 
 fail_bridge:
@@ -218,5 +201,6 @@ fail_bridge:
 	close(r.fd);
 	r.fd = -1;
 out:
+	ch_device_free(router_vm_net_device);
 	return r;
 }
diff --git a/host/start-vm/start-vm.rs b/host/start-vm/start-vm.rs
index 7dba702..a68a669 100644
--- a/host/start-vm/start-vm.rs
+++ b/host/start-vm/start-vm.rs
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: EUPL-1.2
 // SPDX-FileCopyrightText: 2022 Alyssa Ross <hi@alyssa.is>
 
+mod ch;
 mod modprobe;
 mod net;