summary refs log tree commit diff
diff options
context:
space:
mode:
author Alyssa Ross <hi@alyssa.is> 2020-12-19 16:07:22 +0000
committer Alyssa Ross <hi@alyssa.is> 2021-04-14 23:42:56 +0000
commit a71eb59d8acf8f02328db002b89d4db78bb9ae56 (patch)
tree 8dc0f45b8af0a90039c06296174f1eba4f7d1be6
parent 03f8ca8da40dd26d0a2542fef51d28eb943d4442 (diff)
download nixpkgs-a71eb59d8acf8f02328db002b89d4db78bb9ae56.tar
nixpkgs-a71eb59d8acf8f02328db002b89d4db78bb9ae56.tar.gz
nixpkgs-a71eb59d8acf8f02328db002b89d4db78bb9ae56.tar.bz2
nixpkgs-a71eb59d8acf8f02328db002b89d4db78bb9ae56.tar.lz
nixpkgs-a71eb59d8acf8f02328db002b89d4db78bb9ae56.tar.xz
nixpkgs-a71eb59d8acf8f02328db002b89d4db78bb9ae56.tar.zst
nixpkgs-a71eb59d8acf8f02328db002b89d4db78bb9ae56.zip
spectrumPackages.sys-vms.net: init
This is a VM that acts as a router.  It handles talking to network
hardware, and other VMs can be connected to it by attaching virtual
ethernet devices.

It expects to get a physical ethernet device passed through to it
using VFIO.  Wi-Fi should work too, but it would need to be configured,
so I've stuck with Ethernet for now.  We use ConnMan[1] to configure
physical network interfaces, and it automatically takes care of DHCP
and related setup for us.  I chose ConnMan over NetworkManager because
it was easier to set up.

Virtual ethernet devices are identified by a specific OUI in their MAC
address.  The NIC part of the MAC address is used to encode the last
three octets of the IPv4 address this VM should assign to the
interface.  This way, the host can tell this VM what the address of
each virtual interface is without having to resort to a secondary
communication channel.  The first octet will always be 100, as the
intention is to use the IPv4 shared address space (aka the CGNAT
space) for inter-VM networks to match the behaviour of Chromium OS[2].

Every networking client will be connected to a router VM with a /31,
where the low address is for the router, and the high address is for
the client.  This way the host's job is as simple as possible -- it
just has to connect two TAPs together, without worrying about any
routing rules or anything -- that can all happen in the VM.  This does
mean that the router has to have a virtio-net device for every client,
though.  We may run into scaling limitations with this approach, in
which case we might have to revisit how this works on the host.

[1]: https://git.kernel.org/pub/scm/network/connman/connman.git/about/
[2]: https://chromium.googlesource.com/chromiumos/platform2/+/39e48f668a937d266638f3f7d31d3427a4966464/patchpanel/address_manager.cc#13

Message-Id: <20210411115740.29615-15-hi@alyssa.is>
Reviewed-by: Cole Helbling <cole.e.helbling@outlook.com>
-rw-r--r-- pkgs/os-specific/linux/spectrum/vm/default.nix 2
-rw-r--r-- pkgs/os-specific/linux/spectrum/vm/net/default.nix 166
2 files changed, 168 insertions, 0 deletions
diff --git a/pkgs/os-specific/linux/spectrum/vm/default.nix b/pkgs/os-specific/linux/spectrum/vm/default.nix
index 582a0c2e9c3..c4ff729cb8a 100644
--- a/pkgs/os-specific/linux/spectrum/vm/default.nix
+++ b/pkgs/os-specific/linux/spectrum/vm/default.nix
@@ -2,4 +2,6 @@
 
 {
   comp = callPackage ./comp { };
+
+  net = callPackage ./net { };
 }
diff --git a/pkgs/os-specific/linux/spectrum/vm/net/default.nix b/pkgs/os-specific/linux/spectrum/vm/net/default.nix
new file mode 100644
index 00000000000..1deb7031caf
--- /dev/null
+++ b/pkgs/os-specific/linux/spectrum/vm/net/default.nix
@@ -0,0 +1,166 @@
+{ lib, makeRootfs, runCommand, writeScript, writeText
+, busybox, connman, dbus, execline, iptables, iproute, jq, linux_vm, mdevd
+}:
+
+runCommand "vm-net" rec {
+  linux = linux_vm.override {
+    structuredExtraConfig = with lib.kernel; {
+      E1000E = yes;
+      IGB = yes;
+      PACKET = yes;
+
+      IP_NF_NAT = yes;
+      IP_NF_IPTABLES = yes;
+      IP_NF_TARGET_MASQUERADE = yes;
+      NF_CONNTRACK = yes;
+    };
+  };
+
+  login = writeScript "login" ''
+    #! ${execline}/bin/execlineb -s0
+    unexport !
+    ${busybox}/bin/login -p -f root $@
+  '';
+
+  rootfs = makeRootfs {
+    rcServices.ok-all = {
+      type = writeText "ok-all-type" ''
+        bundle
+      '';
+      contents = writeText "ok-all-contents" ''
+        mdevd-coldplug
+      '';
+    };
+
+    rcServices.mdevd = {
+      type = writeText "mdevd-type" ''
+        longrun
+      '';
+      run = writeScript "mdevd-run" ''
+        #! ${execline}/bin/execlineb -P
+        ${mdevd}/bin/mdevd -D3 -f ${writeText "mdevd.conf" ''
+          $INTERFACE=.* 0:0 660 ! @${writeScript "interface" ''
+            #! ${execline}/bin/execlineb -S0
+
+            multisubstitute {
+              importas -i DEVPATH DEVPATH
+              importas -i INTERFACE INTERFACE
+            }
+
+            ifte
+
+            {
+              # This interface is connected to another VM.
+
+              # Our IP is encoded in the NIC-specific portion of the
+              # interface's MAC address.
+              backtick -i LOCAL_IP {
+                pipeline { ip -j link show $INTERFACE }
+                pipeline { jq -r ".[0].address | split(\":\") | .[3:6] | \"0x\" + .[]" }
+                xargs printf "100.%d.%d.%d"
+              }
+              importas -iu LOCAL_IP LOCAL_IP
+
+              if { ip address add ''${LOCAL_IP}/31 dev $INTERFACE }
+              ip link set $INTERFACE up
+            }
+
+            {
+              if { test $INTERFACE != lo }
+              # This is a physical connection to a network device.
+              if { iptables -t nat -A POSTROUTING -o $INTERFACE -j MASQUERADE }
+              s6-rc -u change connman
+            }
+
+            grep -iq ^0A:B3:EC: /sys/class/net/''${INTERFACE}/address
+          ''}
+        ''}
+      '';
+      notification-fd = writeText "mdevd-notification-fd" ''
+        3
+      '';
+      dependencies = writeText "mdevd-dependencies" ''
+        sysctl
+      '';
+    };
+
+    rcServices.mdevd-coldplug = { # oneshot: runs mdevd-coldplug once the mdevd service is up
+      type = writeText "mdevd-coldplug-type" ''
+        oneshot
+      '';
+      up = writeText "mdevd-coldplug-up" ''
+        ${mdevd}/bin/mdevd-coldplug
+      ''; # store name follows <service>-<file>, distinct from mdevd's own "mdevd-run" script
+      dependencies = writeText "mdevd-coldplug-dependencies" ''
+        mdevd
+      '';
+    };
+
+    rcServices.dbus = { # longrun: D-Bus daemon using /etc/dbus-1/system.conf, kept in the foreground (--nofork)
+      type = writeText "dbus-daemon-type" ''
+        longrun
+      ''; # store name carries the -type suffix like every other service's type file
+      run = writeScript "dbus-daemon-run" ''
+        #! ${execline}/bin/execlineb -S0
+        foreground { mkdir /run/dbus }
+        # Busybox cp doesn't have -n to avoid copying to paths that
+        # already exist, but we can abuse -u for the same effect,
+        # since every file in the store is from Jan 1 1970.
+        foreground { cp -u ${dbus}/libexec/dbus-daemon-launch-helper /run }
+        foreground { chgrp messagebus /run/dbus-daemon-launch-helper }
+        foreground { chmod 4550 /run/dbus-daemon-launch-helper }
+        ${dbus}/bin/dbus-daemon
+          --nofork --nosyslog --nopidfile --config-file=/etc/dbus-1/system.conf
+      '';
+    };
+
+    rcServices.connman = {
+      type = writeText "connman-type" ''
+        longrun
+      '';
+      run = writeScript "connman-run" ''
+        #! ${execline}/bin/execlineb -S0
+        backtick -in HARDWARE_INTERFACES {
+          pipeline {
+            find -L /sys/class/net -mindepth 2 -maxdepth 2 -name address -print0
+          }
+
+          # Filter out other VMs and the loopback device.
+          pipeline { xargs -0 grep -iL ^\\(0A:B3:EC:\\|00:00:00:00:00:00$\\) }
+
+          # Extract the interface names from the address file paths.
+          awk -F/ "{if (NR > 1) printf \",\"; printf \"%s\", $5}"
+        }
+        importas -iu HARDWARE_INTERFACES HARDWARE_INTERFACES
+
+        ${connman}/bin/connmand -ni $HARDWARE_INTERFACES
+      '';
+      dependencies = writeText "connman-dependencies" ''
+        dbus
+      '';
+    };
+
+    rcServices.sysctl = { # oneshot: writes 1 to ip_forward to enable IPv4 forwarding; mdevd depends on it
+      type = writeText "sysctl-type" ''
+        oneshot
+      '';
+      up = writeText "sysctl-up" ''
+        redirfd -w 1 /proc/sys/net/ipv4/ip_forward
+        echo 1
+      '';
+    };
+
+    services.getty.run = writeScript "getty-run" ''
+      #! ${execline}/bin/execlineb -P
+      ${busybox}/bin/getty -i -n -l ${login} 38400 ttyS0
+    '';
+
+    path = [ iproute iptables jq ];
+  };
+
+  inherit (rootfs) squashfs;
+} ''
+  mkdir $out
+  ln -s $linux/bzImage $out/kernel
+  ln -s $squashfs $out/squashfs
+''