# Builds `spectrum-testhost`: an execline script that enables IPv4
# forwarding, creates a scratch directory under $XDG_RUNTIME_DIR, populates
# an s6 scan directory and a compiled s6-rc database in it, and then runs
# s6-svscan on that scan directory.
#
# The s6-rc database defines two longrun services:
#   vm-net  binds a PCI device to vfio-pci and runs it in cloud-hypervisor;
#   vm-app  (depends on vm-net) sets up a bridge and TAP devices and runs
#           the application VM in crosvm.
#
# NOTE(review): `utillinux` is accepted as an argument but is not referenced
# anywhere in this expression.
{ lib, runCommandNoCC, writeScript, writeScriptBin, writeShellScript, writeText
, coreutils, cloud-hypervisor, crosvm, curl, execline, gnutar, gnused, iproute
, iptables, jq, kmod, mktuntap, rsync, s6, s6-rc, sys-vms, utillinux
}:

let
  inherit (lib) concatStrings escapeShellArg makeBinPath mapAttrsToList
    optionalString;

  # compose2 f g a b == f (g a b): apply the unary function `f` to the
  # result of the binary function `g`.
  compose2 = f: g: a: b: f (g a b);

  # concatMapAttrs fn attrs: call `fn name value` for each attribute of
  # `attrs` (via mapAttrsToList) and concatenate the resulting strings.
  concatMapAttrs = compose2 concatStrings mapAttrsToList;

  # Build a directory containing one subdirectory per attribute of
  # `services`.  Each service is itself an attribute set mapping a file name
  # to a path; every path is copied (cp -r) to $out/<service>/<file name>.
  #
  # NOTE(review): service and file names are interpolated into the shell
  # script unquoted, so this presumably expects names without whitespace or
  # shell metacharacters.
  makeServicesDir = { services }:
    runCommandNoCC "services" {} ''
      mkdir $out
      ${concatMapAttrs (name: attrs: ''
        mkdir $out/${name}
        ${concatMapAttrs (key: value: ''
          cp -r ${value} $out/${name}/${key}
        '') attrs}
      '') services}
    '';

  # Run s6-rc-compile on the service definition directory `source`, writing
  # the compiled database to ./dest, and produce a tar archive of that
  # directory's contents as the output.
  #
  # fdhuser: when non-null, it is shell-escaped and passed to s6-rc-compile
  #          as the argument of `-h`.
  s6RcCompile = { fdhuser ? null }: source:
    runCommandNoCC "s6-rc-compile" {} ''
      ${s6-rc}/bin/s6-rc-compile \
        ${optionalString (fdhuser != null) "-h ${escapeShellArg fdhuser}"} \
        dest ${source}
      tar -C dest -cf $out .
    '';

  # Tar archive of the compiled s6-rc database for the vm-app and vm-net
  # services.  It is unpacked at run time by the spectrum-testhost script.
  compiledRcServicesDir = s6RcCompile {} (makeServicesDir {
    services = {
      vm-app = {
        # Run script for the application VM.  In order, it:
        #   1. creates and brings up bridge br0 (aborting if that fails);
        #   2. derives router_mac and client_mac from sys-vms.app.vmID,
        #      formatted as four hex digits;
        #   3. creates TAP device vmtapnet, attaches it to br0, and PUTs a
        #      JSON object {tap, mac} to the vm.add-net endpoint on the
        #      cloud-hypervisor API socket in ../vm-net/env;
        #   4. runs mktuntap with fd number 6 and name pattern vmtap%d,
        #      reads the device name from TUNTAP_NAME, attaches it to br0
        #      and appends an iptables MASQUERADE rule for it;
        #   5. execs crosvm with `--tap-fd 6` and the client MAC.
        #
        # NOTE(review): the octet table in the script comment lists ranges
        # 0-3 / 4 / 5-6, but the sed replacement emits six octets: three
        # fixed (0A:B3:EC), one that is 80 for the router or 00 for the
        # client, and two taken from the vmID.  Confirm the intended
        # numbering.
        #
        # NOTE(review): `''${port}` yields the literal execline text
        # `${port}`, but no define/importas/backtick for `port` is visible
        # in this script.  Confirm where it is meant to be substituted.
        #
        # NOTE(review): the MASQUERADE rule appended here is not removed by
        # the finish script below.
        run = writeScript "app-run" ''
          #! ${execline}/bin/execlineb -S0
          # fdclose 0

          # Checking the return value of the bridge creation is
          # important, because if it fails due to the bridge already
          # existing that means something else could already be using
          # this bridge.
          if { ip link add name br0 type bridge }
          if { ip link set br0 up }

          # Calculate the MACs for our TAP and the router's TAP.
          # MAC address format, by octet:
          #
          # 0-3 Static OUI for Spectrum
          # 4 Most significant bit is used to differentiate
          # routers from clients. Other bits are reserved.
          # 5-6 Last two octets of client's IP (in 100.64.0.0/16).
          #
          backtick -i router_mac {
            pipeline { printf %.4x ${toString sys-vms.app.vmID} }
            sed s/^\\(..\\)\\(..\\)$/0A:B3:EC:80:\\1:\\2/
          }
          backtick -i client_mac {
            pipeline { printf %.4x ${toString sys-vms.app.vmID} }
            sed s/^\\(..\\)\\(..\\)$/0A:B3:EC:00:\\1:\\2/
          }
          multisubstitute {
            importas -iu router_mac router_mac
            importas -iu client_mac client_mac
          }

          # Create the net VM end, and attach it to the net VM.
          #
          # Use a hardcoded name for now because if we use a dynamic
          # one iproute2 has no way of telling us the name that was
          # chosen:
          # https://lore.kernel.org/netdev/20210406134240.wwumpnrzfjbttnmd@eve.qyliss.net/
          define other_tap_name vmtapnet
          # Try to delete the device in case the VM was powered off
          # (as the finish script wouldn't have been run in that
          # case.) Since we check the return value of ip tuntap add,
          # in the case of a race condition between deleting the
          # device and creating it again, we'll just fail and try
          # again.
          foreground { ip link delete $other_tap_name }
          if { ip tuntap add name $other_tap_name mode tap }
          if { ip link set $other_tap_name master br0 }
          if { ip link set $other_tap_name up }
          if {
            pipeline {
              jq -n "$ARGS.named"
                --arg tap $other_tap_name
                --arg mac $router_mac
            }
            curl -iX PUT
              -H "Accept: application/json"
              -H "Content-Type: application/json"
              --data-binary @-
              --unix-socket ../vm-net/env/cloud-hypervisor.sock
              http://localhost/api/v1/vm.add-net
          }

          mktuntap -pvBi vmtap%d 6
          importas -iu tap_name TUNTAP_NAME
          if { ip link set $tap_name master br0 }
          if { ip link set $tap_name up }
          if { iptables -t nat -A POSTROUTING -o $tap_name -j MASQUERADE }

          ${crosvm}/bin/crosvm run
            -p init=/sbin/init
            -p notifyport=''${port}
            # --serial type=file,path=/tmp/app.log
            --cid 4
            --tap-fd 6,mac=''${client_mac}
            --root ${sys-vms.app.rootfs.squashfs}
            ${sys-vms.app.linux}/bzImage
        '';
        # Finish script: delete the vmtapnet TAP device (its failure is
        # ignored via `foreground`) and then the br0 bridge.
        finish = writeScript "app-finish" ''
          #! ${execline}/bin/execlineb -S0
          # TODO: remove from vm-net
          foreground { ip link delete vmtapnet }
          ip link delete br0
        '';
        type = writeText "app-type" ''
          longrun
        '';
        # vm-app's dependencies file names vm-net; the run script above
        # talks to vm-net's cloud-hypervisor API socket.
        dependencies = writeText "app-dependencies" ''
          vm-net
        '';
      };

      vm-net = {
        # Run script for the network VM.  In order, it:
        #   1. redirects fd 0 to /dev/null;
        #   2. writes the PCI address 0000:00:19.0 to the device's current
        #      driver/unbind file (failure ignored);
        #   3. loads vfio-pci, reads the IDs from the device's sysfs
        #      `vendor` and `device` files (skipping the first two bytes of
        #      each) and writes "<vendor> <device>" to vfio-pci/new_id
        #      (failure ignored);
        #   4. writes the PCI address to vfio-pci/bind (failure ignored),
        #      then aborts unless the device now appears under
        #      /sys/bus/pci/drivers/vfio-pci;
        #   5. creates ./env (failure ignored) and execs cloud-hypervisor
        #      with its API socket at env/cloud-hypervisor.sock, the PCI
        #      device passed via --device, and the net VM's squashfs as a
        #      read-only disk.
        run = writeScript "net-run" ''
          #! ${execline}/bin/execlineb -S0

          # This is only necessary for when running s6 from a tty.
          # (i.e. when debugging or running the demo).
          redirfd -w 0 /dev/null

          define PCI_LOCATION 0000:00:19.0
          define PCI_PATH /sys/bus/pci/devices/''${PCI_LOCATION}

          # Unbind the network device from the driver it's already
          # attached to, if any.
          foreground {
            redirfd -w 1 ''${PCI_PATH}/driver/unbind
            printf "%s" $PCI_LOCATION
          }

          # Tell the VFIO driver it should support our device. This
          # is allowed to fail because it might already know that, in
          # which case it'll return EEXIST.
          if { modprobe vfio-pci }
          backtick -in device_id {
            if { dd bs=2 skip=1 count=2 status=none if=''${PCI_PATH}/vendor }
            if { printf " " }
            dd bs=2 skip=1 count=2 status=none if=''${PCI_PATH}/device
          }
          importas -iu device_id device_id
          foreground {
            redirfd -w 1 /sys/bus/pci/drivers/vfio-pci/new_id
            printf "%s" $device_id
          }

          # Bind the device to the VFIO driver. This is allowed to
          # fail because the new_id operation we just tried will have
          # bound it automatically for us if it succeeded. In such a
          # case, the kernel will return ENODEV (conistency!).
          foreground {
            redirfd -w 1 /sys/bus/pci/drivers/vfio-pci/bind
            printf "%s" $PCI_LOCATION
          }

          # Because we allow both new_id and bind to fail, we need to
          # manually make sure now that at least one of them succeeded
          # and the device is actually attached to the vfio-driver.
          if { test -e /sys/bus/pci/drivers/vfio-pci/''${PCI_LOCATION} }

          foreground { mkdir env }

          ${cloud-hypervisor}/bin/cloud-hypervisor
            --api-socket env/cloud-hypervisor.sock
            --console off
            # --serial tty
            --cmdline "console=ttyS0 panic=30 root=/dev/vda"
            --device path=''${PCI_PATH}
            --disk path=${sys-vms.net.rootfs.squashfs},readonly=on
            --kernel ${sys-vms.net.linux.dev}/vmlinux
        '';
        type = writeText "net-type" ''
          longrun
        '';
      };
    };
  });

  # Template for the s6 scan directory.  It contains only
  # .s6-svscan/finish, which is an empty shell script.
  servicesDir = makeServicesDir {
    services = {
      ".s6-svscan" = {
        finish = writeShellScript ".s6-svscan-finish" "";
      };
    };
  };
in

# The host entry point.  PATH is replaced with the bin directories of the
# listed packages.  The script then:
#   1. writes 1 to /proc/sys/net/ipv4/ip_forward;
#   2. creates a temporary directory under $XDG_RUNTIME_DIR and prints its
#      path;
#   3. copies servicesDir into <top>/service, making directories
#      user-writable (--chmod=Du+w);
#   4. in the background, unpacks the compiled s6-rc database into
#      <top>/s6-rc/compiled and runs s6-rc-init against the scan directory;
#   5. execs s6-svscan on <top>/service.
writeScriptBin "spectrum-testhost" ''
  #! ${execline}/bin/execlineb -S0

  export PATH ${makeBinPath [
    coreutils curl execline gnused gnutar iproute iptables jq kmod mktuntap
    rsync s6 s6-rc
  ]}

  if { redirfd -w 1 /proc/sys/net/ipv4/ip_forward echo 1 }

  importas -iu runtime_dir XDG_RUNTIME_DIR
  backtick -in TOP { mktemp -dp $runtime_dir spectrum.XXXXXXXXXX }
  importas -iu top TOP
  if { echo $top }
  if { rsync -r --chmod=Du+w ${servicesDir}/ ''${top}/service }
  background {
    if { mkdir -p ''${top}/s6-rc/compiled }
    if { tar -C ''${top}/s6-rc/compiled -xf ${compiledRcServicesDir} }
    s6-rc-init -c ''${top}/s6-rc/compiled -l ''${top}/s6-rc/live ''${top}/service
  }
  s6-svscan ''${top}/service
''