summary refs log blame commit diff
path: root/pkgs/os-specific/linux/spectrum/testhost/default.nix
blob: de62f0add67475619d7c38eafc4634187a9e9dc7 (plain) (tree)














































                                                                               






                                                                    
                                  

                                                                 

                                  

                                                                 















































































                                                                                          


                                                                     











                                                                             













                                                                        















































                                                                                 
{ lib, runCommandNoCC, writeScript, writeScriptBin, writeShellScript, writeText
, coreutils, cloud-hypervisor, crosvm, curl, execline, gnutar, gnused, iproute
, iptables, jq, kmod, mktuntap, rsync, s6, s6-rc, sys-vms, utillinux
}:

let
  inherit (lib) concatStrings escapeShellArg makeBinPath mapAttrsToList
    optionalString;

  # compose2 outer inner x y: apply the two-argument function `inner`
  # to x and y, then pass its result through the one-argument function
  # `outer`.
  compose2 = outer: inner: x: y: outer (inner x y);

  # concatMapAttrs f attrs: call `f name value` for every attribute of
  # `attrs` (via mapAttrsToList) and concatenate the resulting strings.
  concatMapAttrs = f: attrs: concatStrings (mapAttrsToList f attrs);

  # Build a directory of service definitions.
  #
  # `services` is an attribute set mapping each service name to an
  # attribute set of "file name -> path to copy".  The result is a
  # derivation whose output contains one subdirectory per service,
  # with every listed path copied (recursively) to
  # $out/<service name>/<file name>.
  #
  # Service and file names are shell-escaped before being spliced into
  # the builder script, so names containing whitespace or shell
  # metacharacters cannot break or alter the generated commands.
  makeServicesDir = { services }:
    runCommandNoCC "services" {} ''
      mkdir $out
      ${concatMapAttrs (name: attrs: ''
        mkdir $out/${escapeShellArg name}
        ${concatMapAttrs (key: value: ''
          cp -r ${value} $out/${escapeShellArg name}/${escapeShellArg key}
        '') attrs}
      '') services}
    '';

  # Compile an s6-rc source directory into a service database.
  #
  # The first argument is an option set: `fdhuser`, when non-null, is
  # shell-escaped and passed to s6-rc-compile with -h.  `source` is the
  # service source directory (for example the output of
  # makeServicesDir).
  #
  # The output of the derivation is a tar archive of the compiled
  # database directory, not the directory itself.
  s6RcCompile = { fdhuser ? null }: source:
    runCommandNoCC "s6-rc-compile" {} ''
      ${s6-rc}/bin/s6-rc-compile \
        ${optionalString (fdhuser != null) "-h ${escapeShellArg fdhuser}"} \
        dest ${source}
      # Normalise entry order, timestamps and ownership so that the
      # archive does not depend on the build time or the build user.
      tar -C dest --sort=name --mtime=@1 --owner=0 --group=0 \
        --numeric-owner -cf $out .
    '';

  # Compiled s6-rc database (a tar archive, as produced by s6RcCompile)
  # describing two longrun services: vm-net, and vm-app which depends
  # on vm-net.
  compiledRcServicesDir = s6RcCompile {} (makeServicesDir {
    services = {
      # Application VM, run under crosvm.
      vm-app = {
        # Run script (execline).  In order, it:
        #  - creates and brings up the bridge br0;
        #  - derives the router and client MAC addresses from
        #    sys-vms.app.vmID;
        #  - creates the TAP device vmtapnet, attaches it to br0, and
        #    asks the cloud-hypervisor instance of vm-net (through its
        #    API socket) to add it as a network device;
        #  - runs mktuntap to obtain a second TAP (name read back from
        #    TUNTAP_NAME, handed to crosvm as fd 6), attaches it to br0
        #    and adds a MASQUERADE rule for it;
        #  - finally executes crosvm with the app VM's rootfs and
        #    kernel.
        # NOTE(review): the script references a `port` variable in the
        # notifyport parameter, but no definition or substitution of
        # `port` is visible in this file -- confirm where it is meant
        # to come from.
        run = writeScript "app-run" ''
          #! ${execline}/bin/execlineb -S0
          # fdclose 0

          # Checking the return value of the bridge creation is
          # important, because if it fails due to the bridge already
          # existing that means something else could already be using
          # this bridge.
          if { ip link add name br0 type bridge }
          if { ip link set br0 up }

          # Calculate the MACs for our TAP and the router's TAP.
          # MAC address format, by octet:
          #
          #  0-3  Static OUI for Spectrum
          #    4  Most significant bit is used to differentiate
          #       routers from clients.  Other bits are reserved.
          #  5-6  Last two octets of client's IP (in 100.64.0.0/16).
          #
          backtick -i router_mac {
            pipeline { printf %.4x ${toString sys-vms.app.vmID} }
            sed s/^\\(..\\)\\(..\\)$/0A:B3:EC:80:\\1:\\2/
          }
          backtick -i client_mac {
            pipeline { printf %.4x ${toString sys-vms.app.vmID} }
            sed s/^\\(..\\)\\(..\\)$/0A:B3:EC:00:\\1:\\2/
          }
          multisubstitute {
            importas -iu router_mac router_mac
            importas -iu client_mac client_mac
          }

          # Create the net VM end, and attach it to the net VM.
          #
          # Use a hardcoded name for now because if we use a dynamic
          # one iproute2 has no way of telling us the name that was
          # chosen:
          # https://lore.kernel.org/netdev/20210406134240.wwumpnrzfjbttnmd@eve.qyliss.net/
          define other_tap_name vmtapnet
          # Try to delete the device in case the VM was powered off
          # (as the finish script wouldn't have been run in that
          # case.)  Since we check the return value of ip tuntap add,
          # in the case of a race condition between deleting the
          # device and creating it again, we'll just fail and try
          # again.
          foreground { ip link delete $other_tap_name }
          if { ip tuntap add name $other_tap_name mode tap }
          if { ip link set $other_tap_name master br0 }
          if { ip link set $other_tap_name up }
          if {
            pipeline {
              jq -n "$ARGS.named"
                --arg tap $other_tap_name
                --arg mac $router_mac
            }
            curl -iX PUT
              -H "Accept: application/json"
              -H "Content-Type: application/json"
              --data-binary @-
              --unix-socket ../vm-net/env/cloud-hypervisor.sock
              http://localhost/api/v1/vm.add-net
          }

          mktuntap -pvBi vmtap%d 6
          importas -iu tap_name TUNTAP_NAME
          if { ip link set $tap_name master br0 }
          if { ip link set $tap_name up }
          if { iptables -t nat -A POSTROUTING -o $tap_name -j MASQUERADE }

          ${crosvm}/bin/crosvm run -p init=/sbin/init -p notifyport=''${port}
            # --serial type=file,path=/tmp/app.log
            --cid 4
            --tap-fd 6,mac=''${client_mac}
            --root ${sys-vms.app.rootfs.squashfs} ${sys-vms.app.linux}/bzImage
        '';
        # Finish script: deletes the vmtapnet TAP device (failure is
        # ignored) and then the br0 bridge.
        # NOTE(review): the MASQUERADE rule added by the run script is
        # not removed here -- confirm whether that is intended.
        finish = writeScript "app-finish" ''
          #! ${execline}/bin/execlineb -S0
          # TODO: remove from vm-net
          foreground { ip link delete vmtapnet }
          ip link delete br0
        '';
        # s6-rc service type.
        type = writeText "app-type" ''
          longrun
        '';
        # vm-app's run script talks to vm-net's cloud-hypervisor API
        # socket, so vm-net is listed as a dependency.
        dependencies = writeText "app-dependencies" ''
          vm-net
        '';
      };

      # Network VM, run under cloud-hypervisor with the PCI device at
      # 0000:00:19.0 passed to it.
      vm-net = {
        # Run script (execline).  It unbinds the PCI device from its
        # current driver, loads vfio-pci, registers the device's
        # vendor/device ID with vfio-pci and binds the device to it,
        # verifies that the device ended up under the vfio-pci driver,
        # and then executes cloud-hypervisor with its API socket at
        # env/cloud-hypervisor.sock (relative to the service's working
        # directory).
        run = writeScript "net-run" ''
          #! ${execline}/bin/execlineb -S0
          # This is only necessary for when running s6 from a tty.
          # (i.e. when debugging or running the demo).
          redirfd -w 0 /dev/null

          define PCI_LOCATION 0000:00:19.0
          define PCI_PATH /sys/bus/pci/devices/''${PCI_LOCATION}

          # Unbind the network device from the driver it's already
          # attached to, if any.
          foreground {
            redirfd -w 1 ''${PCI_PATH}/driver/unbind
            printf "%s" $PCI_LOCATION
          }

          # Tell the VFIO driver it should support our device.  This
          # is allowed to fail because it might already know that, in
          # which case it'll return EEXIST.
          if { modprobe vfio-pci }
          backtick -in device_id {
            if { dd bs=2 skip=1 count=2 status=none if=''${PCI_PATH}/vendor }
            if { printf " " }
            dd bs=2 skip=1 count=2 status=none if=''${PCI_PATH}/device
          }
          importas -iu device_id device_id
          foreground {
            redirfd -w 1 /sys/bus/pci/drivers/vfio-pci/new_id
            printf "%s" $device_id
          }

          # Bind the device to the VFIO driver.  This is allowed to
          # fail because the new_id operation we just tried will have
          # bound it automatically for us if it succeeded.  In such a
          # case, the kernel will return ENODEV (conistency!).
          foreground {
            redirfd -w 1 /sys/bus/pci/drivers/vfio-pci/bind
            printf "%s" $PCI_LOCATION
          }

          # Because we allow both new_id and bind to fail, we need to
          # manually make sure now that at least one of them succeeded
          # and the device is actually attached to the vfio-driver.
          if { test -e /sys/bus/pci/drivers/vfio-pci/''${PCI_LOCATION} }

          foreground { mkdir env }

          ${cloud-hypervisor}/bin/cloud-hypervisor
            --api-socket env/cloud-hypervisor.sock
            --console off
            # --serial tty
            --cmdline "console=ttyS0 panic=30 root=/dev/vda"
            --device path=''${PCI_PATH}
            --disk path=${sys-vms.net.rootfs.squashfs},readonly=on
            --kernel ${sys-vms.net.linux.dev}/vmlinux
        '';
        # s6-rc service type.  No finish script or dependencies are
        # defined for vm-net.
        type = writeText "net-type" ''
          longrun
        '';
      };
    };
  });

  # Initial contents of the s6-svscan scan directory: just the
  # .s6-svscan control directory, holding a finish script generated by
  # writeShellScript with an empty body.
  servicesDir = makeServicesDir {
    services.".s6-svscan".finish = writeShellScript ".s6-svscan-finish" "";
  };
in

# Entry point: an execline script named spectrum-testhost that
#  1. sets PATH to the tools used by this script and the service
#     scripts;
#  2. enables IPv4 forwarding by writing 1 to
#     /proc/sys/net/ipv4/ip_forward;
#  3. creates a fresh spectrum.XXXXXXXXXX directory under
#     $XDG_RUNTIME_DIR and prints its path;
#  4. copies servicesDir into <top>/service, making directories
#     user-writable;
#  5. in the background, unpacks the compiled s6-rc database into
#     <top>/s6-rc/compiled and runs s6-rc-init against it;
#  6. executes s6-svscan on <top>/service.
# NOTE(review): s6-rc-init is launched in the background before
# s6-svscan is executed; presumably this relies on s6-svscan being up
# by the time the database has been unpacked -- confirm.
# NOTE(review): nothing visible here runs `s6-rc change`, so the
# services are presumably brought up by hand -- confirm.
writeScriptBin "spectrum-testhost" ''
  #! ${execline}/bin/execlineb -S0
  export PATH ${makeBinPath [
    coreutils curl execline gnused gnutar iproute iptables jq kmod mktuntap rsync
    s6 s6-rc
  ]}

  if { redirfd -w 1 /proc/sys/net/ipv4/ip_forward echo 1 }

  importas -iu runtime_dir XDG_RUNTIME_DIR
  backtick -in TOP { mktemp -dp $runtime_dir spectrum.XXXXXXXXXX }
  importas -iu top TOP
  if { echo $top }
  if { rsync -r --chmod=Du+w ${servicesDir}/ ''${top}/service }
  background {
    if { mkdir -p ''${top}/s6-rc/compiled }
    if { tar -C ''${top}/s6-rc/compiled -xf ${compiledRcServicesDir} }
    s6-rc-init -c ''${top}/s6-rc/compiled -l ''${top}/s6-rc/live ''${top}/service
  }
  s6-svscan ''${top}/service
''