summary refs log tree commit diff
diff options
context:
space:
mode:
authorAlyssa Ross <hi@alyssa.is>2021-04-21 10:15:50 +0000
committerAlyssa Ross <hi@alyssa.is>2021-04-22 07:59:35 +0000
commit6234ff3dfb6bd532612994be1ff34dc7558eb881 (patch)
tree01ef51a8820081ca917896caf4ac29cce44cfa9a
parent583eb604ce3040d71a23df2576a7c2d5c1e4fe0a (diff)
downloadnixpkgs-6234ff3dfb6bd532612994be1ff34dc7558eb881.tar
nixpkgs-6234ff3dfb6bd532612994be1ff34dc7558eb881.tar.gz
nixpkgs-6234ff3dfb6bd532612994be1ff34dc7558eb881.tar.bz2
nixpkgs-6234ff3dfb6bd532612994be1ff34dc7558eb881.tar.lz
nixpkgs-6234ff3dfb6bd532612994be1ff34dc7558eb881.tar.xz
nixpkgs-6234ff3dfb6bd532612994be1ff34dc7558eb881.tar.zst
nixpkgs-6234ff3dfb6bd532612994be1ff34dc7558eb881.zip
spectrumPackages.spectrum-testhost: fix driver bind
The following operations are available to us in /sys/bus/pci/drivers/vfio-pci:
 bind, unbind, new_id, remove_id

bind and unbind control attachment of a device to a driver.
new_id and remove_id control whether a device should be considered
"supported" by a driver.

But when an unassigned device is newly supported by a driver, the
kernel will automatically bind it to that driver.  We were relying on
the new_id operation binding the device to the driver in this way.

But if the device is in the supported list, but not bound to anything,
this won't happen.  new_id won't do anything, because it's already in
the supported list.  So we can't rely on new_id binding for us, and
need to try bind as well (at least if new_id fails).

To reproduce:

    echo 8086 1502 > /sys/bus/pci/drivers/vfio-pci/new_id
    echo 0000:00:19.0 > /sys/bus/pci/drivers/vfio-pci/unbind

(Use lspci -nn to find the right values for your hardware.)
This will leave you with a device that is supported by vfio-pci but
not bound to it, which would previously cause vm-net to fail to start,
but should not after this change.

Message-Id: <20210421101605.20790-1-hi@alyssa.is>
Reviewed-by: Cole Helbling <cole.e.helbling@outlook.com>
-rw-r--r--pkgs/os-specific/linux/spectrum/testhost/default.nix18
1 files changed, 17 insertions, 1 deletions
diff --git a/pkgs/os-specific/linux/spectrum/testhost/default.nix b/pkgs/os-specific/linux/spectrum/testhost/default.nix
index 7e1a973e8c6..21c585f1490 100644
--- a/pkgs/os-specific/linux/spectrum/testhost/default.nix
+++ b/pkgs/os-specific/linux/spectrum/testhost/default.nix
@@ -142,7 +142,9 @@ let
             printf "%s" $PCI_LOCATION
           }
 
-          # (Re)bind the device to the VFIO PCI driver.
+          # Tell the VFIO driver it should support our device.  This
+          # is allowed to fail because it might already know that, in
+          # which case it'll return EEXIST.
           if { modprobe vfio-pci }
           backtick -in device_id {
             if { dd bs=2 skip=1 count=2 status=none if=''${PCI_PATH}/vendor }
@@ -155,6 +157,20 @@ let
             printf "%s" $device_id
           }
 
+          # Bind the device to the VFIO driver.  This is allowed to
+          # fail because the new_id operation we just tried will have
+          # bound it automatically for us if it succeeded.  In such a
+          # case, the kernel will return ENODEV (consistency!).
+          foreground {
+            redirfd -w 1 /sys/bus/pci/drivers/vfio-pci/bind
+            printf "%s" $PCI_LOCATION
+          }
+
+          # Because we allow both new_id and bind to fail, we need to
+          # manually make sure now that at least one of them succeeded
+          # and the device is actually attached to the vfio-pci driver.
+          if { test -e /sys/bus/pci/drivers/vfio-pci/''${PCI_LOCATION} }
+
           foreground { mkdir env }
 
           ${cloud-hypervisor}/bin/cloud-hypervisor