From 6234ff3dfb6bd532612994be1ff34dc7558eb881 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Wed, 21 Apr 2021 10:15:50 +0000 Subject: spectrumPackages.spectrum-testhost: fix driver bind The following operations are available to us in /sys/bus/pci/drivers/vfio-pci: bind, unbind, new_id, remove_id bind and unbind control attachment of a device to a driver. new_id and remove_id control whether a device should be considered "supported" by a driver. But when an unassigned device is newly supported by a driver, the kernel will automatically bind it to that driver. We were relying on the new_id operation binding the device to the driver in this way. But if the device is in the supported list, but not bound to anything, this won't happen. new_id won't do anything, because it's already in the supported list. So we can't rely on new_id binding for us, and need to try bind as well (at least if new_id fails). To reproduce: echo 8086 1502 > /sys/bus/pci/drivers/vfio-pci/new_id echo 0000:00:19.0 > /sys/bus/pci/drivers/vfio-pci/unbind (Use lspci -nn to find the right values for your hardware.) This will leave you with a device that is supported by vfio-pci but not bound to it, which would previously cause vm-net to fail to start, but should not after this change. Message-Id: <20210421101605.20790-1-hi@alyssa.is> Reviewed-by: Cole Helbling --- pkgs/os-specific/linux/spectrum/testhost/default.nix | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pkgs/os-specific/linux/spectrum/testhost/default.nix b/pkgs/os-specific/linux/spectrum/testhost/default.nix index 7e1a973e8c6..21c585f1490 100644 --- a/pkgs/os-specific/linux/spectrum/testhost/default.nix +++ b/pkgs/os-specific/linux/spectrum/testhost/default.nix @@ -142,7 +142,9 @@ let printf "%s" $PCI_LOCATION } - # (Re)bind the device to the VFIO PCI driver. + # Tell the VFIO driver it should support our device. 
This + # is allowed to fail because it might already know that, in + # which case it'll return EEXIST. if { modprobe vfio-pci } backtick -in device_id { if { dd bs=2 skip=1 count=2 status=none if=''${PCI_PATH}/vendor } @@ -155,6 +157,20 @@ let printf "%s" $device_id } + # Bind the device to the VFIO driver. This is allowed to + # fail because the new_id operation we just tried will have + # bound it automatically for us if it succeeded. In such a + # case, the kernel will return ENODEV (consistency!). + foreground { + redirfd -w 1 /sys/bus/pci/drivers/vfio-pci/bind + printf "%s" $PCI_LOCATION + } + + # Because we allow both new_id and bind to fail, we need to + # manually make sure now that at least one of them succeeded + # and the device is actually attached to the vfio-driver. + if { test -e /sys/bus/pci/drivers/vfio-pci/''${PCI_LOCATION} } + foreground { mkdir env } ${cloud-hypervisor}/bin/cloud-hypervisor -- cgit 1.4.1