From 4147b878bcdd6fc8e8b6395215c71a0ebd0b23c1 Mon Sep 17 00:00:00 2001 From: r-vdp Date: Wed, 26 Apr 2023 00:44:23 +0200 Subject: nixos-test-driver: include a timeout for the recv call, do not assume sh == bash --- nixos/lib/test-driver/test_driver/machine.py | 26 +++++++++++++++++++++----- nixos/modules/testing/test-instrumentation.nix | 12 ++++++++++-- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/nixos/lib/test-driver/test_driver/machine.py b/nixos/lib/test-driver/test_driver/machine.py index 9de98c217a5..4b34ac423d1 100644 --- a/nixos/lib/test-driver/test_driver/machine.py +++ b/nixos/lib/test-driver/test_driver/machine.py @@ -7,6 +7,7 @@ import io import os import queue import re +import select import shlex import shutil import socket @@ -99,7 +100,7 @@ def _perform_ocr_on_screenshot( + "-blur 1x65535" ) - tess_args = f"-c debug_file=/dev/null --psm 11" + tess_args = "-c debug_file=/dev/null --psm 11" cmd = f"convert {magick_args} '{screenshot_path}' 'tiff:{screenshot_path}.tiff'" ret = subprocess.run(cmd, shell=True, capture_output=True) @@ -154,6 +155,7 @@ class StartCommand: # qemu options qemu_opts = ( " -device virtio-serial" + # Note: virtconsole will map to /dev/hvc0 in Linux guests " -device virtconsole,chardev=shell" " -device virtio-rng-pci" " -serial stdio" @@ -524,8 +526,10 @@ class Machine: if timeout is not None: timeout_str = f"timeout {timeout}" + # While sh is bash on NixOS, this is not the case for every distro. + # We explicitly call bash here to allow for the driver to boot other distros as well. 
out_command = ( - f"{timeout_str} sh -c {shlex.quote(command)} | (base64 --wrap 0; echo)\n" + f"{timeout_str} bash -c {shlex.quote(command)} | (base64 --wrap 0; echo)\n" ) assert self.shell @@ -719,6 +723,15 @@ class Machine: self.wait_for_unit(jobname) def connect(self) -> None: + def shell_ready(timeout_secs: int) -> bool: + """We send some data from the backdoor service running on the guest + to indicate that the backdoor shell is ready. + As soon as we read some data from the socket here, we assume that + our root shell is operational. + """ + (ready, _, _) = select.select([self.shell], [], [], timeout_secs) + return bool(ready) + if self.connected: return @@ -728,8 +741,11 @@ class Machine: assert self.shell tic = time.time() - self.shell.recv(1024) - # TODO: Timeout + # TODO: do we want to bail after a set number of attempts? + while not shell_ready(timeout_secs=30): + self.log("Guest root shell did not produce any data yet...") + + self.log(self.shell.recv(1024).decode()) toc = time.time() self.log("connected to guest root shell") @@ -950,7 +966,7 @@ class Machine: Prepares the machine to be reconnected which is useful if the machine was started with `allow_reboot = True` """ - self.send_key(f"ctrl-alt-delete") + self.send_key("ctrl-alt-delete") self.connected = False def wait_for_x(self) -> None: diff --git a/nixos/modules/testing/test-instrumentation.nix b/nixos/modules/testing/test-instrumentation.nix index 028099c6464..9c4bbecf480 100644 --- a/nixos/modules/testing/test-instrumentation.nix +++ b/nixos/modules/testing/test-instrumentation.nix @@ -36,8 +36,16 @@ in while ! exec 2> /dev/${qemu-common.qemuSerialDevice}; do sleep 0.1; done echo "connecting to host..." >&2 stty -F /dev/hvc0 raw -echo # prevent nl -> cr/nl conversion - echo - PS1= exec /bin/sh + # The following line is essential since it signals to + # the test driver that the shell is ready. + # See: the connect method in the Machine class. + echo "Spawning backdoor root shell..." 
+ # Passing the terminal device makes bash run non-interactively. + # Otherwise we get errors on the terminal because bash tries to + # set up things like job control. + # Note: calling bash explicitly here instead of sh makes sure that + # we can also run non-NixOS guests during tests. + PS1= exec /usr/bin/env bash --norc /dev/hvc0 ''; serviceConfig.KillSignal = "SIGHUP"; }; -- cgit 1.4.1