diff options
Diffstat (limited to 'nixos/modules/virtualisation')
46 files changed, 1217 insertions, 304 deletions
diff --git a/nixos/modules/virtualisation/amazon-image.nix b/nixos/modules/virtualisation/amazon-image.nix index 20d48add712..26297a7d0f1 100644 --- a/nixos/modules/virtualisation/amazon-image.nix +++ b/nixos/modules/virtualisation/amazon-image.nix @@ -11,6 +11,7 @@ with lib; let cfg = config.ec2; metadataFetcher = import ./ec2-metadata-fetcher.nix { + inherit (pkgs) curl; targetRoot = "$targetRoot/"; wgetExtraOptions = "-q"; }; @@ -48,7 +49,7 @@ in ]; boot.initrd.kernelModules = [ "xen-blkfront" "xen-netfront" ]; boot.initrd.availableKernelModules = [ "ixgbevf" "ena" "nvme" ]; - boot.kernelParams = mkIf cfg.hvm [ "console=ttyS0" ]; + boot.kernelParams = mkIf cfg.hvm [ "console=ttyS0" "random.trust_cpu=on" ]; # Prevent the nouveau kernel module from being loaded, as it # interferes with the nvidia/nvidia-uvm modules needed for CUDA. @@ -123,7 +124,7 @@ in boot.initrd.extraUtilsCommands = '' # We need swapon in the initrd. - copy_bin_and_libs ${pkgs.utillinux}/sbin/swapon + copy_bin_and_libs ${pkgs.util-linux}/sbin/swapon ''; # Don't put old configurations in the GRUB menu. The user has no diff --git a/nixos/modules/virtualisation/amazon-init.nix b/nixos/modules/virtualisation/amazon-init.nix index 8c12e0e49bf..4f2f8df90eb 100644 --- a/nixos/modules/virtualisation/amazon-init.nix +++ b/nixos/modules/virtualisation/amazon-init.nix @@ -1,17 +1,31 @@ -{ config, pkgs, ... }: +{ config, lib, pkgs, ... }: + +with lib; let + cfg = config.virtualisation.amazon-init; + script = '' #!${pkgs.runtimeShell} -eu echo "attempting to fetch configuration from EC2 user data..." export HOME=/root - export PATH=${pkgs.lib.makeBinPath [ config.nix.package pkgs.systemd pkgs.gnugrep pkgs.git pkgs.gnutar pkgs.gzip pkgs.gnused config.system.build.nixos-rebuild]}:$PATH + export PATH=${pkgs.lib.makeBinPath [ config.nix.package pkgs.systemd pkgs.gnugrep pkgs.git pkgs.gnutar pkgs.gzip pkgs.gnused pkgs.xz config.system.build.nixos-rebuild]}:$PATH export NIX_PATH=nixpkgs=/nix/var/nix/profiles/per-user/root/channels/nixos:nixos-config=/etc/nixos/configuration.nix:/nix/var/nix/profiles/per-user/root/channels userData=/etc/ec2-metadata/user-data + # Check if user-data looks like a shell script and execute it with the + # runtime shell if it does. Otherwise treat it as a nixos configuration + # expression + if IFS= LC_ALL=C read -rN2 shebang < $userData && [ "$shebang" = '#!' ]; then + # NB: we cannot chmod the $userData file, this is why we execute it via + # `pkgs.runtimeShell`. This means we have only limited support for shell + # scripts compatible with the `pkgs.runtimeShell`. + exec ${pkgs.runtimeShell} $userData + fi + if [ -s "$userData" ]; then # If the user-data looks like it could be a nix expression, # copy it over. Also, look for a magic three-hash comment and set @@ -41,20 +55,33 @@ let nixos-rebuild switch ''; in { - systemd.services.amazon-init = { - inherit script; - description = "Reconfigure the system from EC2 userdata on startup"; - wantedBy = [ "multi-user.target" ]; - after = [ "multi-user.target" ]; - requires = [ "network-online.target" ]; + options.virtualisation.amazon-init = { + enable = mkOption { + default = true; + type = types.bool; + description = '' + Enable or disable the amazon-init service. + ''; + }; + }; + + config = mkIf cfg.enable { + systemd.services.amazon-init = { + inherit script; + description = "Reconfigure the system from EC2 userdata on startup"; + + wantedBy = [ "multi-user.target" ]; + after = [ "multi-user.target" ]; + requires = [ "network-online.target" ]; - restartIfChanged = false; - unitConfig.X-StopOnRemoval = false; + restartIfChanged = false; + unitConfig.X-StopOnRemoval = false; - serviceConfig = { - Type = "oneshot"; - RemainAfterExit = true; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + }; }; }; } diff --git a/nixos/modules/virtualisation/anbox.nix b/nixos/modules/virtualisation/anbox.nix index da5df358073..7b096bd1a9f 100644 --- a/nixos/modules/virtualisation/anbox.nix +++ b/nixos/modules/virtualisation/anbox.nix @@ -98,7 +98,6 @@ in environment.XDG_RUNTIME_DIR="${anboxloc}"; wantedBy = [ "multi-user.target" ]; - after = [ "systemd-udev-settle.service" ]; preStart = let initsh = pkgs.writeText "nixos-init" ('' #!/system/bin/sh diff --git a/nixos/modules/virtualisation/azure-agent.nix b/nixos/modules/virtualisation/azure-agent.nix index e85482af839..41f3fa0e664 100644 --- a/nixos/modules/virtualisation/azure-agent.nix +++ b/nixos/modules/virtualisation/azure-agent.nix @@ -22,7 +22,7 @@ let nettools # for hostname procps # for pidof shadow # for useradd, usermod - utillinux # for (u)mount, fdisk, sfdisk, mkswap + util-linux # for (u)mount, fdisk, sfdisk, mkswap parted ]; pythonPath = [ pythonPackages.pyasn1 ]; @@ -146,7 +146,7 @@ in services.logrotate = { enable = true; - config = '' + extraConfig = '' /var/log/waagent.log { compress monthly diff --git a/nixos/modules/virtualisation/azure-image.nix b/nixos/modules/virtualisation/azure-image.nix index 60fed3222ef..03dd3c05130 100644 --- a/nixos/modules/virtualisation/azure-image.nix +++ b/nixos/modules/virtualisation/azure-image.nix @@ -9,8 +9,9 @@ in options = { virtualisation.azureImage.diskSize = mkOption { - type = with types; int; - default = 2048; + type = with types; either (enum [ "auto" ]) int; + default = "auto"; + example = 2048; description = '' Size of disk image. Unit is MB. ''; diff --git a/nixos/modules/virtualisation/brightbox-image.nix b/nixos/modules/virtualisation/brightbox-image.nix index d0efbcc808a..9641b693f18 100644 --- a/nixos/modules/virtualisation/brightbox-image.nix +++ b/nixos/modules/virtualisation/brightbox-image.nix @@ -27,7 +27,7 @@ in popd ''; diskImageBase = "nixos-image-${config.system.nixos.label}-${pkgs.stdenv.hostPlatform.system}.raw"; - buildInputs = [ pkgs.utillinux pkgs.perl ]; + buildInputs = [ pkgs.util-linux pkgs.perl ]; exportReferencesGraph = [ "closure" config.system.build.toplevel ]; } @@ -119,7 +119,7 @@ in wants = [ "network-online.target" ]; after = [ "network-online.target" ]; - path = [ pkgs.wget pkgs.iproute ]; + path = [ pkgs.wget pkgs.iproute2 ]; script = '' diff --git a/nixos/modules/virtualisation/containerd.nix b/nixos/modules/virtualisation/containerd.nix new file mode 100644 index 00000000000..c7ceb816a31 --- /dev/null +++ b/nixos/modules/virtualisation/containerd.nix @@ -0,0 +1,96 @@ +{ pkgs, lib, config, ... }: +let + cfg = config.virtualisation.containerd; + + configFile = if cfg.configFile == null then + settingsFormat.generate "containerd.toml" cfg.settings + else + cfg.configFile; + + containerdConfigChecked = pkgs.runCommand "containerd-config-checked.toml" { + nativeBuildInputs = [ pkgs.containerd ]; + } '' + containerd -c ${configFile} config dump >/dev/null + ln -s ${configFile} $out + ''; + + settingsFormat = pkgs.formats.toml {}; +in +{ + + options.virtualisation.containerd = with lib.types; { + enable = lib.mkEnableOption "containerd container runtime"; + + configFile = lib.mkOption { + default = null; + description = '' + Path to containerd config file. + Setting this option will override any configuration applied by the settings option. + ''; + type = nullOr path; + }; + + settings = lib.mkOption { + type = settingsFormat.type; + default = {}; + description = '' + Verbatim lines to add to containerd.toml + ''; + }; + + args = lib.mkOption { + default = {}; + description = "extra args to append to the containerd cmdline"; + type = attrsOf str; + }; + }; + + config = lib.mkIf cfg.enable { + warnings = lib.optional (cfg.configFile != null) '' + `virtualisation.containerd.configFile` is deprecated. use `virtualisation.containerd.settings` instead. + ''; + + virtualisation.containerd = { + args.config = toString containerdConfigChecked; + settings = { + plugins.cri.containerd.snapshotter = lib.mkIf config.boot.zfs.enabled "zfs"; + plugins.cri.cni.bin_dir = lib.mkDefault "${pkgs.cni-plugins}/bin"; + }; + }; + + environment.systemPackages = [ pkgs.containerd ]; + + systemd.services.containerd = { + description = "containerd - container runtime"; + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + path = with pkgs; [ + containerd + runc + iptables + ] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package; + serviceConfig = { + ExecStart = ''${pkgs.containerd}/bin/containerd ${lib.concatStringsSep " " (lib.cli.toGNUCommandLine {} cfg.args)}''; + Delegate = "yes"; + KillMode = "process"; + Type = "notify"; + Restart = "always"; + RestartSec = "10"; + + # "limits" defined below are adopted from upstream: https://github.com/containerd/containerd/blob/master/containerd.service + LimitNPROC = "infinity"; + LimitCORE = "infinity"; + LimitNOFILE = "infinity"; + TasksMax = "infinity"; + OOMScoreAdjust = "-999"; + + StateDirectory = "containerd"; + RuntimeDirectory = "containerd"; + }; + unitConfig = { + StartLimitBurst = "16"; + StartLimitIntervalSec = "120s"; + }; + }; + }; +} diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index 3a6767d84a9..84824e2f90f 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -1,22 +1,10 @@ -{ config, lib, pkgs, ... }: +{ config, lib, pkgs, utils, ... }: let cfg = config.virtualisation.containers; inherit (lib) mkOption types; - # Once https://github.com/NixOS/nixpkgs/pull/75584 is merged we can use the TOML generator - toTOML = name: value: pkgs.runCommandNoCC name { - nativeBuildInputs = [ pkgs.remarshal ]; - value = builtins.toJSON value; - passAsFile = [ "value" ]; - } '' - json2toml "$valuePath" "$out" - ''; - - # Copy configuration files to avoid having the entire sources in the system closure - copyFile = filePath: pkgs.runCommandNoCC (builtins.unsafeDiscardStringContext (builtins.baseNameOf filePath)) {} '' - cp ${filePath} $out - ''; + toml = pkgs.formats.toml { }; in { meta = { @@ -30,6 +18,11 @@ in [ "virtualisation" "containers" "users" ] "All users with `isNormalUser = true` set now get appropriate subuid/subgid mappings." ) + ( + lib.mkRemovedOptionModule + [ "virtualisation" "containers" "containersConf" "extraConfig" ] + "Use virtualisation.containers.containersConf.settings instead." + ) ]; options.virtualisation.containers = { @@ -43,23 +36,45 @@ in ''; }; - containersConf = mkOption { - default = {}; + ociSeccompBpfHook.enable = mkOption { + type = types.bool; + default = false; + description = "Enable the OCI seccomp BPF hook"; + }; + + containersConf.settings = mkOption { + type = toml.type; + default = { }; description = "containers.conf configuration"; - type = types.submodule { - options = { + }; - extraConfig = mkOption { - type = types.lines; - default = ""; - description = '' - Extra configuration that should be put in the containers.conf - configuration file - ''; + containersConf.cniPlugins = mkOption { + type = types.listOf types.package; + defaultText = '' + [ + pkgs.cni-plugins + ] + ''; + example = lib.literalExample '' + [ + pkgs.cniPlugins.dnsname + ] + ''; + description = '' + CNI plugins to install on the system. + ''; + }; - }; + storage.settings = mkOption { + type = toml.type; + default = { + storage = { + driver = "overlay"; + graphroot = "/var/lib/containers/storage"; + runroot = "/run/containers/storage"; }; }; + description = "storage.conf configuration"; }; registries = { @@ -112,19 +127,30 @@ in config = lib.mkIf cfg.enable { - environment.etc."containers/containers.conf".text = '' - [network] - cni_plugin_dirs = ["${pkgs.cni-plugins}/bin/"] + virtualisation.containers.containersConf.cniPlugins = [ pkgs.cni-plugins ]; + + virtualisation.containers.containersConf.settings = { + network.cni_plugin_dirs = map (p: "${lib.getBin p}/bin") cfg.containersConf.cniPlugins; + engine = { + init_path = "${pkgs.catatonit}/bin/catatonit"; + } // lib.optionalAttrs cfg.ociSeccompBpfHook.enable { + hooks_dir = [ config.boot.kernelPackages.oci-seccomp-bpf-hook ]; + }; + }; + + environment.etc."containers/containers.conf".source = + toml.generate "containers.conf" cfg.containersConf.settings; - '' + cfg.containersConf.extraConfig; + environment.etc."containers/storage.conf".source = + toml.generate "storage.conf" cfg.storage.settings; - environment.etc."containers/registries.conf".source = toTOML "registries.conf" { + environment.etc."containers/registries.conf".source = toml.generate "registries.conf" { registries = lib.mapAttrs (n: v: { registries = v; }) cfg.registries; }; environment.etc."containers/policy.json".source = if cfg.policy != {} then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy) - else copyFile "${pkgs.skopeo.src}/default-policy.json"; + else utils.copyFile "${pkgs.skopeo.src}/default-policy.json"; }; } diff --git a/nixos/modules/virtualisation/cri-o.nix b/nixos/modules/virtualisation/cri-o.nix index 9c818eee73b..c135081959a 100644 --- a/nixos/modules/virtualisation/cri-o.nix +++ b/nixos/modules/virtualisation/cri-o.nix @@ -1,16 +1,14 @@ -{ config, lib, pkgs, ... }: +{ config, lib, pkgs, utils, ... }: with lib; - let cfg = config.virtualisation.cri-o; crioPackage = (pkgs.cri-o.override { inherit (cfg) extraPackages; }); - # Copy configuration files to avoid having the entire sources in the system closure - copyFile = filePath: pkgs.runCommandNoCC (builtins.unsafeDiscardStringContext (builtins.baseNameOf filePath)) {} '' - cp ${filePath} $out - ''; + format = pkgs.formats.toml { }; + + cfgFile = format.generate "00-default.conf" cfg.settings; in { imports = [ @@ -18,7 +16,7 @@ in ]; meta = { - maintainers = lib.teams.podman.members; + maintainers = teams.podman.members; }; options.virtualisation.cri-o = { @@ -60,7 +58,7 @@ in extraPackages = mkOption { type = with types; listOf package; default = [ ]; - example = lib.literalExample '' + example = literalExample '' [ pkgs.gvisor ] @@ -70,7 +68,7 @@ in ''; }; - package = lib.mkOption { + package = mkOption { type = types.package; default = crioPackage; internal = true; @@ -78,39 +76,61 @@ in The final CRI-O package (including extra packages). ''; }; + + networkDir = mkOption { + type = types.nullOr types.path; + default = null; + description = "Override the network_dir option."; + internal = true; + }; + + settings = mkOption { + type = format.type; + default = { }; + description = '' + Configuration for cri-o, see + <link xlink:href="https://github.com/cri-o/cri-o/blob/master/docs/crio.conf.5.md"/>. + ''; + }; }; config = mkIf cfg.enable { environment.systemPackages = [ cfg.package pkgs.cri-tools ]; - environment.etc."crictl.yaml".source = copyFile "${pkgs.cri-o-unwrapped.src}/crictl.yaml"; + environment.etc."crictl.yaml".source = utils.copyFile "${pkgs.cri-o-unwrapped.src}/crictl.yaml"; - environment.etc."crio/crio.conf.d/00-default.conf".text = '' - [crio] - storage_driver = "${cfg.storageDriver}" + virtualisation.cri-o.settings.crio = { + storage_driver = cfg.storageDriver; - [crio.image] - ${optionalString (cfg.pauseImage != null) ''pause_image = "${cfg.pauseImage}"''} - ${optionalString (cfg.pauseCommand != null) ''pause_command = "${cfg.pauseCommand}"''} - - [crio.network] - plugin_dirs = ["${pkgs.cni-plugins}/bin/"] + image = { + pause_image = mkIf (cfg.pauseImage != null) cfg.pauseImage; + pause_command = mkIf (cfg.pauseCommand != null) cfg.pauseCommand; + }; - [crio.runtime] - cgroup_manager = "systemd" - log_level = "${cfg.logLevel}" - manage_ns_lifecycle = true - pinns_path = "${cfg.package}/bin/pinns" + network = { + plugin_dirs = [ "${pkgs.cni-plugins}/bin" ]; + network_dir = mkIf (cfg.networkDir != null) cfg.networkDir; + }; - ${optionalString (cfg.runtime != null) '' - default_runtime = "${cfg.runtime}" - [crio.runtime.runtimes] - [crio.runtime.runtimes.${cfg.runtime}] - ''} - ''; + runtime = { + cgroup_manager = "systemd"; + log_level = cfg.logLevel; + manage_ns_lifecycle = true; + pinns_path = "${cfg.package}/bin/pinns"; + hooks_dir = + optional (config.virtualisation.containers.ociSeccompBpfHook.enable) + config.boot.kernelPackages.oci-seccomp-bpf-hook; + + default_runtime = mkIf (cfg.runtime != null) cfg.runtime; + runtimes = mkIf (cfg.runtime != null) { + "${cfg.runtime}" = { }; + }; + }; + }; - environment.etc."cni/net.d/10-crio-bridge.conf".source = copyFile "${pkgs.cri-o-unwrapped.src}/contrib/cni/10-crio-bridge.conf"; - environment.etc."cni/net.d/99-loopback.conf".source = copyFile "${pkgs.cri-o-unwrapped.src}/contrib/cni/99-loopback.conf"; + environment.etc."cni/net.d/10-crio-bridge.conf".source = utils.copyFile "${pkgs.cri-o-unwrapped.src}/contrib/cni/10-crio-bridge.conf"; + environment.etc."cni/net.d/99-loopback.conf".source = utils.copyFile "${pkgs.cri-o-unwrapped.src}/contrib/cni/99-loopback.conf"; + environment.etc."crio/crio.conf.d/00-default.conf".source = cfgFile; # Enable common /etc/containers configuration virtualisation.containers.enable = true; @@ -133,6 +153,7 @@ in TimeoutStartSec = "0"; Restart = "on-abnormal"; }; + restartTriggers = [ cfgFile ]; }; }; } diff --git a/nixos/modules/virtualisation/digital-ocean-image.nix b/nixos/modules/virtualisation/digital-ocean-image.nix index b582e235d43..0ff2ee591f2 100644 --- a/nixos/modules/virtualisation/digital-ocean-image.nix +++ b/nixos/modules/virtualisation/digital-ocean-image.nix @@ -10,8 +10,9 @@ in options = { virtualisation.digitalOceanImage.diskSize = mkOption { - type = with types; int; - default = 4096; + type = with types; either (enum [ "auto" ]) int; + default = "auto"; + example = 4096; description = '' Size of disk image. Unit is MB. ''; diff --git a/nixos/modules/virtualisation/docker.nix b/nixos/modules/virtualisation/docker.nix index d87ada35a0a..29f133786d8 100644 --- a/nixos/modules/virtualisation/docker.nix +++ b/nixos/modules/virtualisation/docker.nix @@ -150,6 +150,10 @@ in config = mkIf cfg.enable (mkMerge [{ boot.kernelModules = [ "bridge" "veth" ]; + boot.kernel.sysctl = { + "net.ipv4.conf.all.forwarding" = mkOverride 98 true; + "net.ipv4.conf.default.forwarding" = mkOverride 98 true; + }; environment.systemPackages = [ cfg.package ] ++ optional cfg.enableNvidia pkgs.nvidia-docker; users.groups.docker.gid = config.ids.gids.docker; @@ -157,8 +161,11 @@ in systemd.services.docker = { wantedBy = optional cfg.enableOnBoot "multi-user.target"; + after = [ "network.target" "docker.socket" ]; + requires = [ "docker.socket" ]; environment = proxy_env; serviceConfig = { + Type = "notify"; ExecStart = [ "" '' @@ -212,13 +219,10 @@ in message = "Option enableNvidia requires 32bit support libraries"; }]; } - (mkIf cfg.enableNvidia { - environment.etc."nvidia-container-runtime/config.toml".source = "${pkgs.nvidia-docker}/etc/config.toml"; - }) ]); imports = [ - (mkRemovedOptionModule ["virtualisation" "docker" "socketActivation"] "This option was removed in favor of starting docker at boot") + (mkRemovedOptionModule ["virtualisation" "docker" "socketActivation"] "This option was removed and socket activation is now always active") ]; } diff --git a/nixos/modules/virtualisation/ec2-amis.nix b/nixos/modules/virtualisation/ec2-amis.nix index 24de8cf1afb..d38f41ab39d 100644 --- a/nixos/modules/virtualisation/ec2-amis.nix +++ b/nixos/modules/virtualisation/ec2-amis.nix @@ -329,5 +329,43 @@ let self = { "20.03".ap-east-1.hvm-ebs = "ami-0d18fdd309cdefa86"; "20.03".sa-east-1.hvm-ebs = "ami-09859378158ae971d"; - latest = self."20.03"; + # 20.09.2016.19db3e5ea27 + "20.09".eu-west-1.hvm-ebs = "ami-0057cb7d614329fa2"; + "20.09".eu-west-2.hvm-ebs = "ami-0d46f16e0bb0ec8fd"; + "20.09".eu-west-3.hvm-ebs = "ami-0e8985c3ea42f87fe"; + "20.09".eu-central-1.hvm-ebs = "ami-0eed77c38432886d2"; + "20.09".eu-north-1.hvm-ebs = "ami-0be5bcadd632bea14"; + "20.09".us-east-1.hvm-ebs = "ami-0a2cce52b42daccc8"; + "20.09".us-east-2.hvm-ebs = "ami-09378bf487b07a4d8"; + "20.09".us-west-1.hvm-ebs = "ami-09b4337b2a9e77485"; + "20.09".us-west-2.hvm-ebs = "ami-081d3bb5fbee0a1ac"; + "20.09".ca-central-1.hvm-ebs = "ami-020c24c6c607e7ac7"; + "20.09".ap-southeast-1.hvm-ebs = "ami-08f648d5db009e67d"; + "20.09".ap-southeast-2.hvm-ebs = "ami-0be390efaccbd40f9"; + "20.09".ap-northeast-1.hvm-ebs = "ami-0c3311601cbe8f927"; + "20.09".ap-northeast-2.hvm-ebs = "ami-0020146701f4d56cf"; + "20.09".ap-south-1.hvm-ebs = "ami-0117e2bd876bb40d1"; + "20.09".ap-east-1.hvm-ebs = "ami-0c42f97e5b1fda92f"; + "20.09".sa-east-1.hvm-ebs = "ami-021637976b094959d"; + + # 21.05.740.aa576357673 + "21.05".eu-west-1.hvm-ebs = "ami-048dbc738074a3083"; + "21.05".eu-west-2.hvm-ebs = "ami-0234cf81fec68315d"; + "21.05".eu-west-3.hvm-ebs = "ami-020e459baf709107d"; + "21.05".eu-central-1.hvm-ebs = "ami-0857d5d1309ab8b77"; + "21.05".eu-north-1.hvm-ebs = "ami-05403e3ae53d3716f"; + "21.05".us-east-1.hvm-ebs = "ami-0d3002ba40b5b9897"; + "21.05".us-east-2.hvm-ebs = "ami-069a0ca1bde6dea52"; + "21.05".us-west-1.hvm-ebs = "ami-0b415460a84bcf9bc"; + "21.05".us-west-2.hvm-ebs = "ami-093cba49754abd7f8"; + "21.05".ca-central-1.hvm-ebs = "ami-065c13e1d52d60b33"; + "21.05".ap-southeast-1.hvm-ebs = "ami-04f570c70ff9b665e"; + "21.05".ap-southeast-2.hvm-ebs = "ami-02a3d1df595df5ef6"; + "21.05".ap-northeast-1.hvm-ebs = "ami-027836fddb5c56012"; + "21.05".ap-northeast-2.hvm-ebs = "ami-0edacd41dc7700c39"; + "21.05".ap-south-1.hvm-ebs = "ami-0b279b5bb55288059"; + "21.05".ap-east-1.hvm-ebs = "ami-06dc98082bc55c1fc"; + "21.05".sa-east-1.hvm-ebs = "ami-04737dd49b98936c6"; + + latest = self."21.05"; }; in self diff --git a/nixos/modules/virtualisation/ec2-data.nix b/nixos/modules/virtualisation/ec2-data.nix index 62912535018..1b764e7e4d8 100644 --- a/nixos/modules/virtualisation/ec2-data.nix +++ b/nixos/modules/virtualisation/ec2-data.nix @@ -19,7 +19,7 @@ with lib; wantedBy = [ "multi-user.target" "sshd.service" ]; before = [ "sshd.service" ]; - path = [ pkgs.iproute ]; + path = [ pkgs.iproute2 ]; script = '' diff --git a/nixos/modules/virtualisation/ec2-metadata-fetcher.nix b/nixos/modules/virtualisation/ec2-metadata-fetcher.nix index b531787c31a..760f024f33f 100644 --- a/nixos/modules/virtualisation/ec2-metadata-fetcher.nix +++ b/nixos/modules/virtualisation/ec2-metadata-fetcher.nix @@ -1,23 +1,77 @@ -{ targetRoot, wgetExtraOptions }: +{ curl, targetRoot, wgetExtraOptions }: +# Note: be very cautious about dependencies, each dependency grows +# the closure of the initrd. Ideally we would not even require curl, +# but there is no reasonable way to send an HTTP PUT request without +# it. Note: do not be fooled: the wget referenced in this script +# is busybox's wget, not the fully featured one with --method support. +# +# Make sure that every package you depend on here is already listed as +# a channel blocker for both the full-sized and small channels. +# Otherwise, we risk breaking user deploys in released channels. +# +# Also note: OpenStack's metadata service for its instances aims to be +# compatible with the EC2 IMDS. Where possible, try to keep the set of +# fetched metadata in sync with ./openstack-metadata-fetcher.nix . '' metaDir=${targetRoot}etc/ec2-metadata mkdir -m 0755 -p "$metaDir" + rm -f "$metaDir/*" - echo "getting EC2 instance metadata..." + get_imds_token() { + # retry-delay of 1 selected to give the system a second to get going, + # but not add a lot to the bootup time + ${curl}/bin/curl \ + -v \ + --retry 3 \ + --retry-delay 1 \ + --fail \ + -X PUT \ + --connect-timeout 1 \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 600" \ + http://169.254.169.254/latest/api/token + } - if ! [ -e "$metaDir/ami-manifest-path" ]; then - wget ${wgetExtraOptions} -O "$metaDir/ami-manifest-path" http://169.254.169.254/1.0/meta-data/ami-manifest-path - fi + preflight_imds_token() { + # retry-delay of 1 selected to give the system a second to get going, + # but not add a lot to the bootup time + ${curl}/bin/curl \ + -v \ + --retry 3 \ + --retry-delay 1 \ + --fail \ + --connect-timeout 1 \ + -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \ + http://169.254.169.254/1.0/meta-data/instance-id + } - if ! [ -e "$metaDir/user-data" ]; then - wget ${wgetExtraOptions} -O "$metaDir/user-data" http://169.254.169.254/1.0/user-data && chmod 600 "$metaDir/user-data" - fi + try=1 + while [ $try -le 3 ]; do + echo "(attempt $try/3) getting an EC2 instance metadata service v2 token..." + IMDS_TOKEN=$(get_imds_token) && break + try=$((try + 1)) + sleep 1 + done - if ! [ -e "$metaDir/hostname" ]; then - wget ${wgetExtraOptions} -O "$metaDir/hostname" http://169.254.169.254/1.0/meta-data/hostname + if [ "x$IMDS_TOKEN" == "x" ]; then + echo "failed to fetch an IMDS2v token." fi - if ! [ -e "$metaDir/public-keys-0-openssh-key" ]; then - wget ${wgetExtraOptions} -O "$metaDir/public-keys-0-openssh-key" http://169.254.169.254/1.0/meta-data/public-keys/0/openssh-key - fi + try=1 + while [ $try -le 10 ]; do + echo "(attempt $try/10) validating the EC2 instance metadata service v2 token..." + preflight_imds_token && break + try=$((try + 1)) + sleep 1 + done + + echo "getting EC2 instance metadata..." + + wget_imds() { + wget ${wgetExtraOptions} --header "X-aws-ec2-metadata-token: $IMDS_TOKEN" "$@"; + } + + wget_imds -O "$metaDir/ami-manifest-path" http://169.254.169.254/1.0/meta-data/ami-manifest-path + (umask 077 && wget_imds -O "$metaDir/user-data" http://169.254.169.254/1.0/user-data) + wget_imds -O "$metaDir/hostname" http://169.254.169.254/1.0/meta-data/hostname + wget_imds -O "$metaDir/public-keys-0-openssh-key" http://169.254.169.254/1.0/meta-data/public-keys/0/openssh-key '' diff --git a/nixos/modules/virtualisation/fetch-instance-ssh-keys.bash b/nixos/modules/virtualisation/fetch-instance-ssh-keys.bash new file mode 100644 index 00000000000..4a860196111 --- /dev/null +++ b/nixos/modules/virtualisation/fetch-instance-ssh-keys.bash @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -euo pipefail + +WGET() { + wget --retry-connrefused -t 15 --waitretry=10 --header='Metadata-Flavor: Google' "$@" +} + +# When dealing with cryptographic keys, we want to keep things private. +umask 077 +mkdir -p /root/.ssh + +echo "Fetching authorized keys..." +WGET -O /tmp/auth_keys http://metadata.google.internal/computeMetadata/v1/instance/attributes/sshKeys + +# Read keys one by one, split in case Google decided +# to append metadata (it does sometimes) and add to +# authorized_keys if not already present. +touch /root/.ssh/authorized_keys +while IFS='' read -r line || [[ -n "$line" ]]; do + keyLine=$(echo -n "$line" | cut -d ':' -f2) + IFS=' ' read -r -a array <<<"$keyLine" + if [[ ${#array[@]} -ge 3 ]]; then + echo "${array[@]:0:3}" >>/tmp/new_keys + echo "Added ${array[*]:2} to authorized_keys" + fi +done </tmp/auth_keys +mv /tmp/new_keys /root/.ssh/authorized_keys +chmod 600 /root/.ssh/authorized_keys + +echo "Fetching host keys..." +WGET -O /tmp/ssh_host_ed25519_key http://metadata.google.internal/computeMetadata/v1/instance/attributes/ssh_host_ed25519_key +WGET -O /tmp/ssh_host_ed25519_key.pub http://metadata.google.internal/computeMetadata/v1/instance/attributes/ssh_host_ed25519_key_pub +mv -f /tmp/ssh_host_ed25519_key* /etc/ssh/ +chmod 600 /etc/ssh/ssh_host_ed25519_key +chmod 644 /etc/ssh/ssh_host_ed25519_key.pub diff --git a/nixos/modules/virtualisation/gce-images.nix b/nixos/modules/virtualisation/gce-images.nix index 5354d91deb9..7b027619a44 100644 --- a/nixos/modules/virtualisation/gce-images.nix +++ b/nixos/modules/virtualisation/gce-images.nix @@ -5,5 +5,13 @@ let self = { "17.03" = "gs://nixos-cloud-images/nixos-image-17.03.1082.4aab5c5798-x86_64-linux.raw.tar.gz"; "18.03" = "gs://nixos-cloud-images/nixos-image-18.03.132536.fdb5ba4cdf9-x86_64-linux.raw.tar.gz"; "18.09" = "gs://nixos-cloud-images/nixos-image-18.09.1228.a4c4cbb613c-x86_64-linux.raw.tar.gz"; - latest = self."18.09"; + + # This format will be handled by the upcoming NixOPS 2.0 release. + # The old images based on a GS object are deprecated. + "20.09" = { + project = "nixos-cloud"; + name = "nixos-image-20-09-3531-3858fbc08e6-x86-64-linux"; + }; + + latest = self."20.09"; }; in self diff --git a/nixos/modules/virtualisation/google-compute-config.nix b/nixos/modules/virtualisation/google-compute-config.nix index 327324f2921..cff48d20b2b 100644 --- a/nixos/modules/virtualisation/google-compute-config.nix +++ b/nixos/modules/virtualisation/google-compute-config.nix @@ -69,6 +69,31 @@ in # GC has 1460 MTU networking.interfaces.eth0.mtu = 1460; + # Used by NixOps + systemd.services.fetch-instance-ssh-keys = { + description = "Fetch host keys and authorized_keys for root user"; + + wantedBy = [ "sshd.service" ]; + before = [ "sshd.service" ]; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + path = [ pkgs.wget ]; + + serviceConfig = { + Type = "oneshot"; + ExecStart = pkgs.runCommand "fetch-instance-ssh-keys" { } '' + cp ${./fetch-instance-ssh-keys.bash} $out + chmod +x $out + ${pkgs.shfmt}/bin/shfmt -i 4 -d $out + ${pkgs.shellcheck}/bin/shellcheck $out + patchShebangs $out + ''; + PrivateTmp = true; + StandardError = "journal+console"; + StandardOutput = "journal+console"; + }; + }; + systemd.services.google-instance-setup = { description = "Google Compute Engine Instance Setup"; after = [ "network-online.target" "network.target" "rsyslog.service" ]; @@ -85,7 +110,7 @@ in systemd.services.google-network-daemon = { description = "Google Compute Engine Network Daemon"; after = [ "network-online.target" "network.target" "google-instance-setup.service" ]; - path = with pkgs; [ iproute ]; + path = with pkgs; [ iproute2 ]; serviceConfig = { ExecStart = "${gce}/bin/google_network_daemon"; StandardOutput="journal+console"; diff --git a/nixos/modules/virtualisation/google-compute-image.nix b/nixos/modules/virtualisation/google-compute-image.nix index d172ae38fdc..79c3921669e 100644 --- a/nixos/modules/virtualisation/google-compute-image.nix +++ b/nixos/modules/virtualisation/google-compute-image.nix @@ -18,8 +18,9 @@ in options = { virtualisation.googleComputeImage.diskSize = mkOption { - type = with types; int; - default = 1536; + type = with types; either (enum [ "auto" ]) int; + default = "auto"; + example = 1536; description = '' Size of disk image. Unit is MB. ''; @@ -43,7 +44,7 @@ in system.build.googleComputeImage = import ../../lib/make-disk-image.nix { name = "google-compute-image"; postVM = '' - PATH=$PATH:${with pkgs; stdenv.lib.makeBinPath [ gnutar gzip ]} + PATH=$PATH:${with pkgs; lib.makeBinPath [ gnutar gzip ]} pushd $out mv $diskImage disk.raw tar -Szcf nixos-image-${config.system.nixos.label}-${pkgs.stdenv.hostPlatform.system}.raw.tar.gz disk.raw diff --git a/nixos/modules/virtualisation/hyperv-guest.nix b/nixos/modules/virtualisation/hyperv-guest.nix index adc2810a993..b3bcfff1980 100644 --- a/nixos/modules/virtualisation/hyperv-guest.nix +++ b/nixos/modules/virtualisation/hyperv-guest.nix @@ -31,6 +31,8 @@ in { "hv_balloon" "hv_netvsc" "hv_storvsc" "hv_utils" "hv_vmbus" ]; + initrd.availableKernelModules = [ "hyperv_keyboard" ]; + kernelParams = [ "video=hyperv_fb:${cfg.videoMode} elevator=noop" ]; @@ -38,8 +40,6 @@ in { environment.systemPackages = [ config.boot.kernelPackages.hyperv-daemons.bin ]; - security.rngd.enable = false; - # enable hotadding cpu/memory services.udev.packages = lib.singleton (pkgs.writeTextFile { name = "hyperv-cpu-and-memory-hotadd-udev-rules"; @@ -56,6 +56,8 @@ in { systemd = { packages = [ config.boot.kernelPackages.hyperv-daemons.lib ]; + services.hv-vss.unitConfig.ConditionPathExists = [ "/dev/vmbus/hv_vss" ]; + targets.hyperv-daemons = { wantedBy = [ "multi-user.target" ]; }; diff --git a/nixos/modules/virtualisation/hyperv-image.nix b/nixos/modules/virtualisation/hyperv-image.nix index fabc9113dfc..6845d675009 100644 --- a/nixos/modules/virtualisation/hyperv-image.nix +++ b/nixos/modules/virtualisation/hyperv-image.nix @@ -9,8 +9,9 @@ in { options = { hyperv = { baseImageSize = mkOption { - type = types.int; - default = 2048; + type = with types; either (enum [ "auto" ]) int; + default = "auto"; + example = 2048; description = '' The size of the hyper-v base image in MiB. ''; diff --git a/nixos/modules/virtualisation/kvmgt.nix b/nixos/modules/virtualisation/kvmgt.nix index e08ad344628..72bd2c24e56 100644 --- a/nixos/modules/virtualisation/kvmgt.nix +++ b/nixos/modules/virtualisation/kvmgt.nix @@ -82,5 +82,5 @@ in { }; }; - meta.maintainers = with maintainers; [ gnidorah ]; + meta.maintainers = with maintainers; [ ]; } diff --git a/nixos/modules/virtualisation/libvirtd.nix b/nixos/modules/virtualisation/libvirtd.nix index 1d6a9457dde..f45f1802d91 100644 --- a/nixos/modules/virtualisation/libvirtd.nix +++ b/nixos/modules/virtualisation/libvirtd.nix @@ -11,9 +11,10 @@ let auth_unix_rw = "polkit" ${cfg.extraConfig} ''; + ovmfFilePrefix = if pkgs.stdenv.isAarch64 then "AAVMF" else "OVMF"; qemuConfigFile = pkgs.writeText "qemu.conf" '' ${optionalString cfg.qemuOvmf '' - nvram = ["/run/libvirt/nix-ovmf/OVMF_CODE.fd:/run/libvirt/nix-ovmf/OVMF_VARS.fd"] + nvram = [ "/run/libvirt/nix-ovmf/${ovmfFilePrefix}_CODE.fd:/run/libvirt/nix-ovmf/${ovmfFilePrefix}_VARS.fd" ] ''} ${optionalString (!cfg.qemuRunAsRoot) '' user = "qemu-libvirtd" @@ -46,6 +47,15 @@ in { ''; }; + package = mkOption { + type = types.package; + default = pkgs.libvirt; + defaultText = "pkgs.libvirt"; + description = '' + libvirt package to use. + ''; + }; + qemuPackage = mkOption { type = types.package; default = pkgs.qemu; @@ -145,12 +155,19 @@ in { config = mkIf cfg.enable { + assertions = [ + { + assertion = config.security.polkit.enable; + message = "The libvirtd module currently requires Polkit to be enabled ('security.polkit.enable = true')."; + } + ]; + environment = { # this file is expected in /etc/qemu and not sysconfdir (/var/lib) etc."qemu/bridge.conf".text = lib.concatMapStringsSep "\n" (e: "allow ${e}") cfg.allowedBridges; - systemPackages = with pkgs; [ libvirt libressl.nc iptables cfg.qemuPackage ]; - etc.ethertypes.source = "${pkgs.iptables}/etc/ethertypes"; + systemPackages = with pkgs; [ libressl.nc iptables cfg.package cfg.qemuPackage ]; + etc.ethertypes.source = "${pkgs.ebtables}/etc/ethertypes"; }; boot.kernelModules = [ "tun" ]; @@ -169,26 +186,26 @@ in { source = "/run/${dirName}/nix-helpers/qemu-bridge-helper"; }; - systemd.packages = [ pkgs.libvirt ]; + systemd.packages = [ cfg.package ]; systemd.services.libvirtd-config = { description = "Libvirt Virtual Machine Management Daemon - configuration"; script = '' # Copy default libvirt network config .xml files to /var/lib # Files modified by the user will not be overwritten - for i in $(cd ${pkgs.libvirt}/var/lib && echo \ + for i in $(cd ${cfg.package}/var/lib && echo \ libvirt/qemu/networks/*.xml libvirt/qemu/networks/autostart/*.xml \ libvirt/nwfilter/*.xml ); do mkdir -p /var/lib/$(dirname $i) -m 755 - cp -npd ${pkgs.libvirt}/var/lib/$i /var/lib/$i + cp -npd ${cfg.package}/var/lib/$i /var/lib/$i done # Copy generated qemu config to libvirt directory cp -f ${qemuConfigFile} /var/lib/${dirName}/qemu.conf # stable (not GC'able as in /nix/store) paths for using in <emulator> section of xml configs - for emulator in ${pkgs.libvirt}/libexec/libvirt_lxc ${cfg.qemuPackage}/bin/qemu-kvm ${cfg.qemuPackage}/bin/qemu-system-*; do + for emulator in ${cfg.package}/libexec/libvirt_lxc ${cfg.qemuPackage}/bin/qemu-kvm ${cfg.qemuPackage}/bin/qemu-system-*; do ln -s --force "$emulator" /run/${dirName}/nix-emulators/ done @@ -197,8 +214,8 @@ in { done ${optionalString cfg.qemuOvmf '' - ln -s --force ${pkgs.OVMF.fd}/FV/OVMF_CODE.fd /run/${dirName}/nix-ovmf/ - ln -s --force ${pkgs.OVMF.fd}/FV/OVMF_VARS.fd /run/${dirName}/nix-ovmf/ + ln -s --force ${pkgs.OVMF.fd}/FV/${ovmfFilePrefix}_CODE.fd /run/${dirName}/nix-ovmf/ + ln -s --force ${pkgs.OVMF.fd}/FV/${ovmfFilePrefix}_VARS.fd /run/${dirName}/nix-ovmf/ ''} ''; @@ -213,7 +230,7 @@ in { systemd.services.libvirtd = { requires = [ "libvirtd-config.service" ]; - after = [ "systemd-udev-settle.service" "libvirtd-config.service" ] + after = [ "libvirtd-config.service" ] ++ optional vswitch.enable "ovs-vswitchd.service"; environment.LIBVIRTD_ARGS = escapeShellArgs ( @@ -234,7 +251,7 @@ in { systemd.services.libvirt-guests = { wantedBy = [ "multi-user.target" ]; - path = with pkgs; [ coreutils libvirt gawk ]; + path = with pkgs; [ coreutils gawk cfg.package ]; restartIfChanged = false; environment.ON_BOOT = "${cfg.onBoot}"; @@ -249,7 +266,7 @@ in { systemd.services.virtlogd = { description = "Virtual machine log manager"; - serviceConfig.ExecStart = "@${pkgs.libvirt}/sbin/virtlogd virtlogd"; + serviceConfig.ExecStart = "@${cfg.package}/sbin/virtlogd virtlogd"; restartIfChanged = false; }; @@ -261,7 +278,7 @@ in { systemd.services.virtlockd = { description = "Virtual machine lock manager"; - serviceConfig.ExecStart = "@${pkgs.libvirt}/sbin/virtlockd virtlockd"; + serviceConfig.ExecStart = "@${cfg.package}/sbin/virtlockd virtlockd"; restartIfChanged = false; }; diff --git a/nixos/modules/virtualisation/lxc-container.nix b/nixos/modules/virtualisation/lxc-container.nix index d4936484018..e47bd59dc01 100644 --- a/nixos/modules/virtualisation/lxc-container.nix +++ b/nixos/modules/virtualisation/lxc-container.nix @@ -11,7 +11,7 @@ with lib; users.users.root.initialHashedPassword = mkOverride 150 ""; # Some more help text. - services.mingetty.helpLine = + services.getty.helpLine = '' Log in as "root" with an empty password. diff --git a/nixos/modules/virtualisation/lxc.nix b/nixos/modules/virtualisation/lxc.nix index f484d5ee59a..0f8b22a45df 100644 --- a/nixos/modules/virtualisation/lxc.nix +++ b/nixos/modules/virtualisation/lxc.nix @@ -74,9 +74,13 @@ in systemd.tmpfiles.rules = [ "d /var/lib/lxc/rootfs 0755 root root -" ]; security.apparmor.packages = [ pkgs.lxc ]; - security.apparmor.profiles = [ - "${pkgs.lxc}/etc/apparmor.d/lxc-containers" - "${pkgs.lxc}/etc/apparmor.d/usr.bin.lxc-start" - ]; + security.apparmor.policies = { + "bin.lxc-start".profile = '' + include ${pkgs.lxc}/etc/apparmor.d/usr.bin.lxc-start + ''; + "lxc-containers".profile = '' + include ${pkgs.lxc}/etc/apparmor.d/lxc-containers + ''; + }; }; } diff --git a/nixos/modules/virtualisation/lxd.nix b/nixos/modules/virtualisation/lxd.nix index 3958fc2c1d7..cde29f7bf59 100644 --- a/nixos/modules/virtualisation/lxd.nix +++ b/nixos/modules/virtualisation/lxd.nix @@ -5,13 +5,12 @@ with lib; let - cfg = config.virtualisation.lxd; - zfsCfg = config.boot.zfs; - -in +in { + imports = [ + (mkRemovedOptionModule [ "virtualisation" "lxd" "zfsPackage" ] "Override zfs in an overlay instead to override it globally") + ]; -{ ###### interface options = { @@ -51,18 +50,10 @@ in ''; }; - zfsPackage = mkOption { - type = types.package; - default = with pkgs; if zfsCfg.enableUnstable then zfsUnstable else zfs; - defaultText = "pkgs.zfs"; - description = '' - The ZFS package to use with LXD. - ''; - }; - zfsSupport = mkOption { type = types.bool; - default = false; + default = config.boot.zfs.enabled; + defaultText = "config.boot.zfs.enabled"; description = '' Enables lxd to use zfs as a storage for containers. @@ -75,7 +66,7 @@ in type = types.bool; default = false; description = '' - enables various settings to avoid common pitfalls when + Enables various settings to avoid common pitfalls when running containers requiring many file operations. Fixes errors like "Too many open files" or "neighbour: ndisc_cache: neighbor table overflow!". @@ -83,44 +74,82 @@ in for details. ''; }; + + startTimeout = mkOption { + type = types.int; + default = 600; + apply = toString; + description = '' + Time to wait (in seconds) for LXD to become ready to process requests. + If LXD does not reply within the configured time, lxd.service will be + considered failed and systemd will attempt to restart it. + ''; + }; }; }; ###### implementation - config = mkIf cfg.enable { environment.systemPackages = [ cfg.package ]; + # Note: the following options are also declared in virtualisation.lxc, but + # the latter can't be simply enabled to reuse the formers, because it + # does a bunch of unrelated things. + systemd.tmpfiles.rules = [ "d /var/lib/lxc/rootfs 0755 root root -" ]; + security.apparmor = { - enable = true; - profiles = [ - "${cfg.lxcPackage}/etc/apparmor.d/usr.bin.lxc-start" - "${cfg.lxcPackage}/etc/apparmor.d/lxc-containers" - ]; packages = [ cfg.lxcPackage ]; + policies = { + "bin.lxc-start".profile = '' + include ${cfg.lxcPackage}/etc/apparmor.d/usr.bin.lxc-start + ''; + "lxc-containers".profile = '' + include ${cfg.lxcPackage}/etc/apparmor.d/lxc-containers + ''; + }; + }; + + # TODO: remove once LXD gets proper support for cgroupsv2 + # (currently most of the e.g. CPU accounting stuff doesn't work) + systemd.enableUnifiedCgroupHierarchy = false; + + systemd.sockets.lxd = { + description = "LXD UNIX socket"; + wantedBy = [ "sockets.target" ]; + + socketConfig = { + ListenStream = "/var/lib/lxd/unix.socket"; + SocketMode = "0660"; + SocketGroup = "lxd"; + Service = "lxd.service"; + }; }; systemd.services.lxd = { description = "LXD Container Management Daemon"; wantedBy = [ "multi-user.target" ]; - after = [ "systemd-udev-settle.service" ]; - - path = lib.optional cfg.zfsSupport cfg.zfsPackage; + after = [ "network-online.target" "lxcfs.service" ]; + requires = [ "network-online.target" "lxd.socket" "lxcfs.service" ]; + documentation = [ "man:lxd(1)" ]; - preStart = '' - mkdir -m 0755 -p /var/lib/lxc/rootfs - ''; + path = optional cfg.zfsSupport config.boot.zfs.package; serviceConfig = { ExecStart = "@${cfg.package}/bin/lxd lxd --group lxd"; - Type = "simple"; + ExecStartPost = "${cfg.package}/bin/lxd waitready --timeout=${cfg.startTimeout}"; + ExecStop = "${cfg.package}/bin/lxd shutdown"; + KillMode = "process"; # when stopping, leave the containers alone LimitMEMLOCK = "infinity"; LimitNOFILE = "1048576"; LimitNPROC = "infinity"; TasksMax = "infinity"; + Restart = "on-failure"; + TimeoutStartSec = "${cfg.startTimeout}s"; + TimeoutStopSec = "30s"; + # By default, `lxd` loads configuration files from hard-coded # `/usr/share/lxc/config` - since this is a no-go for us, we have to # explicitly tell it where the actual configuration files are @@ -146,5 +175,8 @@ in "net.ipv6.neigh.default.gc_thresh3" = 8192; "kernel.keys.maxkeys" = 2000; }; + + boot.kernelModules = [ "veth" "xt_comment" "xt_CHECKSUM" "xt_MASQUERADE" ] + ++ optionals (!config.networking.nftables.enable) [ "iptable_mangle" ]; }; } diff --git a/nixos/modules/virtualisation/nixos-containers.nix b/nixos/modules/virtualisation/nixos-containers.nix index b0fa03917c8..f3f318412df 100644 --- a/nixos/modules/virtualisation/nixos-containers.nix +++ b/nixos/modules/virtualisation/nixos-containers.nix @@ -35,6 +35,9 @@ let '' #! ${pkgs.runtimeShell} -e + # Exit early if we're asked to shut down. + trap "exit 0" SIGRTMIN+3 + # Initialise the container side of the veth pair. if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] || @@ -56,12 +59,16 @@ let ip -6 route add $HOST_ADDRESS6 dev eth0 ip -6 route add default via $HOST_ADDRESS6 fi - - ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)} fi - # Start the regular stage 1 script. - exec "$1" + ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)} + + # Start the regular stage 2 script. + # We source instead of exec to not lose an early stop signal, which is + # also the only _reliable_ shutdown signal we have since early stop + # does not execute ExecStop* commands. + set +e + . "$1" '' ); @@ -127,12 +134,16 @@ let ''} # Run systemd-nspawn without startup notification (we'll - # wait for the container systemd to signal readiness). + # wait for the container systemd to signal readiness) + # Kill signal handling means systemd-nspawn will pass a system-halt signal + # to the container systemd when it receives SIGTERM for container shutdown; + # containerInit and stage2 have to handle this as well. exec ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ -M "$INSTANCE" -D "$root" $extraFlags \ $EXTRA_NSPAWN_FLAGS \ --notify-ready=yes \ + --kill-signal=SIGRTMIN+3 \ --bind-ro=/nix/store \ --bind-ro=/nix/var/nix/db \ --bind-ro=/nix/var/nix/daemon-socket \ @@ -170,7 +181,7 @@ let ${concatStringsSep "\n" ( mapAttrsToList (name: cfg: - ''ip link del dev ${name} 2> /dev/null || true '' + "ip link del dev ${name} 2> /dev/null || true " ) cfg.extraVeths )} ''; @@ -185,7 +196,7 @@ let fi '' else - ''${ipcmd} add ${cfg.${attribute}} dev $ifaceHost''; + "${ipcmd} add ${cfg.${attribute}} dev $ifaceHost"; renderExtraVeth = name: cfg: if cfg.hostBridge != null then '' @@ -223,8 +234,8 @@ let ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"} ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"} fi - ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)} fi + ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)} '' ); @@ -259,20 +270,17 @@ let Slice = "machine.slice"; Delegate = true; - # Hack: we don't want to kill systemd-nspawn, since we call - # "machinectl poweroff" in preStop to shut down the - # container cleanly. But systemd requires sending a signal - # (at least if we want remaining processes to be killed - # after the timeout). So send an ignored signal. + # We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown + # signal (SIGRTMIN+3) for the inner container. KillMode = "mixed"; - KillSignal = "WINCH"; + KillSignal = "TERM"; DevicePolicy = "closed"; DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices; }; - system = config.nixpkgs.localSystem.system; + kernelVersion = config.boot.kernelPackages.kernel.version; bindMountOpts = { name, ... }: { @@ -321,7 +329,6 @@ let }; }; - mkBindFlag = d: let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind="; mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}"; @@ -421,7 +428,7 @@ let extraVeths = {}; additionalCapabilities = []; ephemeral = false; - timeoutStartSec = "15s"; + timeoutStartSec = "1min"; allowedDevices = []; hostAddress = null; hostAddress6 = null; @@ -440,21 +447,16 @@ in default = false; description = '' Whether this NixOS machine is a lightweight container running - in another NixOS system. If set to true, support for nested - containers is disabled by default, but can be reenabled by - setting <option>boot.enableContainers</option> to true. + in another NixOS system. ''; }; boot.enableContainers = mkOption { type = types.bool; - default = !config.boot.isContainer; + default = true; description = '' Whether to enable support for NixOS containers. Defaults to true - (at no cost if containers are not actually used), but only if the - system is not itself a lightweight container of a host. - To enable support for nested containers, this option has to be - explicitly set to true (in the outer container). + (at no cost if containers are not actually used). ''; }; @@ -463,21 +465,15 @@ in { config, options, name, ... }: { options = { - config = mkOption { description = '' A specification of the desired configuration of this container, as a NixOS module. ''; - type = let - confPkgs = if config.pkgs == null then pkgs else config.pkgs; - in lib.mkOptionType { + type = lib.mkOptionType { name = "Toplevel NixOS config"; - merge = loc: defs: (import (confPkgs.path + "/nixos/lib/eval-config.nix") { + merge = loc: defs: (import "${toString config.nixpkgs}/nixos/lib/eval-config.nix" { inherit system; - pkgs = confPkgs; - baseModules = import (confPkgs.path + "/nixos/modules/module-list.nix"); - inherit (confPkgs) lib; modules = let extraConfig = { @@ -488,11 +484,16 @@ in networking.useDHCP = false; assertions = [ { - assertion = config.privateNetwork -> stringLength name < 12; + assertion = + (builtins.compareVersions kernelVersion "5.8" <= 0) + -> config.privateNetwork + -> stringLength name <= 11; message = '' Container name `${name}` is too long: When `privateNetwork` is enabled, container names can not be longer than 11 characters, because the container's interface name is derived from it. - This might be fixed in the future. See https://github.com/NixOS/nixpkgs/issues/38509 + You should either make the container name shorter or upgrade to a more recent kernel that + supports interface altnames (i.e. at least Linux 5.8 - please see https://github.com/NixOS/nixpkgs/issues/38509 + for details). ''; } ]; @@ -506,7 +507,7 @@ in path = mkOption { type = types.path; - example = "/nix/var/nix/profiles/containers/webserver"; + example = "/nix/var/nix/profiles/per-container/webserver"; description = '' As an alternative to specifying <option>config</option>, you can specify the path to @@ -526,12 +527,18 @@ in ''; }; - pkgs = mkOption { - type = types.nullOr types.attrs; - default = null; - example = literalExample "pkgs"; + nixpkgs = mkOption { + type = types.path; + default = pkgs.path; + defaultText = "pkgs.path"; description = '' - Customise which nixpkgs to use for this container. + A path to the nixpkgs that provide the modules, pkgs and lib for evaluating the container. + + To only change the <literal>pkgs</literal> argument used inside the container modules, + set the <literal>nixpkgs.*</literal> options in the container <option>config</option>. + Setting <literal>config.nixpkgs.pkgs = pkgs</literal> speeds up the container evaluation + by reusing the system pkgs, but the <literal>nixpkgs.config</literal> option in the + container config is ignored in this case. ''; }; @@ -614,20 +621,20 @@ in ''; }; - timeoutStartSec = mkOption { - type = types.str; - default = "1min"; - description = '' - Time for the container to start. In case of a timeout, - the container processes get killed. - See <citerefentry><refentrytitle>systemd.time</refentrytitle> - <manvolnum>7</manvolnum></citerefentry> - for more information about the format. - ''; - }; + timeoutStartSec = mkOption { + type = types.str; + default = "1min"; + description = '' + Time for the container to start. In case of a timeout, + the container processes get killed. + See <citerefentry><refentrytitle>systemd.time</refentrytitle> + <manvolnum>7</manvolnum></citerefentry> + for more information about the format. + ''; + }; bindMounts = mkOption { - type = with types; loaOf (submodule bindMountOpts); + type = with types; attrsOf (submodule bindMountOpts); default = {}; example = literalExample '' { "/home" = { hostPath = "/home/alice"; @@ -672,14 +679,31 @@ in ''; }; + # Removed option. See `checkAssertion` below for the accompanying error message. + pkgs = mkOption { visible = false; }; } // networkOptions; - config = mkMerge - [ - (mkIf options.config.isDefined { - path = config.config.system.build.toplevel; - }) - ]; + config = let + # Throw an error when removed option `pkgs` is used. + # Because this is a submodule we cannot use `mkRemovedOptionModule` or option `assertions`. + optionPath = "containers.${name}.pkgs"; + files = showFiles options.pkgs.files; + checkAssertion = if options.pkgs.isDefined then throw '' + The option definition `${optionPath}' in ${files} no longer has any effect; please remove it. + + Alternatively, you can use the following options: + - containers.${name}.nixpkgs + This sets the nixpkgs (and thereby the modules, pkgs and lib) that + are used for evaluating the container. + + - containers.${name}.config.nixpkgs.pkgs + This only sets the `pkgs` argument used inside the container modules. + '' + else null; + in { + path = builtins.seq checkAssertion + mkIf options.config.isDefined config.config.system.build.toplevel; + }; })); default = {}; @@ -718,7 +742,7 @@ in unitConfig.RequiresMountsFor = "/var/lib/containers/%i"; - path = [ pkgs.iproute ]; + path = [ pkgs.iproute2 ]; environment = { root = "/var/lib/containers/%i"; @@ -731,8 +755,6 @@ in postStart = postStartScript dummyConfig; - preStop = "machinectl poweroff $INSTANCE"; - restartIfChanged = false; serviceConfig = serviceDirectives dummyConfig; diff --git a/nixos/modules/virtualisation/oci-containers.nix b/nixos/modules/virtualisation/oci-containers.nix index a46dd65eb49..a4a92f22506 100644 --- a/nixos/modules/virtualisation/oci-containers.nix +++ b/nixos/modules/virtualisation/oci-containers.nix @@ -31,6 +31,30 @@ let example = literalExample "pkgs.dockerTools.buildDockerImage {...};"; }; + login = { + + username = mkOption { + type = with types; nullOr str; + default = null; + description = "Username for login."; + }; + + passwordFile = mkOption { + type = with types; nullOr str; + default = null; + description = "Path to file containing password."; + example = "/etc/nixos/dockerhub-password.txt"; + }; + + registry = mkOption { + type = with types; nullOr str; + default = null; + description = "Registry where to login to."; + example = "https://docker.pkg.github.com"; + }; + + }; + cmd = mkOption { type = with types; listOf str; default = []; @@ -59,6 +83,18 @@ let ''; }; + environmentFiles = mkOption { + type = with types; listOf path; + default = []; + description = "Environment files for this container."; + example = literalExample '' + [ + /path/to/.env + /path/to/.env.secret + ] + ''; + }; + log-driver = mkOption { type = types.str; default = "journald"; @@ -176,10 +212,10 @@ let description = '' Define which other containers this one depends on. They will be added to both After and Requires for the unit. - Use the same name as the attribute under <literal>virtualisation.oci-containers</literal>. + Use the same name as the attribute under <literal>virtualisation.oci-containers.containers</literal>. ''; example = literalExample '' - virtualisation.oci-containers = { + virtualisation.oci-containers.containers = { node1 = {}; node2 = { dependsOn = [ "node1" ]; @@ -208,6 +244,8 @@ let }; }; + isValidLogin = login: login.username != null && login.passwordFile != null && login.registry != null; + mkService = name: container: let dependsOn = map (x: "${cfg.backend}-${x}.service") container.dependsOn; in { @@ -217,40 +255,46 @@ let environment = proxy_env; path = - if cfg.backend == "docker" then [ pkgs.docker ] + if cfg.backend == "docker" then [ config.virtualisation.docker.package ] else if cfg.backend == "podman" then [ config.virtualisation.podman.package ] else throw "Unhandled backend: ${cfg.backend}"; preStart = '' ${cfg.backend} rm -f ${name} || true + ${optionalString (isValidLogin container.login) '' + cat ${container.login.passwordFile} | \ + ${cfg.backend} login \ + ${container.login.registry} \ + --username ${container.login.username} \ + --password-stdin + ''} ${optionalString (container.imageFile != null) '' ${cfg.backend} load -i ${container.imageFile} ''} ''; + + script = concatStringsSep " \\\n " ([ + "exec ${cfg.backend} run" + "--rm" + "--name=${escapeShellArg name}" + "--log-driver=${container.log-driver}" + ] ++ optional (container.entrypoint != null) + "--entrypoint=${escapeShellArg container.entrypoint}" + ++ (mapAttrsToList (k: v: "-e ${escapeShellArg k}=${escapeShellArg v}") container.environment) + ++ map (f: "--env-file ${escapeShellArg f}") container.environmentFiles + ++ map (p: "-p ${escapeShellArg p}") container.ports + ++ optional (container.user != null) "-u ${escapeShellArg container.user}" + ++ map (v: "-v ${escapeShellArg v}") container.volumes + ++ optional (container.workdir != null) "-w ${escapeShellArg container.workdir}" + ++ map escapeShellArg container.extraOptions + ++ [container.image] + ++ map escapeShellArg container.cmd + ); + + preStop = "[ $SERVICE_RESULT = success ] || ${cfg.backend} stop ${name}"; postStop = "${cfg.backend} rm -f ${name} || true"; serviceConfig = { - StandardOutput = "null"; - StandardError = "null"; - ExecStart = concatStringsSep " \\\n " ([ - "${config.system.path}/bin/${cfg.backend} run" - "--rm" - "--name=${name}" - "--log-driver=${container.log-driver}" - ] ++ optional (container.entrypoint != null) - "--entrypoint=${escapeShellArg container.entrypoint}" - ++ (mapAttrsToList (k: v: "-e ${escapeShellArg k}=${escapeShellArg v}") container.environment) - ++ map (p: "-p ${escapeShellArg p}") container.ports - ++ optional (container.user != null) "-u ${escapeShellArg container.user}" - ++ map (v: "-v ${escapeShellArg v}") container.volumes - ++ optional (container.workdir != null) "-w ${escapeShellArg container.workdir}" - ++ map escapeShellArg container.extraOptions - ++ [container.image] - ++ map escapeShellArg container.cmd - ); - - ExecStop = ''${pkgs.bash}/bin/sh -c "[ $SERVICE_RESULT = success ] || ${cfg.backend} stop ${name}"''; - ### There is no generalized way of supporting `reload` for docker ### containers. Some containers may respond well to SIGHUP sent to their ### init process, but it is not guaranteed; some apps have other reload diff --git a/nixos/modules/virtualisation/openstack-config.nix b/nixos/modules/virtualisation/openstack-config.nix index c2da5d0d230..d01e0f23aba 100644 --- a/nixos/modules/virtualisation/openstack-config.nix +++ b/nixos/modules/virtualisation/openstack-config.nix @@ -3,7 +3,7 @@ with lib; let - metadataFetcher = import ./ec2-metadata-fetcher.nix { + metadataFetcher = import ./openstack-metadata-fetcher.nix { targetRoot = "/"; wgetExtraOptions = "--retry-connrefused"; }; diff --git a/nixos/modules/virtualisation/openstack-metadata-fetcher.nix b/nixos/modules/virtualisation/openstack-metadata-fetcher.nix new file mode 100644 index 00000000000..133cd4c0e9f --- /dev/null +++ b/nixos/modules/virtualisation/openstack-metadata-fetcher.nix @@ -0,0 +1,21 @@ +{ targetRoot, wgetExtraOptions }: + +# OpenStack's metadata service aims to be EC2-compatible. Where +# possible, try to keep the set of fetched metadata in sync with +# ./ec2-metadata-fetcher.nix . +'' + metaDir=${targetRoot}etc/ec2-metadata + mkdir -m 0755 -p "$metaDir" + rm -f "$metaDir/*" + + echo "getting instance metadata..." + + wget_imds() { + wget ${wgetExtraOptions} "$@" + } + + wget_imds -O "$metaDir/ami-manifest-path" http://169.254.169.254/1.0/meta-data/ami-manifest-path + (umask 077 && wget_imds -O "$metaDir/user-data" http://169.254.169.254/1.0/user-data) + wget_imds -O "$metaDir/hostname" http://169.254.169.254/1.0/meta-data/hostname + wget_imds -O "$metaDir/public-keys-0-openssh-key" http://169.254.169.254/1.0/meta-data/public-keys/0/openssh-key +'' diff --git a/nixos/modules/virtualisation/openvswitch.nix b/nixos/modules/virtualisation/openvswitch.nix index c6a3ceddc3e..ccf32641df6 100644 --- a/nixos/modules/virtualisation/openvswitch.nix +++ b/nixos/modules/virtualisation/openvswitch.nix @@ -66,9 +66,7 @@ in { }; in (mkMerge [{ - - environment.systemPackages = [ cfg.package pkgs.ipsecTools ]; - + environment.systemPackages = [ cfg.package ]; boot.kernelModules = [ "tun" "openvswitch" ]; boot.extraModulePackages = [ cfg.package ]; @@ -146,6 +144,8 @@ in { } (mkIf (cfg.ipsec && (versionOlder cfg.package.version "2.6.0")) { + environment.systemPackages = [ pkgs.ipsecTools ]; + services.racoon.enable = true; services.racoon.configPath = "${runDir}/ipsec/etc/racoon/racoon.conf"; diff --git a/nixos/modules/virtualisation/parallels-guest.nix b/nixos/modules/virtualisation/parallels-guest.nix index 828419fb4b9..55605b388b7 100644 --- a/nixos/modules/virtualisation/parallels-guest.nix +++ b/nixos/modules/virtualisation/parallels-guest.nix @@ -32,7 +32,7 @@ in }; package = mkOption { - type = types.package; + type = types.nullOr types.package; default = config.boot.kernelPackages.prl-tools; defaultText = "config.boot.kernelPackages.prl-tools"; example = literalExample "config.boot.kernelPackages.prl-tools"; diff --git a/nixos/modules/virtualisation/podman-dnsname.nix b/nixos/modules/virtualisation/podman-dnsname.nix new file mode 100644 index 00000000000..beef1975507 --- /dev/null +++ b/nixos/modules/virtualisation/podman-dnsname.nix @@ -0,0 +1,36 @@ +{ config, lib, pkgs, ... }: +let + inherit (lib) + mkOption + mkIf + types + ; + + cfg = config.virtualisation.podman; + +in +{ + options = { + virtualisation.podman = { + + defaultNetwork.dnsname.enable = mkOption { + type = types.bool; + default = false; + description = '' + Enable DNS resolution in the default podman network. + ''; + }; + + }; + }; + + config = { + virtualisation.containers.containersConf.cniPlugins = mkIf cfg.defaultNetwork.dnsname.enable [ pkgs.dnsname-cni ]; + virtualisation.podman.defaultNetwork.extraPlugins = + lib.optional cfg.defaultNetwork.dnsname.enable { + type = "dnsname"; + domainName = "dns.podman"; + capabilities.aliases = true; + }; + }; +} diff --git a/nixos/modules/virtualisation/podman-network-socket-ghostunnel.nix b/nixos/modules/virtualisation/podman-network-socket-ghostunnel.nix new file mode 100644 index 00000000000..a0e7e433164 --- /dev/null +++ b/nixos/modules/virtualisation/podman-network-socket-ghostunnel.nix @@ -0,0 +1,34 @@ +{ config, lib, pkg, ... }: +let + inherit (lib) + mkOption + types + ; + + cfg = config.virtualisation.podman.networkSocket; + +in +{ + options.virtualisation.podman.networkSocket = { + server = mkOption { + type = types.enum [ "ghostunnel" ]; + }; + }; + + config = lib.mkIf (cfg.enable && cfg.server == "ghostunnel") { + + services.ghostunnel = { + enable = true; + servers."podman-socket" = { + inherit (cfg.tls) cert key cacert; + listen = "${cfg.listenAddress}:${toString cfg.port}"; + target = "unix:/run/podman/podman.sock"; + allowAll = lib.mkDefault true; + }; + }; + systemd.services.ghostunnel-server-podman-socket.serviceConfig.SupplementaryGroups = ["podman"]; + + }; + + meta.maintainers = lib.teams.podman.members ++ [ lib.maintainers.roberth ]; +} diff --git a/nixos/modules/virtualisation/podman-network-socket.nix b/nixos/modules/virtualisation/podman-network-socket.nix new file mode 100644 index 00000000000..1429164630b --- /dev/null +++ b/nixos/modules/virtualisation/podman-network-socket.nix @@ -0,0 +1,91 @@ +{ config, lib, pkg, ... }: +let + inherit (lib) + mkOption + types + ; + + cfg = config.virtualisation.podman.networkSocket; + +in +{ + options.virtualisation.podman.networkSocket = { + enable = mkOption { + type = types.bool; + default = false; + description = '' + Make the Podman and Docker compatibility API available over the network + with TLS client certificate authentication. + + This allows Docker clients to connect with the equivalents of the Docker + CLI <code>-H</code> and <code>--tls*</code> family of options. + + For certificate setup, see https://docs.docker.com/engine/security/protect-access/ + + This option is independent of <xref linkend="opt-virtualisation.podman.dockerSocket.enable"/>. + ''; + }; + + server = mkOption { + type = types.enum []; + description = '' + Choice of TLS proxy server. + ''; + example = "ghostunnel"; + }; + + openFirewall = mkOption { + type = types.bool; + default = false; + description = '' + Whether to open the port in the firewall. + ''; + }; + + tls.cacert = mkOption { + type = types.path; + description = '' + Path to CA certificate to use for client authentication. + ''; + }; + + tls.cert = mkOption { + type = types.path; + description = '' + Path to certificate describing the server. + ''; + }; + + tls.key = mkOption { + type = types.path; + description = '' + Path to the private key corresponding to the server certificate. + + Use a string for this setting. Otherwise it will be copied to the Nix + store first, where it is readable by any system process. + ''; + }; + + port = mkOption { + type = types.port; + default = 2376; + description = '' + TCP port number for receiving TLS connections. + ''; + }; + listenAddress = mkOption { + type = types.str; + default = "0.0.0.0"; + description = '' + Interface address for receiving TLS connections. + ''; + }; + }; + + config = { + networking.firewall.allowedTCPPorts = + lib.optional (cfg.enable && cfg.openFirewall) cfg.port; + }; + + meta.maintainers = lib.teams.podman.members ++ [ lib.maintainers.roberth ]; +} diff --git a/nixos/modules/virtualisation/podman.nix b/nixos/modules/virtualisation/podman.nix index e0e2f04e24c..e245004e04a 100644 --- a/nixos/modules/virtualisation/podman.nix +++ b/nixos/modules/virtualisation/podman.nix @@ -1,6 +1,8 @@ { config, lib, pkgs, ... }: let cfg = config.virtualisation.podman; + toml = pkgs.formats.toml { }; + json = pkgs.formats.json { }; inherit (lib) mkOption types; @@ -21,14 +23,24 @@ let done ''; - # Copy configuration files to avoid having the entire sources in the system closure - copyFile = filePath: pkgs.runCommandNoCC (builtins.unsafeDiscardStringContext (builtins.baseNameOf filePath)) {} '' - cp ${filePath} $out + net-conflist = pkgs.runCommand "87-podman-bridge.conflist" { + nativeBuildInputs = [ pkgs.jq ]; + extraPlugins = builtins.toJSON cfg.defaultNetwork.extraPlugins; + jqScript = '' + . + { "plugins": (.plugins + $extraPlugins) } + ''; + } '' + jq <${cfg.package}/etc/cni/net.d/87-podman-bridge.conflist \ + --argjson extraPlugins "$extraPlugins" \ + "$jqScript" \ + >$out ''; in { imports = [ + ./podman-dnsname.nix + ./podman-network-socket.nix (lib.mkRenamedOptionModule [ "virtualisation" "podman" "libpod" ] [ "virtualisation" "containers" "containersConf" ]) ]; @@ -50,6 +62,20 @@ in ''; }; + dockerSocket.enable = mkOption { + type = types.bool; + default = false; + description = '' + Make the Podman socket available in place of the Docker socket, so + Docker tools can find the Podman socket. + + Podman implements the Docker API. + + Users must be in the <code>podman</code> group in order to connect. As + with Docker, members of this group can gain root access. + ''; + }; + dockerCompat = mkOption { type = types.bool; default = false; @@ -58,6 +84,14 @@ in ''; }; + enableNvidia = mkOption { + type = types.bool; + default = false; + description = '' + Enable use of NVidia GPUs from within podman containers. + ''; + }; + extraPackages = mkOption { type = with types; listOf package; default = [ ]; @@ -80,24 +114,71 @@ in ''; }; + defaultNetwork.extraPlugins = lib.mkOption { + type = types.listOf json.type; + default = []; + description = '' + Extra CNI plugin configurations to add to podman's default network. + ''; + }; }; - config = lib.mkIf cfg.enable { - - environment.systemPackages = [ cfg.package ] - ++ lib.optional cfg.dockerCompat dockerCompat; - - environment.etc."cni/net.d/87-podman-bridge.conflist".source = copyFile "${pkgs.podman-unwrapped.src}/cni/87-podman-bridge.conflist"; - - # Enable common /etc/containers configuration - virtualisation.containers.enable = true; + config = lib.mkIf cfg.enable (lib.mkMerge [ + { + environment.systemPackages = [ cfg.package ] + ++ lib.optional cfg.dockerCompat dockerCompat; + + environment.etc."cni/net.d/87-podman-bridge.conflist".source = net-conflist; + + virtualisation.containers = { + enable = true; # Enable common /etc/containers configuration + containersConf.settings = lib.optionalAttrs cfg.enableNvidia { + engine = { + conmon_env_vars = [ "PATH=${lib.makeBinPath [ pkgs.nvidia-podman ]}" ]; + runtimes.nvidia = [ "${pkgs.nvidia-podman}/bin/nvidia-container-runtime" ]; + }; + }; + }; - assertions = [{ - assertion = cfg.dockerCompat -> !config.virtualisation.docker.enable; - message = "Option dockerCompat conflicts with docker"; - }]; + systemd.packages = [ cfg.package ]; - }; + systemd.services.podman.serviceConfig = { + ExecStart = [ "" "${cfg.package}/bin/podman $LOGGING system service" ]; + }; + systemd.sockets.podman.wantedBy = [ "sockets.target" ]; + systemd.sockets.podman.socketConfig.SocketGroup = "podman"; + + systemd.tmpfiles.packages = [ + # The /run/podman rule interferes with our podman group, so we remove + # it and let the systemd socket logic take care of it. + (pkgs.runCommand "podman-tmpfiles-nixos" { package = cfg.package; } '' + mkdir -p $out/lib/tmpfiles.d/ + grep -v 'D! /run/podman 0700 root root' \ + <$package/lib/tmpfiles.d/podman.conf \ + >$out/lib/tmpfiles.d/podman.conf + '') ]; + + systemd.tmpfiles.rules = + lib.optionals cfg.dockerSocket.enable [ + "L! /run/docker.sock - - - - /run/podman/podman.sock" + ]; + + users.groups.podman = {}; + + assertions = [ + { + assertion = cfg.dockerCompat -> !config.virtualisation.docker.enable; + message = "Option dockerCompat conflicts with docker"; + } + { + assertion = cfg.dockerSocket.enable -> !config.virtualisation.docker.enable; + message = '' + The options virtualisation.podman.dockerSocket.enable and virtualisation.docker.enable conflict, because only one can serve the socket. + ''; + } + ]; + } + ]); } diff --git a/nixos/modules/virtualisation/qemu-guest-agent.nix b/nixos/modules/virtualisation/qemu-guest-agent.nix index 665224e35d8..3824d0c168f 100644 --- a/nixos/modules/virtualisation/qemu-guest-agent.nix +++ b/nixos/modules/virtualisation/qemu-guest-agent.nix @@ -12,6 +12,11 @@ in { default = false; description = "Whether to enable the qemu guest agent."; }; + package = mkOption { + type = types.package; + default = pkgs.qemu.ga; + description = "The QEMU guest agent package."; + }; }; config = mkIf cfg.enable ( @@ -25,9 +30,12 @@ in { systemd.services.qemu-guest-agent = { description = "Run the QEMU Guest Agent"; serviceConfig = { - ExecStart = "${pkgs.qemu.ga}/bin/qemu-ga"; + ExecStart = "${cfg.package}/bin/qemu-ga --statedir /run/qemu-ga"; Restart = "always"; RestartSec = 0; + # Runtime directory and mode + RuntimeDirectory = "qemu-ga"; + RuntimeDirectoryMode = "0755"; }; }; } diff --git a/nixos/modules/virtualisation/qemu-vm.nix b/nixos/modules/virtualisation/qemu-vm.nix index 42e43f5ee02..d9935bcafb7 100644 --- a/nixos/modules/virtualisation/qemu-vm.nix +++ b/nixos/modules/virtualisation/qemu-vm.nix @@ -7,17 +7,18 @@ # the VM in the host. On the other hand, the root filesystem is a # read/writable disk image persistent across VM reboots. -{ config, lib, pkgs, ... }: +{ config, lib, pkgs, options, ... }: with lib; with import ../../lib/qemu-flags.nix { inherit pkgs; }; let - qemu = config.system.build.qemu or pkgs.qemu_test; cfg = config.virtualisation; + qemu = cfg.qemu.package; + consoles = lib.concatMapStringsSep " " (c: "console=${c}") cfg.qemu.consoles; driveOpts = { ... }: { @@ -135,10 +136,8 @@ let cp ${bootDisk}/efi-vars.fd "$NIX_EFI_VARS" || exit 1 chmod 0644 "$NIX_EFI_VARS" || exit 1 fi - '' else '' - ''} - '' else '' - ''} + '' else ""} + '' else ""} cd $TMPDIR idx=0 @@ -186,10 +185,9 @@ let efiVars=$out/efi-vars.fd cp ${efiVarsDefault} $efiVars chmod 0644 $efiVars - '' else '' - ''} + '' else ""} ''; - buildInputs = [ pkgs.utillinux ]; + buildInputs = [ pkgs.util-linux ]; QEMU_OPTS = "-nographic -serial stdio -monitor none" + lib.optionalString cfg.useEFIBoot ( " -drive if=pflash,format=raw,unit=0,readonly=on,file=${efiFirmware}" @@ -268,6 +266,8 @@ in options = { + virtualisation.fileSystems = options.fileSystems; + virtualisation.memorySize = mkOption { default = 384; @@ -277,6 +277,18 @@ in ''; }; + virtualisation.msize = + mkOption { + default = null; + type = types.nullOr types.ints.unsigned; + description = + '' + msize (maximum packet size) option passed to 9p file systems, in + bytes. Increasing this should increase performance significantly, + at the cost of higher RAM usage. + ''; + }; + virtualisation.diskSize = mkOption { default = 512; @@ -401,6 +413,14 @@ in }; virtualisation.qemu = { + package = + mkOption { + type = types.package; + default = pkgs.qemu; + example = "pkgs.qemu_test"; + description = "QEMU package to use."; + }; + options = mkOption { type = types.listOf types.unspecified; @@ -653,11 +673,12 @@ in # attribute should be disregarded for the purpose of building a VM # test image (since those filesystems don't exist in the VM). fileSystems = mkVMOverride ( + cfg.fileSystems // { "/".device = cfg.bootDevice; ${if cfg.writableStore then "/nix/.ro-store" else "/nix/store"} = { device = "store"; fsType = "9p"; - options = [ "trans=virtio" "version=9p2000.L" "cache=loose" ]; + options = [ "trans=virtio" "version=9p2000.L" "cache=loose" ] ++ lib.optional (cfg.msize != null) "msize=${toString cfg.msize}"; neededForBoot = true; }; "/tmp" = mkIf config.boot.tmpOnTmpfs @@ -670,13 +691,13 @@ in "/tmp/xchg" = { device = "xchg"; fsType = "9p"; - options = [ "trans=virtio" "version=9p2000.L" ]; + options = [ "trans=virtio" "version=9p2000.L" ] ++ lib.optional (cfg.msize != null) "msize=${toString cfg.msize}"; neededForBoot = true; }; "/tmp/shared" = { device = "shared"; fsType = "9p"; - options = [ "trans=virtio" "version=9p2000.L" ]; + options = [ "trans=virtio" "version=9p2000.L" ] ++ lib.optional (cfg.msize != null) "msize=${toString cfg.msize}"; neededForBoot = true; }; } // optionalAttrs (cfg.writableStore && cfg.writableStoreUseTmpfs) @@ -735,16 +756,19 @@ in (isEnabled "VIRTIO_PCI") (isEnabled "VIRTIO_NET") (isEnabled "EXT4_FS") + (isEnabled "NET_9P_VIRTIO") + (isEnabled "9P_FS") (isYes "BLK_DEV") (isYes "PCI") - (isYes "EXPERIMENTAL") (isYes "NETDEVICES") (isYes "NET_CORE") (isYes "INET") (isYes "NETWORK_FILESYSTEMS") - ] ++ optional (!cfg.graphics) [ + ] ++ optionals (!cfg.graphics) [ (isYes "SERIAL_8250_CONSOLE") (isYes "SERIAL_8250") + ] ++ optionals (cfg.writableStore) [ + (isEnabled "OVERLAY_FS") ]; }; diff --git a/nixos/modules/virtualisation/railcar.nix b/nixos/modules/virtualisation/railcar.nix index 3f188fc68e5..b603effef6e 100644 --- a/nixos/modules/virtualisation/railcar.nix +++ b/nixos/modules/virtualisation/railcar.nix @@ -41,7 +41,7 @@ let description = "Source for the in-container mount"; }; options = mkOption { - type = loaOf (str); + type = attrsOf (str); default = [ "bind" ]; description = '' Mount options of the filesystem to be used. @@ -61,7 +61,7 @@ in containers = mkOption { default = {}; description = "Declarative container configuration"; - type = with types; loaOf (submodule ({ name, config, ... }: { + type = with types; attrsOf (submodule ({ name, config, ... }: { options = { cmd = mkOption { type = types.lines; @@ -105,7 +105,7 @@ in stateDir = mkOption { type = types.path; - default = ''/var/railcar''; + default = "/var/railcar"; description = "Railcar persistent state directory"; }; diff --git a/nixos/modules/virtualisation/spice-usb-redirection.nix b/nixos/modules/virtualisation/spice-usb-redirection.nix new file mode 100644 index 00000000000..4168cebe79b --- /dev/null +++ b/nixos/modules/virtualisation/spice-usb-redirection.nix @@ -0,0 +1,24 @@ +{ config, pkgs, lib, ... }: +{ + options.virtualisation.spiceUSBRedirection.enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = '' + Install the SPICE USB redirection helper with setuid + privileges. This allows unprivileged users to pass USB devices + connected to this machine to libvirt VMs, both local and + remote. Note that this allows users arbitrary access to USB + devices. + ''; + }; + + config = lib.mkIf config.virtualisation.spiceUSBRedirection.enable { + environment.systemPackages = [ pkgs.spice-gtk ]; # For polkit actions + security.wrappers.spice-client-glib-usb-acl-helper ={ + source = "${pkgs.spice-gtk}/bin/spice-client-glib-usb-acl-helper"; + capabilities = "cap_fowner+ep"; + }; + }; + + meta.maintainers = [ lib.maintainers.lheckemann ]; +} diff --git a/nixos/modules/virtualisation/vagrant-guest.nix b/nixos/modules/virtualisation/vagrant-guest.nix new file mode 100644 index 00000000000..263b1ebca08 --- /dev/null +++ b/nixos/modules/virtualisation/vagrant-guest.nix @@ -0,0 +1,58 @@ +# Minimal configuration that vagrant depends on + +{ config, pkgs, ... }: +let + # Vagrant uses an insecure shared private key by default, but we + # don't use the authorizedKeys attribute under users because it should be + # removed on first boot and replaced with a random one. This script sets + # the correct permissions and installs the temporary key if no + # ~/.ssh/authorized_keys exists. + install-vagrant-ssh-key = pkgs.writeScriptBin "install-vagrant-ssh-key" '' + #!${pkgs.runtimeShell} + if [ ! -e ~/.ssh/authorized_keys ]; then + mkdir -m 0700 -p ~/.ssh + echo "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzIw+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoPkcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NOTd0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcWyLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQ== vagrant insecure public key" >> ~/.ssh/authorized_keys + chmod 0600 ~/.ssh/authorized_keys + fi + ''; +in +{ + # Enable the OpenSSH daemon. + services.openssh.enable = true; + + # Packages used by Vagrant + environment.systemPackages = with pkgs; [ + findutils + iputils + nettools + netcat + nfs-utils + rsync + ]; + + users.extraUsers.vagrant = { + isNormalUser = true; + createHome = true; + description = "Vagrant user account"; + extraGroups = [ "users" "wheel" ]; + home = "/home/vagrant"; + password = "vagrant"; + useDefaultShell = true; + uid = 1000; + }; + + systemd.services.install-vagrant-ssh-key = { + description = "Vagrant SSH key install (if needed)"; + after = [ "fs.target" ]; + wants = [ "fs.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + ExecStart = "${install-vagrant-ssh-key}/bin/install-vagrant-ssh-key"; + User = "vagrant"; + # So it won't be (needlessly) restarted: + RemainAfterExit = true; + }; + }; + + security.sudo.wheelNeedsPassword = false; +} diff --git a/nixos/modules/virtualisation/vagrant-virtualbox-image.nix b/nixos/modules/virtualisation/vagrant-virtualbox-image.nix new file mode 100644 index 00000000000..2a921894ab6 --- /dev/null +++ b/nixos/modules/virtualisation/vagrant-virtualbox-image.nix @@ -0,0 +1,60 @@ +# Vagrant + VirtualBox + +{ config, pkgs, ... }: + +{ + imports = [ + ./vagrant-guest.nix + ./virtualbox-image.nix + ]; + + virtualbox.params = { + audio = "none"; + audioin = "off"; + audioout = "off"; + usb = "off"; + usbehci = "off"; + }; + sound.enable = false; + documentation.man.enable = false; + documentation.nixos.enable = false; + + users.extraUsers.vagrant.extraGroups = [ "vboxsf" ]; + + # generate the box v1 format which is much easier to generate + # https://www.vagrantup.com/docs/boxes/format.html + system.build.vagrantVirtualbox = pkgs.runCommand + "virtualbox-vagrant.box" + {} + '' + mkdir workdir + cd workdir + + # 1. create that metadata.json file + echo '{"provider":"virtualbox"}' > metadata.json + + # 2. create a default Vagrantfile config + cat <<VAGRANTFILE > Vagrantfile + Vagrant.configure("2") do |config| + config.vm.base_mac = "0800275F0936" + end + VAGRANTFILE + + # 3. add the exported VM files + tar xvf ${config.system.build.virtualBoxOVA}/*.ova + + # 4. move the ovf to the fixed location + mv *.ovf box.ovf + + # 5. generate OVF manifest file + rm *.mf + touch box.mf + for fname in *; do + checksum=$(sha256sum $fname | cut -d' ' -f 1) + echo "SHA256($fname)= $checksum" >> box.mf + done + + # 6. compress everything back together + tar --owner=0 --group=0 --sort=name --numeric-owner -czf $out . + ''; +} diff --git a/nixos/modules/virtualisation/virtualbox-image.nix b/nixos/modules/virtualisation/virtualbox-image.nix index fa580e8b42d..272c696807a 100644 --- a/nixos/modules/virtualisation/virtualbox-image.nix +++ b/nixos/modules/virtualisation/virtualbox-image.nix @@ -11,8 +11,9 @@ in { options = { virtualbox = { baseImageSize = mkOption { - type = types.int; - default = 50 * 1024; + type = with types; either (enum [ "auto" ]) int; + default = "auto"; + example = 50 * 1024; description = '' The size of the VirtualBox base image in MiB. ''; @@ -57,7 +58,19 @@ in { Run <literal>VBoxManage modifyvm --help</literal> to see more options. ''; - }; + }; + exportParams = mkOption { + type = with types; listOf (oneOf [ str int bool (listOf str) ]); + example = [ + "--vsys" "0" "--vendor" "ACME Inc." + ]; + default = []; + description = '' + Parameters passed to the Virtualbox export command. + + Run <literal>VBoxManage export --help</literal> to see more options. + ''; + }; extraDisk = mkOption { description = '' Optional extra disk/hdd configuration. @@ -157,7 +170,7 @@ in { echo "exporting VirtualBox VM..." mkdir -p $out fn="$out/${cfg.vmFileName}" - VBoxManage export "$vmName" --output "$fn" --options manifest + VBoxManage export "$vmName" --output "$fn" --options manifest ${escapeShellArgs cfg.exportParams} rm -v $diskImage diff --git a/nixos/modules/virtualisation/vmware-guest.nix b/nixos/modules/virtualisation/vmware-guest.nix index 962a9059ea4..9465a8d6800 100644 --- a/nixos/modules/virtualisation/vmware-guest.nix +++ b/nixos/modules/virtualisation/vmware-guest.nix @@ -56,5 +56,7 @@ in ${open-vm-tools}/bin/vmware-user-suid-wrapper ''; }; + + services.udev.packages = [ open-vm-tools ]; }; } diff --git a/nixos/modules/virtualisation/vmware-image.nix b/nixos/modules/virtualisation/vmware-image.nix index 9da9e145f7a..f6cd12e2bb7 100644 --- a/nixos/modules/virtualisation/vmware-image.nix +++ b/nixos/modules/virtualisation/vmware-image.nix @@ -18,8 +18,9 @@ in { options = { vmware = { baseImageSize = mkOption { - type = types.int; - default = 2048; + type = with types; either (enum [ "auto" ]) int; + default = "auto"; + example = 2048; description = '' The size of the VMWare base image in MiB. ''; diff --git a/nixos/modules/virtualisation/xe-guest-utilities.nix b/nixos/modules/virtualisation/xe-guest-utilities.nix index 675cf929737..25ccbaebc07 100644 --- a/nixos/modules/virtualisation/xe-guest-utilities.nix +++ b/nixos/modules/virtualisation/xe-guest-utilities.nix @@ -17,7 +17,7 @@ in { wantedBy = [ "multi-user.target" ]; after = [ "xe-linux-distribution.service" ]; requires = [ "proc-xen.mount" ]; - path = [ pkgs.coreutils pkgs.iproute ]; + path = [ pkgs.coreutils pkgs.iproute2 ]; serviceConfig = { PIDFile = "/run/xe-daemon.pid"; ExecStart = "${pkgs.xe-guest-utilities}/bin/xe-daemon -p /run/xe-daemon.pid"; diff --git a/nixos/modules/virtualisation/xen-dom0.nix b/nixos/modules/virtualisation/xen-dom0.nix index 7b2a66c4348..fea43727f2f 100644 --- a/nixos/modules/virtualisation/xen-dom0.nix +++ b/nixos/modules/virtualisation/xen-dom0.nix @@ -57,7 +57,8 @@ in virtualisation.xen.bootParams = mkOption { - default = ""; + default = []; + type = types.listOf types.str; description = '' Parameters passed to the Xen hypervisor at boot time. @@ -68,6 +69,7 @@ in mkOption { default = 0; example = 512; + type = types.addCheck types.int (n: n >= 0); description = '' Amount of memory (in MiB) allocated to Domain 0 on boot. @@ -78,6 +80,7 @@ in virtualisation.xen.bridge = { name = mkOption { default = "xenbr0"; + type = types.str; description = '' Name of bridge the Xen domUs connect to. ''; @@ -158,9 +161,6 @@ in environment.systemPackages = [ cfg.package ]; - # Make sure Domain 0 gets the required configuration - #boot.kernelPackages = pkgs.boot.kernelPackages.override { features={xen_dom0=true;}; }; - boot.kernelModules = [ "xen-evtchn" "xen-gntdev" "xen-gntalloc" "xen-blkback" "xen-netback" "xen-pciback" "evtchn" "gntdev" "netbk" "blkbk" "xen-scsibk" @@ -201,8 +201,8 @@ in '' if [ -d /proc/xen ]; then ${pkgs.kmod}/bin/modprobe xenfs 2> /dev/null - ${pkgs.utillinux}/bin/mountpoint -q /proc/xen || \ - ${pkgs.utillinux}/bin/mount -t xenfs none /proc/xen + ${pkgs.util-linux}/bin/mountpoint -q /proc/xen || \ + ${pkgs.util-linux}/bin/mount -t xenfs none /proc/xen fi ''; @@ -245,7 +245,7 @@ in # Xen provides udev rules. services.udev.packages = [ cfg.package ]; - services.udev.path = [ pkgs.bridge-utils pkgs.iproute ]; + services.udev.path = [ pkgs.bridge-utils pkgs.iproute2 ]; systemd.services.xen-store = { description = "Xen Store Daemon"; |