Diffstat (limited to 'nixos/modules/services/cluster')
18 files changed, 3762 insertions, 0 deletions
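A usage sketch for the corosync module added in the first diff below; the node names and ring addresses are illustrative assumptions, not part of the diff:

  services.corosync = {
    enable = true;
    clusterName = "nixcluster";
    nodelist = [
      { nodeid = 1; name = "node1"; ring_addrs = [ "192.168.0.1" ]; }
      { nodeid = 2; name = "node2"; ring_addrs = [ "192.168.0.2" ]; }
    ];
  };

Because this nodelist has fewer than three entries, the generated corosync.conf sets two_node: 1 so the pair can still reach quorum.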
diff --git a/nixos/modules/services/cluster/corosync/default.nix b/nixos/modules/services/cluster/corosync/default.nix new file mode 100644 index 00000000000..b4144917fee --- /dev/null +++ b/nixos/modules/services/cluster/corosync/default.nix @@ -0,0 +1,112 @@ +{ config, lib, pkgs, ... }: + +with lib; +let + cfg = config.services.corosync; +in +{ + # interface + options.services.corosync = { + enable = mkEnableOption "corosync"; + + package = mkOption { + type = types.package; + default = pkgs.corosync; + defaultText = literalExpression "pkgs.corosync"; + description = "Package that should be used for corosync."; + }; + + clusterName = mkOption { + type = types.str; + default = "nixcluster"; + description = "Name of the corosync cluster."; + }; + + extraOptions = mkOption { + type = with types; listOf str; + default = []; + description = "Additional options with which to start corosync."; + }; + + nodelist = mkOption { + description = "Corosync nodelist: all cluster members."; + default = []; + type = with types; listOf (submodule { + options = { + nodeid = mkOption { + type = int; + description = "Node ID number"; + }; + name = mkOption { + type = str; + description = "Node name"; + }; + ring_addrs = mkOption { + type = listOf str; + description = "List of addresses, one for each ring."; + }; + }; + }); + }; + }; + + # implementation + config = mkIf cfg.enable { + environment.systemPackages = [ cfg.package ]; + + environment.etc."corosync/corosync.conf".text = '' + totem { + version: 2 + secauth: on + cluster_name: ${cfg.clusterName} + transport: knet + } + + nodelist { + ${concatMapStrings ({ nodeid, name, ring_addrs }: '' + node { + nodeid: ${toString nodeid} + name: ${name} + ${concatStrings (imap0 (i: addr: '' + ring${toString i}_addr: ${addr} + '') ring_addrs)} + } + '') cfg.nodelist} + } + + quorum { + # only corosync_votequorum is supported + provider: corosync_votequorum + wait_for_all: 0 + ${optionalString (builtins.length cfg.nodelist < 3) '' + two_node: 1 + ''} + } + + logging { + to_syslog: yes + } + ''; + + environment.etc."corosync/uidgid.d/root".text = '' + # allow pacemaker connection by root + uidgid { + uid: 0 + gid: 0 + } + ''; + + systemd.packages = [ cfg.package ]; + systemd.services.corosync = { + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + StateDirectory = "corosync"; + StateDirectoryMode = "0700"; + }; + }; + + environment.etc."sysconfig/corosync".text = lib.optionalString (cfg.extraOptions != []) '' + COROSYNC_OPTIONS="${lib.escapeShellArgs cfg.extraOptions}" + ''; + }; +} diff --git a/nixos/modules/services/cluster/hadoop/conf.nix b/nixos/modules/services/cluster/hadoop/conf.nix new file mode 100644 index 00000000000..e3c26a0d550 --- /dev/null +++ b/nixos/modules/services/cluster/hadoop/conf.nix @@ -0,0 +1,44 @@ +{ cfg, pkgs, lib }: +let + propertyXml = name: value: lib.optionalString (value != null) '' + <property> + <name>${name}</name> + <value>${builtins.toString value}</value> + </property> + ''; + siteXml = fileName: properties: pkgs.writeTextDir fileName '' + <?xml version="1.0" encoding="UTF-8" standalone="no"?> + <!-- generated by NixOS --> + <configuration> + ${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)} + </configuration> + ''; + cfgLine = name: value: '' + ${name}=${builtins.toString value} + ''; + cfgFile = fileName: properties: pkgs.writeTextDir fileName '' + # generated by NixOS + ${builtins.concatStringsSep "" (pkgs.lib.mapAttrsToList cfgLine properties)} + ''; + userFunctions = '' + 
hadoop_verify_logdir() { + echo Skipping verification of log directory + } + ''; + hadoopEnv = '' + export HADOOP_LOG_DIR=/tmp/hadoop/$USER + ''; +in +pkgs.runCommand "hadoop-conf" {} (with cfg; '' + mkdir -p $out/ + cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/ + cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/ + cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/ + cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/ + cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/ + cp ${cfgFile "container-executor.cfg" containerExecutorCfg}/* $out/ + cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/ + cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/ + cp ${log4jProperties} $out/log4j.properties + ${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") extraConfDirs} +'') diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix new file mode 100644 index 00000000000..a4fdea81037 --- /dev/null +++ b/nixos/modules/services/cluster/hadoop/default.nix @@ -0,0 +1,223 @@ +{ config, lib, options, pkgs, ...}: +let + cfg = config.services.hadoop; + opt = options.services.hadoop; +in +with lib; +{ + imports = [ ./yarn.nix ./hdfs.nix ]; + + options.services.hadoop = { + coreSite = mkOption { + default = {}; + type = types.attrsOf types.anything; + example = literalExpression '' + { + "fs.defaultFS" = "hdfs://localhost"; + } + ''; + description = '' + Hadoop core-site.xml definition + <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml"/> + ''; + }; + coreSiteInternal = mkOption { + default = {}; + type = types.attrsOf types.anything; + internal = true; + description = '' + Internal option to add configs to core-site.xml based on module options + ''; + }; + + hdfsSiteDefault = mkOption { + default = { + "dfs.namenode.rpc-bind-host" = "0.0.0.0"; + "dfs.namenode.http-address" = "0.0.0.0:9870"; + "dfs.namenode.servicerpc-bind-host" = "0.0.0.0"; + "dfs.namenode.http-bind-host" = "0.0.0.0"; + }; + type = types.attrsOf types.anything; + description = '' + Default options for hdfs-site.xml + ''; + }; + hdfsSite = mkOption { + default = {}; + type = types.attrsOf types.anything; + example = literalExpression '' + { + "dfs.nameservices" = "namenode1"; + } + ''; + description = '' + Additional options and overrides for hdfs-site.xml + <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml"/> + ''; + }; + hdfsSiteInternal = mkOption { + default = {}; + type = types.attrsOf types.anything; + internal = true; + description = '' + Internal option to add configs to hdfs-site.xml based on module options + ''; + }; + + mapredSiteDefault = mkOption { + default = { + "mapreduce.framework.name" = "yarn"; + "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}"; + "mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}"; + "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}"; + }; + defaultText = literalExpression '' + { + "mapreduce.framework.name" = "yarn"; + "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}"; + "mapreduce.map.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}"; + "mapreduce.reduce.env" = 
"HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}"; + } + ''; + type = types.attrsOf types.anything; + description = '' + Default options for mapred-site.xml + ''; + }; + mapredSite = mkOption { + default = {}; + type = types.attrsOf types.anything; + example = literalExpression '' + { + "mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC"; + } + ''; + description = '' + Additional options and overrides for mapred-site.xml + <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml"/> + ''; + }; + + yarnSiteDefault = mkOption { + default = { + "yarn.nodemanager.admin-env" = "PATH=$PATH"; + "yarn.nodemanager.aux-services" = "mapreduce_shuffle"; + "yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler"; + "yarn.nodemanager.bind-host" = "0.0.0.0"; + "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor"; + "yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ"; + "yarn.nodemanager.linux-container-executor.group" = "hadoop"; + "yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor"; + "yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager"; + "yarn.resourcemanager.bind-host" = "0.0.0.0"; + "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler"; + }; + type = types.attrsOf types.anything; + description = '' + Default options for yarn-site.xml + ''; + }; + yarnSite = mkOption { + default = {}; + type = types.attrsOf types.anything; + example = literalExpression '' + { + "yarn.resourcemanager.hostname" = "''${config.networking.hostName}"; + } + ''; + description = '' + Additional options and overrides for yarn-site.xml + <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml"/> + ''; + }; + yarnSiteInternal = mkOption { + default = {}; + type = types.attrsOf types.anything; + internal = true; + description = '' + Internal option to add configs to yarn-site.xml based on module options + ''; + }; + + httpfsSite = mkOption { + default = { }; + type = types.attrsOf types.anything; + example = literalExpression '' + { + "hadoop.http.max.threads" = 500; + } + ''; + description = '' + Hadoop httpfs-site.xml definition + <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-hdfs-httpfs/httpfs-default.html"/> + ''; + }; + + log4jProperties = mkOption { + default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties"; + defaultText = literalExpression '' + "''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}/etc/hadoop/log4j.properties" + ''; + type = types.path; + example = literalExpression '' + "''${pkgs.hadoop}/lib/''${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties"; + ''; + description = "log4j.properties file added to HADOOP_CONF_DIR"; + }; + + containerExecutorCfg = mkOption { + default = { + # must be the same as yarn.nodemanager.linux-container-executor.group in yarnSite + "yarn.nodemanager.linux-container-executor.group"="hadoop"; + "min.user.id"=1000; + "feature.terminal.enabled"=1; + "feature.mount-cgroup.enabled" = 1; + }; + type = types.attrsOf types.anything; + example = literalExpression '' + options.services.hadoop.containerExecutorCfg.default // { + 
"feature.terminal.enabled" = 0; + } + ''; + description = '' + Yarn container-executor.cfg definition + <link xlink:href="https://hadoop.apache.org/docs/r2.7.2/hadoop-yarn/hadoop-yarn-site/SecureContainer.html"/> + ''; + }; + + extraConfDirs = mkOption { + default = []; + type = types.listOf types.path; + example = literalExpression '' + [ + ./extraHDFSConfs + ./extraYARNConfs + ] + ''; + description = "Directories containing additional config files to be added to HADOOP_CONF_DIR"; + }; + + gatewayRole.enable = mkEnableOption "gateway role for deploying hadoop configs"; + + package = mkOption { + type = types.package; + default = pkgs.hadoop; + defaultText = literalExpression "pkgs.hadoop"; + description = ""; + }; + }; + + + config = mkIf cfg.gatewayRole.enable { + users.groups.hadoop = { + gid = config.ids.gids.hadoop; + }; + environment = { + systemPackages = [ cfg.package ]; + etc."hadoop-conf".source = let + hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/"; + in "${hadoopConf}"; + variables.HADOOP_CONF_DIR = "/etc/hadoop-conf/"; + }; + }; +} diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix new file mode 100644 index 00000000000..325a002ad32 --- /dev/null +++ b/nixos/modules/services/cluster/hadoop/hdfs.nix @@ -0,0 +1,204 @@ +{ config, lib, pkgs, ... }: +with lib; +let + cfg = config.services.hadoop; + + # Config files for hadoop services + hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/"; + + # Generator for HDFS service options + hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }: { + enable = mkEnableOption serviceName; + restartIfChanged = mkOption { + type = types.bool; + description = '' + Automatically restart the service on config change. + This can be set to false to defer restarts on clusters running critical applications. + Please consider the security implications of inadvertently running an older version, + and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option. + ''; + default = false; + }; + extraFlags = mkOption{ + type = with types; listOf str; + default = []; + description = "Extra command line flags to pass to ${serviceName}"; + example = [ + "-Dcom.sun.management.jmxremote" + "-Dcom.sun.management.jmxremote.port=8010" + ]; + }; + extraEnv = mkOption{ + type = with types; attrsOf str; + default = {}; + description = "Extra environment variables for ${serviceName}"; + }; + } // (optionalAttrs firewallOption { + openFirewall = mkOption { + type = types.bool; + default = false; + description = "Open firewall ports for ${serviceName}."; + }; + }) // (optionalAttrs (extraOpts != null) extraOpts); + + # Generator for HDFS service configs + hadoopServiceConfig = + { name + , serviceOptions ? cfg.hdfs."${toLower name}" + , description ? "Hadoop HDFS ${name}" + , User ? "hdfs" + , allowedTCPPorts ? [ ] + , preStart ? "" + , environment ? { } + , extraConfig ? 
{ } + }: ( + + mkIf serviceOptions.enable ( mkMerge [{ + systemd.services."hdfs-${toLower name}" = { + inherit description preStart; + environment = environment // serviceOptions.extraEnv; + wantedBy = [ "multi-user.target" ]; + inherit (serviceOptions) restartIfChanged; + serviceConfig = { + inherit User; + SyslogIdentifier = "hdfs-${toLower name}"; + ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name} ${escapeShellArgs serviceOptions.extraFlags}"; + Restart = "always"; + }; + }; + + services.hadoop.gatewayRole.enable = true; + + networking.firewall.allowedTCPPorts = mkIf + ((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall) + allowedTCPPorts; + } extraConfig]) + ); + +in +{ + options.services.hadoop.hdfs = { + + namenode = hadoopServiceOption { serviceName = "HDFS NameNode"; } // { + formatOnInit = mkOption { + type = types.bool; + default = false; + description = '' + Format HDFS namenode on first start. This is useful for quickly spinning up + ephemeral HDFS clusters with a single namenode. + For HA clusters, initialization involves multiple steps across multiple nodes. + Follow this guide to initialize an HA cluster manually: + <link xlink:href="https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html"/> + ''; + }; + }; + + datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; } // { + dataDirs = mkOption { + default = null; + description = "Tier and path definitions for datanode storage."; + type = with types; nullOr (listOf (submodule { + options = { + type = mkOption { + type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ]; + description = '' + Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies. + ''; + }; + path = mkOption { + type = path; + example = [ "/var/lib/hadoop/hdfs/dn" ]; + description = "Determines where on the local filesystem a data node should store its blocks."; + }; + }; + })); + }; + }; + + journalnode = hadoopServiceOption { serviceName = "HDFS JournalNode"; }; + + zkfc = hadoopServiceOption { + serviceName = "HDFS ZooKeeper failover controller"; + firewallOption = false; + }; + + httpfs = hadoopServiceOption { serviceName = "HDFS HTTPFS"; } // { + tempPath = mkOption { + type = types.path; + default = "/tmp/hadoop/httpfs"; + description = "HTTPFS_TEMP path used by HTTPFS"; + }; + }; + + }; + + config = mkMerge [ + (hadoopServiceConfig { + name = "NameNode"; + allowedTCPPorts = [ + 9870 # namenode.http-address + 8020 # namenode.rpc-address + 8022 # namenode.servicerpc-address + 8019 # dfs.ha.zkfc.port + ]; + preStart = (mkIf cfg.hdfs.namenode.formatOnInit + "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true" + ); + }) + + (hadoopServiceConfig { + name = "DataNode"; + # port numbers for datanode changed between hadoop 2 and 3 + allowedTCPPorts = if versionAtLeast cfg.package.version "3" then [ + 9864 # datanode.http.address + 9866 # datanode.address + 9867 # datanode.ipc.address + ] else [ + 50075 # datanode.http.address + 50010 # datanode.address + 50020 # datanode.ipc.address + ]; + extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" = let d = cfg.hdfs.datanode.dataDirs; in + if (d != null) then (concatMapStringsSep "," (x: "["+x.type+"]file://"+x.path) cfg.hdfs.datanode.dataDirs) else d; + }) + + (hadoopServiceConfig { + name = "JournalNode"; + allowedTCPPorts = [ + 8480 # dfs.journalnode.http-address + 8485 # dfs.journalnode.rpc-address + ]; + }) + + (hadoopServiceConfig {
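# Worked example for the dataDirs option above (paths are illustrative):
#   services.hadoop.hdfs.datanode.dataDirs = [
#     { type = "SSD"; path = "/data/ssd/dn"; }
#     { type = "DISK"; path = "/data/disk/dn"; }
#   ];
# The DataNode extraConfig above renders this as
#   dfs.datanode.data.dir = [SSD]file:///data/ssd/dn,[DISK]file:///data/disk/dn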
+ name = "zkfc"; + description = "Hadoop HDFS ZooKeeper failover controller"; + }) + + (hadoopServiceConfig { + name = "HTTPFS"; + environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath; + preStart = "mkdir -p $HTTPFS_TEMP"; + User = "httpfs"; + allowedTCPPorts = [ + 14000 # httpfs.http.port + ]; + }) + + (mkIf cfg.gatewayRole.enable { + users.users.hdfs = { + description = "Hadoop HDFS user"; + group = "hadoop"; + uid = config.ids.uids.hdfs; + }; + }) + (mkIf cfg.hdfs.httpfs.enable { + users.users.httpfs = { + description = "Hadoop HTTPFS user"; + group = "hadoop"; + isSystemUser = true; + }; + }) + + ]; +} diff --git a/nixos/modules/services/cluster/hadoop/yarn.nix b/nixos/modules/services/cluster/hadoop/yarn.nix new file mode 100644 index 00000000000..74e16bdec68 --- /dev/null +++ b/nixos/modules/services/cluster/hadoop/yarn.nix @@ -0,0 +1,200 @@ +{ config, lib, pkgs, ...}: +with lib; +let + cfg = config.services.hadoop; + hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/"; + restartIfChanged = mkOption { + type = types.bool; + description = '' + Automatically restart the service on config change. + This can be set to false to defer restarts on clusters running critical applications. + Please consider the security implications of inadvertently running an older version, + and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option. + ''; + default = false; + }; + extraFlags = mkOption{ + type = with types; listOf str; + default = []; + description = "Extra command line flags to pass to the service"; + example = [ + "-Dcom.sun.management.jmxremote" + "-Dcom.sun.management.jmxremote.port=8010" + ]; + }; + extraEnv = mkOption{ + type = with types; attrsOf str; + default = {}; + description = "Extra environment variables"; + }; +in +{ + options.services.hadoop.yarn = { + resourcemanager = { + enable = mkEnableOption "Hadoop YARN ResourceManager"; + inherit restartIfChanged extraFlags extraEnv; + + openFirewall = mkOption { + type = types.bool; + default = false; + description = '' + Open firewall ports for resourcemanager + ''; + }; + }; + nodemanager = { + enable = mkEnableOption "Hadoop YARN NodeManager"; + inherit restartIfChanged extraFlags extraEnv; + + resource = { + cpuVCores = mkOption { + description = "Number of vcores that can be allocated for containers."; + type = with types; nullOr ints.positive; + default = null; + }; + maximumAllocationVCores = mkOption { + description = "The maximum virtual CPU cores any container can be allocated."; + type = with types; nullOr ints.positive; + default = null; + }; + memoryMB = mkOption { + description = "Amount of physical memory, in MB, that can be allocated for containers."; + type = with types; nullOr ints.positive; + default = null; + }; + maximumAllocationMB = mkOption { + description = "The maximum physical memory any container can be allocated."; + type = with types; nullOr ints.positive; + default = null; + }; + }; + + useCGroups = mkOption { + type = types.bool; + default = true; + description = '' + Use cgroups to enforce resource limits on containers + ''; + }; + + localDir = mkOption { + description = "List of directories to store localized files in."; + type = with types; nullOr (listOf path); + example = [ "/var/lib/hadoop/yarn/nm" ]; + default = null; + }; + + addBinBash = mkOption { + type = types.bool; + default = true; + description = '' + Add /bin/bash. This is needed by the linux container executor's launch script. 
+ ''; + }; + openFirewall = mkOption { + type = types.bool; + default = false; + description = '' + Open firewall ports for nodemanager. + Because containers can listen on any ephemeral port, TCP ports 1024–65535 will be opened. + ''; + }; + }; + }; + + config = mkMerge [ + (mkIf cfg.gatewayRole.enable { + users.users.yarn = { + description = "Hadoop YARN user"; + group = "hadoop"; + uid = config.ids.uids.yarn; + }; + }) + + (mkIf cfg.yarn.resourcemanager.enable { + systemd.services.yarn-resourcemanager = { + description = "Hadoop YARN ResourceManager"; + wantedBy = [ "multi-user.target" ]; + inherit (cfg.yarn.resourcemanager) restartIfChanged; + environment = cfg.yarn.resourcemanager.extraEnv; + + serviceConfig = { + User = "yarn"; + SyslogIdentifier = "yarn-resourcemanager"; + ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " + + " resourcemanager ${escapeShellArgs cfg.yarn.resourcemanager.extraFlags}"; + Restart = "always"; + }; + }; + + services.hadoop.gatewayRole.enable = true; + + networking.firewall.allowedTCPPorts = (mkIf cfg.yarn.resourcemanager.openFirewall [ + 8088 # resourcemanager.webapp.address + 8030 # resourcemanager.scheduler.address + 8031 # resourcemanager.resource-tracker.address + 8032 # resourcemanager.address + 8033 # resourcemanager.admin.address + ]); + }) + + (mkIf cfg.yarn.nodemanager.enable { + # Needed because yarn hardcodes /bin/bash in container start scripts + # These scripts can't be patched, they are generated at runtime + systemd.tmpfiles.rules = [ + (mkIf cfg.yarn.nodemanager.addBinBash "L /bin/bash - - - - /run/current-system/sw/bin/bash") + ]; + + systemd.services.yarn-nodemanager = { + description = "Hadoop YARN NodeManager"; + wantedBy = [ "multi-user.target" ]; + inherit (cfg.yarn.nodemanager) restartIfChanged; + environment = cfg.yarn.nodemanager.extraEnv; + + preStart = '' + # create log dir + mkdir -p /var/log/hadoop/yarn/nodemanager + chown yarn:hadoop /var/log/hadoop/yarn/nodemanager + + # set up setuid container executor binary + umount /run/wrappers/yarn-nodemanager/cgroup/cpu || true + rm -rf /run/wrappers/yarn-nodemanager/ || true + mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop,cgroup/cpu} + cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/ + chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor + chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor + cp ${hadoopConf}/container-executor.cfg /run/wrappers/yarn-nodemanager/etc/hadoop/ + ''; + + serviceConfig = { + User = "yarn"; + SyslogIdentifier = "yarn-nodemanager"; + PermissionsStartOnly = true; + ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " + + " nodemanager ${escapeShellArgs cfg.yarn.nodemanager.extraFlags}"; + Restart = "always"; + }; + }; + + services.hadoop.gatewayRole.enable = true; + + services.hadoop.yarnSiteInternal = with cfg.yarn.nodemanager; { + "yarn.nodemanager.local-dirs" = localDir; + "yarn.scheduler.maximum-allocation-vcores" = resource.maximumAllocationVCores; + "yarn.scheduler.maximum-allocation-mb" = resource.maximumAllocationMB; + "yarn.nodemanager.resource.cpu-vcores" = resource.cpuVCores; + "yarn.nodemanager.resource.memory-mb" = resource.memoryMB; + } // mkIf useCGroups { + "yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn"; + "yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler"; + 
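# A NodeManager usage sketch; the resource figures are illustrative
# assumptions:
#   services.hadoop.yarn = {
#     resourcemanager.enable = true;
#     nodemanager = {
#       enable = true;
#       resource = { cpuVCores = 8; memoryMB = 16384; };
#       localDir = [ "/var/lib/hadoop/yarn/nm" ];
#     };
#   };
# These values flow into the yarnSiteInternal attributes being built here.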
"yarn.nodemanager.linux-container-executor.cgroups.mount" = "true"; + "yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup"; + }; + + networking.firewall.allowedTCPPortRanges = [ + (mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;}) + ]; + }) + + ]; +} diff --git a/nixos/modules/services/cluster/k3s/default.nix b/nixos/modules/services/cluster/k3s/default.nix new file mode 100644 index 00000000000..3a36cfa3f37 --- /dev/null +++ b/nixos/modules/services/cluster/k3s/default.nix @@ -0,0 +1,128 @@ +{ config, lib, pkgs, ... }: + +with lib; +let + cfg = config.services.k3s; +in +{ + # interface + options.services.k3s = { + enable = mkEnableOption "k3s"; + + package = mkOption { + type = types.package; + default = pkgs.k3s; + defaultText = literalExpression "pkgs.k3s"; + description = "Package that should be used for k3s"; + }; + + role = mkOption { + description = '' + Whether k3s should run as a server or agent. + Note that the server, by default, also runs as an agent. + ''; + default = "server"; + type = types.enum [ "server" "agent" ]; + }; + + serverAddr = mkOption { + type = types.str; + description = "The k3s server to connect to. This option only makes sense for an agent."; + example = "https://10.0.0.10:6443"; + default = ""; + }; + + token = mkOption { + type = types.str; + description = '' + The k3s token to use when connecting to the server. This option only makes sense for an agent. + WARNING: This option will expose store your token unencrypted world-readable in the nix store. + If this is undesired use the tokenFile option instead. + ''; + default = ""; + }; + + tokenFile = mkOption { + type = types.nullOr types.path; + description = "File path containing k3s token to use when connecting to the server. This option only makes sense for an agent."; + default = null; + }; + + docker = mkOption { + type = types.bool; + default = false; + description = "Use docker to run containers rather than the built-in containerd."; + }; + + extraFlags = mkOption { + description = "Extra flags to pass to the k3s command."; + type = types.str; + default = ""; + example = "--no-deploy traefik --cluster-cidr 10.24.0.0/16"; + }; + + disableAgent = mkOption { + type = types.bool; + default = false; + description = "Only run the server. This option only makes sense for a server."; + }; + + configPath = mkOption { + type = types.nullOr types.path; + default = null; + description = "File path containing the k3s YAML config. 
This is useful when the config is generated (for example on boot)."; + }; + }; + + # implementation + + config = mkIf cfg.enable { + assertions = [ + { + assertion = cfg.role == "agent" -> (cfg.configPath != null || cfg.serverAddr != ""); + message = "serverAddr or configPath (with 'server' key) should be set if role is 'agent'"; + } + { + assertion = cfg.role == "agent" -> cfg.configPath != null || cfg.tokenFile != null || cfg.token != ""; + message = "token or tokenFile or configPath (with 'token' or 'token-file' keys) should be set if role is 'agent'"; + } + ]; + + virtualisation.docker = mkIf cfg.docker { + enable = mkDefault true; + }; + environment.systemPackages = [ config.services.k3s.package ]; + + systemd.services.k3s = { + description = "k3s service"; + after = [ "network.service" "firewall.service" ] ++ (optional cfg.docker "docker.service"); + wants = [ "network.service" "firewall.service" ]; + wantedBy = [ "multi-user.target" ]; + path = optional config.boot.zfs.enabled config.boot.zfs.package; + serviceConfig = { + # See: https://github.com/rancher/k3s/blob/dddbd16305284ae4bd14c0aade892412310d7edc/install.sh#L197 + Type = if cfg.role == "agent" then "exec" else "notify"; + KillMode = "process"; + Delegate = "yes"; + Restart = "always"; + RestartSec = "5s"; + LimitNOFILE = 1048576; + LimitNPROC = "infinity"; + LimitCORE = "infinity"; + TasksMax = "infinity"; + ExecStart = concatStringsSep " \\\n " ( + [ + "${cfg.package}/bin/k3s ${cfg.role}" + ] ++ (optional cfg.docker "--docker") + ++ (optional (cfg.docker && config.systemd.enableUnifiedCgroupHierarchy) "--kubelet-arg=cgroup-driver=systemd") + ++ (optional cfg.disableAgent "--disable-agent") + ++ (optional (cfg.serverAddr != "") "--server ${cfg.serverAddr}") + ++ (optional (cfg.token != "") "--token ${cfg.token}") + ++ (optional (cfg.tokenFile != null) "--token-file ${cfg.tokenFile}") + ++ (optional (cfg.configPath != null) "--config ${cfg.configPath}") + ++ [ cfg.extraFlags ] + ); + }; + }; + }; +} diff --git a/nixos/modules/services/cluster/kubernetes/addon-manager.nix b/nixos/modules/services/cluster/kubernetes/addon-manager.nix new file mode 100644 index 00000000000..b677d900ff5 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/addon-manager.nix @@ -0,0 +1,171 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.addonManager; + + isRBACEnabled = elem "RBAC" top.apiserver.authorizationMode; + + addons = pkgs.runCommand "kubernetes-addons" { } '' + mkdir -p $out + # since we are mounting the addons to the addon manager, they need to be copied + ${concatMapStringsSep ";" (a: "cp -v ${a}/* $out/") (mapAttrsToList (name: addon: + pkgs.writeTextDir "${name}.json" (builtins.toJSON addon) + ) (cfg.addons))} + ''; +in +{ + ###### interface + options.services.kubernetes.addonManager = with lib.types; { + + bootstrapAddons = mkOption { + description = '' + Bootstrap addons are like regular addons, but they are applied with cluster-admin rights. + They are applied at addon-manager startup only. + ''; + default = { }; + type = attrsOf attrs; + example = literalExpression '' + { + "my-service" = { + "apiVersion" = "v1"; + "kind" = "Service"; + "metadata" = { + "name" = "my-service"; + "namespace" = "default"; + }; + "spec" = { ... 
}; + }; + } + ''; + }; + + addons = mkOption { + description = "Kubernetes addons (any kind of Kubernetes resource can be an addon)."; + default = { }; + type = attrsOf (either attrs (listOf attrs)); + example = literalExpression '' + { + "my-service" = { + "apiVersion" = "v1"; + "kind" = "Service"; + "metadata" = { + "name" = "my-service"; + "namespace" = "default"; + }; + "spec" = { ... }; + }; + } + // import <nixpkgs/nixos/modules/services/cluster/kubernetes/dns.nix> { cfg = config.services.kubernetes; }; + ''; + }; + + enable = mkEnableOption "Kubernetes addon manager."; + }; + + ###### implementation + config = mkIf cfg.enable { + environment.etc."kubernetes/addons".source = "${addons}/"; + + systemd.services.kube-addon-manager = { + description = "Kubernetes addon manager"; + wantedBy = [ "kubernetes.target" ]; + after = [ "kube-apiserver.service" ]; + environment.ADDON_PATH = "/etc/kubernetes/addons/"; + path = [ pkgs.gawk ]; + serviceConfig = { + Slice = "kubernetes.slice"; + ExecStart = "${top.package}/bin/kube-addons"; + WorkingDirectory = top.dataDir; + User = "kubernetes"; + Group = "kubernetes"; + Restart = "on-failure"; + RestartSec = 10; + }; + unitConfig = { + StartLimitIntervalSec = 0; + }; + }; + + services.kubernetes.addonManager.bootstrapAddons = mkIf isRBACEnabled + (let + name = "system:kube-addon-manager"; + namespace = "kube-system"; + in + { + + kube-addon-manager-r = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "Role"; + metadata = { + inherit name namespace; + }; + rules = [{ + apiGroups = ["*"]; + resources = ["*"]; + verbs = ["*"]; + }]; + }; + + kube-addon-manager-rb = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "RoleBinding"; + metadata = { + inherit name namespace; + }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "Role"; + inherit name; + }; + subjects = [{ + apiGroup = "rbac.authorization.k8s.io"; + kind = "User"; + inherit name; + }]; + }; + + kube-addon-manager-cluster-lister-cr = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRole"; + metadata = { + name = "${name}:cluster-lister"; + }; + rules = [{ + apiGroups = ["*"]; + resources = ["*"]; + verbs = ["list"]; + }]; + }; + + kube-addon-manager-cluster-lister-crb = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRoleBinding"; + metadata = { + name = "${name}:cluster-lister"; + }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "ClusterRole"; + name = "${name}:cluster-lister"; + }; + subjects = [{ + kind = "User"; + inherit name; + }]; + }; + }); + + services.kubernetes.pki.certs = { + addonManager = top.lib.mkCert { + name = "kube-addon-manager"; + CN = "system:kube-addon-manager"; + action = "systemctl restart kube-addon-manager.service"; + }; + }; + }; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/addons/dns.nix b/nixos/modules/services/cluster/kubernetes/addons/dns.nix new file mode 100644 index 00000000000..7bd4991f43f --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/addons/dns.nix @@ -0,0 +1,368 @@ +{ config, options, pkgs, lib, ... }: + +with lib; + +let + version = "1.7.1"; + cfg = config.services.kubernetes.addons.dns; + ports = { + dns = 10053; + health = 10054; + metrics = 10055; + }; +in { + options.services.kubernetes.addons.dns = { + enable = mkEnableOption "kubernetes dns addon"; + + clusterIp = mkOption { + description = "Dns addon clusterIP"; + + # this default is also what kubernetes users + default = ( + concatStringsSep "." 
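# Worked example of this default: with the apiserver default
# serviceClusterIpRange = "10.0.0.0/24", splitString "." yields
# ["10" "0" "0" "0/24"]; take 3 re-joined with "." gives "10.0.0",
# so the addon's clusterIp defaults to "10.0.0.254".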
( + take 3 (splitString "." config.services.kubernetes.apiserver.serviceClusterIpRange + )) + ) + ".254"; + defaultText = literalDocBook '' + The <literal>x.y.z.254</literal> IP of + <literal>config.${options.services.kubernetes.apiserver.serviceClusterIpRange}</literal>. + ''; + type = types.str; + }; + + clusterDomain = mkOption { + description = "Dns cluster domain"; + default = "cluster.local"; + type = types.str; + }; + + replicas = mkOption { + description = "Number of DNS pod replicas to deploy in the cluster."; + default = 2; + type = types.int; + }; + + reconcileMode = mkOption { + description = '' + Controls the addon manager reconciliation mode for the DNS addon. + + Setting reconcile mode to EnsureExists makes it possible to tailor DNS behavior by editing the coredns ConfigMap. + + See: <link xlink:href="https://github.com/kubernetes/kubernetes/blob/master/cluster/addons/addon-manager/README.md"/>. + ''; + default = "Reconcile"; + type = types.enum [ "Reconcile" "EnsureExists" ]; + }; + + coredns = mkOption { + description = "Docker image to seed for the CoreDNS container."; + type = types.attrs; + default = { + imageName = "coredns/coredns"; + imageDigest = "sha256:4a6e0769130686518325b21b0c1d0688b54e7c79244d48e1b15634e98e40c6ef"; + finalImageTag = version; + sha256 = "02r440xcdsgi137k5lmmvp0z5w5fmk8g9mysq5pnysq1wl8sj6mw"; + }; + }; + + corefile = mkOption { + description = '' + Custom coredns corefile configuration. + + See: <link xlink:href="https://coredns.io/manual/toc/#configuration"/>. + ''; + type = types.str; + default = '' + .:${toString ports.dns} { + errors + health :${toString ports.health} + kubernetes ${cfg.clusterDomain} in-addr.arpa ip6.arpa { + pods insecure + fallthrough in-addr.arpa ip6.arpa + } + prometheus :${toString ports.metrics} + forward . /etc/resolv.conf + cache 30 + loop + reload + loadbalance + }''; + defaultText = literalExpression '' + ''' + .:${toString ports.dns} { + errors + health :${toString ports.health} + kubernetes ''${config.services.kubernetes.addons.dns.clusterDomain} in-addr.arpa ip6.arpa { + pods insecure + fallthrough in-addr.arpa ip6.arpa + } + prometheus :${toString ports.metrics} + forward . 
/etc/resolv.conf + cache 30 + loop + reload + loadbalance + } + ''' + ''; + }; + }; + + config = mkIf cfg.enable { + services.kubernetes.kubelet.seedDockerImages = + singleton (pkgs.dockerTools.pullImage cfg.coredns); + + services.kubernetes.addonManager.bootstrapAddons = { + coredns-cr = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRole"; + metadata = { + labels = { + "addonmanager.kubernetes.io/mode" = "Reconcile"; + k8s-app = "kube-dns"; + "kubernetes.io/cluster-service" = "true"; + "kubernetes.io/bootstrapping" = "rbac-defaults"; + }; + name = "system:coredns"; + }; + rules = [ + { + apiGroups = [ "" ]; + resources = [ "endpoints" "services" "pods" "namespaces" ]; + verbs = [ "list" "watch" ]; + } + { + apiGroups = [ "" ]; + resources = [ "nodes" ]; + verbs = [ "get" ]; + } + ]; + }; + + coredns-crb = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRoleBinding"; + metadata = { + annotations = { + "rbac.authorization.kubernetes.io/autoupdate" = "true"; + }; + labels = { + "addonmanager.kubernetes.io/mode" = "Reconcile"; + k8s-app = "kube-dns"; + "kubernetes.io/cluster-service" = "true"; + "kubernetes.io/bootstrapping" = "rbac-defaults"; + }; + name = "system:coredns"; + }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "ClusterRole"; + name = "system:coredns"; + }; + subjects = [ + { + kind = "ServiceAccount"; + name = "coredns"; + namespace = "kube-system"; + } + ]; + }; + }; + + services.kubernetes.addonManager.addons = { + coredns-sa = { + apiVersion = "v1"; + kind = "ServiceAccount"; + metadata = { + labels = { + "addonmanager.kubernetes.io/mode" = "Reconcile"; + k8s-app = "kube-dns"; + "kubernetes.io/cluster-service" = "true"; + }; + name = "coredns"; + namespace = "kube-system"; + }; + }; + + coredns-cm = { + apiVersion = "v1"; + kind = "ConfigMap"; + metadata = { + labels = { + "addonmanager.kubernetes.io/mode" = cfg.reconcileMode; + k8s-app = "kube-dns"; + "kubernetes.io/cluster-service" = "true"; + }; + name = "coredns"; + namespace = "kube-system"; + }; + data = { + Corefile = cfg.corefile; + }; + }; + + coredns-deploy = { + apiVersion = "apps/v1"; + kind = "Deployment"; + metadata = { + labels = { + "addonmanager.kubernetes.io/mode" = cfg.reconcileMode; + k8s-app = "kube-dns"; + "kubernetes.io/cluster-service" = "true"; + "kubernetes.io/name" = "CoreDNS"; + }; + name = "coredns"; + namespace = "kube-system"; + }; + spec = { + replicas = cfg.replicas; + selector = { + matchLabels = { k8s-app = "kube-dns"; }; + }; + strategy = { + rollingUpdate = { maxUnavailable = 1; }; + type = "RollingUpdate"; + }; + template = { + metadata = { + labels = { + k8s-app = "kube-dns"; + }; + }; + spec = { + containers = [ + { + args = [ "-conf" "/etc/coredns/Corefile" ]; + image = with cfg.coredns; "${imageName}:${finalImageTag}"; + imagePullPolicy = "Never"; + livenessProbe = { + failureThreshold = 5; + httpGet = { + path = "/health"; + port = ports.health; + scheme = "HTTP"; + }; + initialDelaySeconds = 60; + successThreshold = 1; + timeoutSeconds = 5; + }; + name = "coredns"; + ports = [ + { + containerPort = ports.dns; + name = "dns"; + protocol = "UDP"; + } + { + containerPort = ports.dns; + name = "dns-tcp"; + protocol = "TCP"; + } + { + containerPort = ports.metrics; + name = "metrics"; + protocol = "TCP"; + } + ]; + resources = { + limits = { + memory = "170Mi"; + }; + requests = { + cpu = "100m"; + memory = "70Mi"; + }; + }; + securityContext = { + allowPrivilegeEscalation = false; + capabilities = { + drop = [ "all" ]; + }; + 
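# This securityContext locks the container down: no privilege escalation,
# all capabilities dropped, and (next attribute) a read-only root
# filesystem. Enabling the whole addon is a one-line sketch:
#   services.kubernetes.addons.dns.enable = true;
# which seeds the CoreDNS image, installs these manifests through the
# addon manager, and points kubelet.clusterDns at cfg.clusterIp (see the
# end of this file).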
readOnlyRootFilesystem = true; + }; + volumeMounts = [ + { + mountPath = "/etc/coredns"; + name = "config-volume"; + readOnly = true; + } + ]; + } + ]; + dnsPolicy = "Default"; + nodeSelector = { + "beta.kubernetes.io/os" = "linux"; + }; + serviceAccountName = "coredns"; + tolerations = [ + { + effect = "NoSchedule"; + key = "node-role.kubernetes.io/master"; + } + { + key = "CriticalAddonsOnly"; + operator = "Exists"; + } + ]; + volumes = [ + { + configMap = { + items = [ + { + key = "Corefile"; + path = "Corefile"; + } + ]; + name = "coredns"; + }; + name = "config-volume"; + } + ]; + }; + }; + }; + }; + + coredns-svc = { + apiVersion = "v1"; + kind = "Service"; + metadata = { + annotations = { + "prometheus.io/port" = toString ports.metrics; + "prometheus.io/scrape" = "true"; + }; + labels = { + "addonmanager.kubernetes.io/mode" = "Reconcile"; + k8s-app = "kube-dns"; + "kubernetes.io/cluster-service" = "true"; + "kubernetes.io/name" = "CoreDNS"; + }; + name = "kube-dns"; + namespace = "kube-system"; + }; + spec = { + clusterIP = cfg.clusterIp; + ports = [ + { + name = "dns"; + port = 53; + targetPort = ports.dns; + protocol = "UDP"; + } + { + name = "dns-tcp"; + port = 53; + targetPort = ports.dns; + protocol = "TCP"; + } + ]; + selector = { k8s-app = "kube-dns"; }; + }; + }; + }; + + services.kubernetes.kubelet.clusterDns = mkDefault cfg.clusterIp; + }; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/apiserver.nix b/nixos/modules/services/cluster/kubernetes/apiserver.nix new file mode 100644 index 00000000000..a192e93badc --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/apiserver.nix @@ -0,0 +1,500 @@ + { config, lib, options, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + otop = options.services.kubernetes; + cfg = top.apiserver; + + isRBACEnabled = elem "RBAC" cfg.authorizationMode; + + apiserverServiceIP = (concatStringsSep "." ( + take 3 (splitString "." cfg.serviceClusterIpRange + )) + ".1"); +in +{ + + imports = [ + (mkRenamedOptionModule [ "services" "kubernetes" "apiserver" "admissionControl" ] [ "services" "kubernetes" "apiserver" "enableAdmissionPlugins" ]) + (mkRenamedOptionModule [ "services" "kubernetes" "apiserver" "address" ] ["services" "kubernetes" "apiserver" "bindAddress"]) + (mkRenamedOptionModule [ "services" "kubernetes" "apiserver" "port" ] ["services" "kubernetes" "apiserver" "insecurePort"]) + (mkRemovedOptionModule [ "services" "kubernetes" "apiserver" "publicAddress" ] "") + (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "servers" ] [ "services" "kubernetes" "apiserver" "etcd" "servers" ]) + (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "keyFile" ] [ "services" "kubernetes" "apiserver" "etcd" "keyFile" ]) + (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "certFile" ] [ "services" "kubernetes" "apiserver" "etcd" "certFile" ]) + (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "caFile" ] [ "services" "kubernetes" "apiserver" "etcd" "caFile" ]) + ]; + + ###### interface + options.services.kubernetes.apiserver = with lib.types; { + + advertiseAddress = mkOption { + description = '' + Kubernetes apiserver IP address on which to advertise the apiserver + to members of the cluster. This address must be reachable by the rest + of the cluster. 
+ ''; + default = null; + type = nullOr str; + }; + + allowPrivileged = mkOption { + description = "Whether to allow privileged containers on Kubernetes."; + default = false; + type = bool; + }; + + authorizationMode = mkOption { + description = '' + Kubernetes apiserver authorization mode (AlwaysAllow/AlwaysDeny/ABAC/Webhook/RBAC/Node). See + <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/authorization/"/> + ''; + default = ["RBAC" "Node"]; # Enabling RBAC by default, although kubernetes default is AllowAllow + type = listOf (enum ["AlwaysAllow" "AlwaysDeny" "ABAC" "Webhook" "RBAC" "Node"]); + }; + + authorizationPolicy = mkOption { + description = '' + Kubernetes apiserver authorization policy file. See + <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/authorization/"/> + ''; + default = []; + type = listOf attrs; + }; + + basicAuthFile = mkOption { + description = '' + Kubernetes apiserver basic authentication file. See + <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/authentication"/> + ''; + default = null; + type = nullOr path; + }; + + bindAddress = mkOption { + description = '' + The IP address on which to listen for the --secure-port port. + The associated interface(s) must be reachable by the rest + of the cluster, and by CLI/web clients. + ''; + default = "0.0.0.0"; + type = str; + }; + + clientCaFile = mkOption { + description = "Kubernetes apiserver CA file for client auth."; + default = top.caFile; + defaultText = literalExpression "config.${otop.caFile}"; + type = nullOr path; + }; + + disableAdmissionPlugins = mkOption { + description = '' + Kubernetes admission control plugins to disable. See + <link xlink:href="https://kubernetes.io/docs/admin/admission-controllers/"/> + ''; + default = []; + type = listOf str; + }; + + enable = mkEnableOption "Kubernetes apiserver"; + + enableAdmissionPlugins = mkOption { + description = '' + Kubernetes admission control plugins to enable. 
See + <link xlink:href="https://kubernetes.io/docs/admin/admission-controllers/"/> + ''; + default = [ + "NamespaceLifecycle" "LimitRanger" "ServiceAccount" + "ResourceQuota" "DefaultStorageClass" "DefaultTolerationSeconds" + "NodeRestriction" + ]; + example = [ + "NamespaceLifecycle" "NamespaceExists" "LimitRanger" + "SecurityContextDeny" "ServiceAccount" "ResourceQuota" + "PodSecurityPolicy" "NodeRestriction" "DefaultStorageClass" + ]; + type = listOf str; + }; + + etcd = { + servers = mkOption { + description = "List of etcd servers."; + default = ["http://127.0.0.1:2379"]; + type = types.listOf types.str; + }; + + keyFile = mkOption { + description = "Etcd key file."; + default = null; + type = types.nullOr types.path; + }; + + certFile = mkOption { + description = "Etcd cert file."; + default = null; + type = types.nullOr types.path; + }; + + caFile = mkOption { + description = "Etcd ca file."; + default = top.caFile; + defaultText = literalExpression "config.${otop.caFile}"; + type = types.nullOr types.path; + }; + }; + + extraOpts = mkOption { + description = "Kubernetes apiserver extra command line options."; + default = ""; + type = separatedString " "; + }; + + extraSANs = mkOption { + description = "Extra x509 Subject Alternative Names to be added to the kubernetes apiserver tls cert."; + default = []; + type = listOf str; + }; + + featureGates = mkOption { + description = "List set of feature gates"; + default = top.featureGates; + defaultText = literalExpression "config.${otop.featureGates}"; + type = listOf str; + }; + + insecureBindAddress = mkOption { + description = "The IP address on which to serve the --insecure-port."; + default = "127.0.0.1"; + type = str; + }; + + insecurePort = mkOption { + description = "Kubernetes apiserver insecure listening port. (0 = disabled)"; + default = 0; + type = int; + }; + + kubeletClientCaFile = mkOption { + description = "Path to a cert file for connecting to kubelet."; + default = top.caFile; + defaultText = literalExpression "config.${otop.caFile}"; + type = nullOr path; + }; + + kubeletClientCertFile = mkOption { + description = "Client certificate to use for connections to kubelet."; + default = null; + type = nullOr path; + }; + + kubeletClientKeyFile = mkOption { + description = "Key to use for connections to kubelet."; + default = null; + type = nullOr path; + }; + + preferredAddressTypes = mkOption { + description = "List of the preferred NodeAddressTypes to use for kubelet connections."; + type = nullOr str; + default = null; + }; + + proxyClientCertFile = mkOption { + description = "Client certificate to use for connections to proxy."; + default = null; + type = nullOr path; + }; + + proxyClientKeyFile = mkOption { + description = "Key to use for connections to proxy."; + default = null; + type = nullOr path; + }; + + runtimeConfig = mkOption { + description = '' + Api runtime configuration. See + <link xlink:href="https://kubernetes.io/docs/tasks/administer-cluster/cluster-management/"/> + ''; + default = "authentication.k8s.io/v1beta1=true"; + example = "api/all=false,api/v1=true"; + type = str; + }; + + storageBackend = mkOption { + description = '' + Kubernetes apiserver storage backend. + ''; + default = "etcd3"; + type = enum ["etcd2" "etcd3"]; + }; + + securePort = mkOption { + description = "Kubernetes apiserver secure port."; + default = 6443; + type = int; + }; + + apiAudiences = mkOption { + description = '' + Kubernetes apiserver ServiceAccount issuer. 
+ ''; + default = "api,https://kubernetes.default.svc"; + type = str; + }; + + serviceAccountIssuer = mkOption { + description = '' + Kubernetes apiserver ServiceAccount issuer. + ''; + default = "https://kubernetes.default.svc"; + type = str; + }; + + serviceAccountSigningKeyFile = mkOption { + description = '' + Path to the file that contains the current private key of the service + account token issuer. The issuer will sign issued ID tokens with this + private key. + ''; + type = path; + }; + + serviceAccountKeyFile = mkOption { + description = '' + File containing PEM-encoded x509 RSA or ECDSA private or public keys, + used to verify ServiceAccount tokens. The specified file can contain + multiple keys, and the flag can be specified multiple times with + different files. If unspecified, --tls-private-key-file is used. + Must be specified when --service-account-signing-key is provided + ''; + type = path; + }; + + serviceClusterIpRange = mkOption { + description = '' + A CIDR notation IP range from which to assign service cluster IPs. + This must not overlap with any IP ranges assigned to nodes for pods. + ''; + default = "10.0.0.0/24"; + type = str; + }; + + tlsCertFile = mkOption { + description = "Kubernetes apiserver certificate file."; + default = null; + type = nullOr path; + }; + + tlsKeyFile = mkOption { + description = "Kubernetes apiserver private key file."; + default = null; + type = nullOr path; + }; + + tokenAuthFile = mkOption { + description = '' + Kubernetes apiserver token authentication file. See + <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/authentication"/> + ''; + default = null; + type = nullOr path; + }; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. See + <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/> + ''; + default = null; + type = nullOr int; + }; + + webhookConfig = mkOption { + description = '' + Kubernetes apiserver Webhook config file. It uses the kubeconfig file format. 
+ See <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/webhook/"/> + ''; + default = null; + type = nullOr path; + }; + + }; + + + ###### implementation + config = mkMerge [ + + (mkIf cfg.enable { + systemd.services.kube-apiserver = { + description = "Kubernetes APIServer Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "network.target" ]; + serviceConfig = { + Slice = "kubernetes.slice"; + ExecStart = ''${top.package}/bin/kube-apiserver \ + --allow-privileged=${boolToString cfg.allowPrivileged} \ + --authorization-mode=${concatStringsSep "," cfg.authorizationMode} \ + ${optionalString (elem "ABAC" cfg.authorizationMode) + "--authorization-policy-file=${ + pkgs.writeText "kube-auth-policy.jsonl" + (concatMapStringsSep "\n" (l: builtins.toJSON l) cfg.authorizationPolicy) + }" + } \ + ${optionalString (elem "Webhook" cfg.authorizationMode) + "--authorization-webhook-config-file=${cfg.webhookConfig}" + } \ + --bind-address=${cfg.bindAddress} \ + ${optionalString (cfg.advertiseAddress != null) + "--advertise-address=${cfg.advertiseAddress}"} \ + ${optionalString (cfg.clientCaFile != null) + "--client-ca-file=${cfg.clientCaFile}"} \ + --disable-admission-plugins=${concatStringsSep "," cfg.disableAdmissionPlugins} \ + --enable-admission-plugins=${concatStringsSep "," cfg.enableAdmissionPlugins} \ + --etcd-servers=${concatStringsSep "," cfg.etcd.servers} \ + ${optionalString (cfg.etcd.caFile != null) + "--etcd-cafile=${cfg.etcd.caFile}"} \ + ${optionalString (cfg.etcd.certFile != null) + "--etcd-certfile=${cfg.etcd.certFile}"} \ + ${optionalString (cfg.etcd.keyFile != null) + "--etcd-keyfile=${cfg.etcd.keyFile}"} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + ${optionalString (cfg.basicAuthFile != null) + "--basic-auth-file=${cfg.basicAuthFile}"} \ + ${optionalString (cfg.kubeletClientCaFile != null) + "--kubelet-certificate-authority=${cfg.kubeletClientCaFile}"} \ + ${optionalString (cfg.kubeletClientCertFile != null) + "--kubelet-client-certificate=${cfg.kubeletClientCertFile}"} \ + ${optionalString (cfg.kubeletClientKeyFile != null) + "--kubelet-client-key=${cfg.kubeletClientKeyFile}"} \ + ${optionalString (cfg.preferredAddressTypes != null) + "--kubelet-preferred-address-types=${cfg.preferredAddressTypes}"} \ + ${optionalString (cfg.proxyClientCertFile != null) + "--proxy-client-cert-file=${cfg.proxyClientCertFile}"} \ + ${optionalString (cfg.proxyClientKeyFile != null) + "--proxy-client-key-file=${cfg.proxyClientKeyFile}"} \ + --insecure-bind-address=${cfg.insecureBindAddress} \ + --insecure-port=${toString cfg.insecurePort} \ + ${optionalString (cfg.runtimeConfig != "") + "--runtime-config=${cfg.runtimeConfig}"} \ + --secure-port=${toString cfg.securePort} \ + --api-audiences=${toString cfg.apiAudiences} \ + --service-account-issuer=${toString cfg.serviceAccountIssuer} \ + --service-account-signing-key-file=${cfg.serviceAccountSigningKeyFile} \ + --service-account-key-file=${cfg.serviceAccountKeyFile} \ + --service-cluster-ip-range=${cfg.serviceClusterIpRange} \ + --storage-backend=${cfg.storageBackend} \ + ${optionalString (cfg.tlsCertFile != null) + "--tls-cert-file=${cfg.tlsCertFile}"} \ + ${optionalString (cfg.tlsKeyFile != null) + "--tls-private-key-file=${cfg.tlsKeyFile}"} \ + ${optionalString (cfg.tokenAuthFile != null) + "--token-auth-file=${cfg.tokenAuthFile}"} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + 
${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + User = "kubernetes"; + Group = "kubernetes"; + AmbientCapabilities = "cap_net_bind_service"; + Restart = "on-failure"; + RestartSec = 5; + }; + + unitConfig = { + StartLimitIntervalSec = 0; + }; + }; + + services.etcd = { + clientCertAuth = mkDefault true; + peerClientCertAuth = mkDefault true; + listenClientUrls = mkDefault ["https://0.0.0.0:2379"]; + listenPeerUrls = mkDefault ["https://0.0.0.0:2380"]; + advertiseClientUrls = mkDefault ["https://${top.masterAddress}:2379"]; + initialCluster = mkDefault ["${top.masterAddress}=https://${top.masterAddress}:2380"]; + name = mkDefault top.masterAddress; + initialAdvertisePeerUrls = mkDefault ["https://${top.masterAddress}:2380"]; + }; + + services.kubernetes.addonManager.bootstrapAddons = mkIf isRBACEnabled { + + apiserver-kubelet-api-admin-crb = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRoleBinding"; + metadata = { + name = "system:kube-apiserver:kubelet-api-admin"; + }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "ClusterRole"; + name = "system:kubelet-api-admin"; + }; + subjects = [{ + kind = "User"; + name = "system:kube-apiserver"; + }]; + }; + + }; + + services.kubernetes.pki.certs = with top.lib; { + apiServer = mkCert { + name = "kube-apiserver"; + CN = "kubernetes"; + hosts = [ + "kubernetes.default.svc" + "kubernetes.default.svc.${top.addons.dns.clusterDomain}" + cfg.advertiseAddress + top.masterAddress + apiserverServiceIP + "127.0.0.1" + ] ++ cfg.extraSANs; + action = "systemctl restart kube-apiserver.service"; + }; + apiserverProxyClient = mkCert { + name = "kube-apiserver-proxy-client"; + CN = "front-proxy-client"; + action = "systemctl restart kube-apiserver.service"; + }; + apiserverKubeletClient = mkCert { + name = "kube-apiserver-kubelet-client"; + CN = "system:kube-apiserver"; + action = "systemctl restart kube-apiserver.service"; + }; + apiserverEtcdClient = mkCert { + name = "kube-apiserver-etcd-client"; + CN = "etcd-client"; + action = "systemctl restart kube-apiserver.service"; + }; + clusterAdmin = mkCert { + name = "cluster-admin"; + CN = "cluster-admin"; + fields = { + O = "system:masters"; + }; + privateKeyOwner = "root"; + }; + etcd = mkCert { + name = "etcd"; + CN = top.masterAddress; + hosts = [ + "etcd.local" + "etcd.${top.addons.dns.clusterDomain}" + top.masterAddress + cfg.advertiseAddress + ]; + privateKeyOwner = "etcd"; + action = "systemctl restart etcd.service"; + }; + }; + + }) + + ]; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/controller-manager.nix b/nixos/modules/services/cluster/kubernetes/controller-manager.nix new file mode 100644 index 00000000000..7c317e94dee --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/controller-manager.nix @@ -0,0 +1,176 @@ +{ config, lib, options, pkgs, ... 
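# Usage sketch for the apiserver module above; the address and key paths
# are illustrative assumptions, not defaults:
#   services.kubernetes.apiserver = {
#     enable = true;
#     advertiseAddress = "10.0.0.10";
#     serviceAccountSigningKeyFile = "/var/lib/kubernetes/secrets/service-account.pem";
#     serviceAccountKeyFile = "/var/lib/kubernetes/secrets/service-account.pem";
#   };
# The etcd URLs and the client/server certificates are then supplied by
# the mkDefault and pki wiring shown above.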
}: + +with lib; + +let + top = config.services.kubernetes; + otop = options.services.kubernetes; + cfg = top.controllerManager; +in +{ + imports = [ + (mkRenamedOptionModule [ "services" "kubernetes" "controllerManager" "address" ] ["services" "kubernetes" "controllerManager" "bindAddress"]) + (mkRenamedOptionModule [ "services" "kubernetes" "controllerManager" "port" ] ["services" "kubernetes" "controllerManager" "insecurePort"]) + ]; + + ###### interface + options.services.kubernetes.controllerManager = with lib.types; { + + allocateNodeCIDRs = mkOption { + description = "Whether to automatically allocate CIDR ranges for cluster nodes."; + default = true; + type = bool; + }; + + bindAddress = mkOption { + description = "Kubernetes controller manager listening address."; + default = "127.0.0.1"; + type = str; + }; + + clusterCidr = mkOption { + description = "Kubernetes CIDR Range for Pods in cluster."; + default = top.clusterCidr; + defaultText = literalExpression "config.${otop.clusterCidr}"; + type = str; + }; + + enable = mkEnableOption "Kubernetes controller manager"; + + extraOpts = mkOption { + description = "Kubernetes controller manager extra command line options."; + default = ""; + type = separatedString " "; + }; + + featureGates = mkOption { + description = "List set of feature gates"; + default = top.featureGates; + defaultText = literalExpression "config.${otop.featureGates}"; + type = listOf str; + }; + + insecurePort = mkOption { + description = "Kubernetes controller manager insecure listening port."; + default = 0; + type = int; + }; + + kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes controller manager"; + + leaderElect = mkOption { + description = "Whether to start leader election before executing main loop."; + type = bool; + default = true; + }; + + rootCaFile = mkOption { + description = '' + Kubernetes controller manager certificate authority file included in + service account's token secret. + ''; + default = top.caFile; + defaultText = literalExpression "config.${otop.caFile}"; + type = nullOr path; + }; + + securePort = mkOption { + description = "Kubernetes controller manager secure listening port."; + default = 10252; + type = int; + }; + + serviceAccountKeyFile = mkOption { + description = '' + Kubernetes controller manager PEM-encoded private RSA key file used to + sign service account tokens + ''; + default = null; + type = nullOr path; + }; + + tlsCertFile = mkOption { + description = "Kubernetes controller-manager certificate file."; + default = null; + type = nullOr path; + }; + + tlsKeyFile = mkOption { + description = "Kubernetes controller-manager private key file."; + default = null; + type = nullOr path; + }; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. 
See + <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/> + ''; + default = null; + type = nullOr int; + }; + + }; + + ###### implementation + config = mkIf cfg.enable { + systemd.services.kube-controller-manager = { + description = "Kubernetes Controller Manager Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "kube-apiserver.service" ]; + serviceConfig = { + RestartSec = "30s"; + Restart = "on-failure"; + Slice = "kubernetes.slice"; + ExecStart = ''${top.package}/bin/kube-controller-manager \ + --allocate-node-cidrs=${boolToString cfg.allocateNodeCIDRs} \ + --bind-address=${cfg.bindAddress} \ + ${optionalString (cfg.clusterCidr!=null) + "--cluster-cidr=${cfg.clusterCidr}"} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + --kubeconfig=${top.lib.mkKubeConfig "kube-controller-manager" cfg.kubeconfig} \ + --leader-elect=${boolToString cfg.leaderElect} \ + ${optionalString (cfg.rootCaFile!=null) + "--root-ca-file=${cfg.rootCaFile}"} \ + --port=${toString cfg.insecurePort} \ + --secure-port=${toString cfg.securePort} \ + ${optionalString (cfg.serviceAccountKeyFile!=null) + "--service-account-private-key-file=${cfg.serviceAccountKeyFile}"} \ + ${optionalString (cfg.tlsCertFile!=null) + "--tls-cert-file=${cfg.tlsCertFile}"} \ + ${optionalString (cfg.tlsKeyFile!=null) + "--tls-private-key-file=${cfg.tlsKeyFile}"} \ + ${optionalString (elem "RBAC" top.apiserver.authorizationMode) + "--use-service-account-credentials"} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + User = "kubernetes"; + Group = "kubernetes"; + }; + unitConfig = { + StartLimitIntervalSec = 0; + }; + path = top.path; + }; + + services.kubernetes.pki.certs = with top.lib; { + controllerManager = mkCert { + name = "kube-controller-manager"; + CN = "kube-controller-manager"; + action = "systemctl restart kube-controller-manager.service"; + }; + controllerManagerClient = mkCert { + name = "kube-controller-manager-client"; + CN = "system:kube-controller-manager"; + action = "systemctl restart kube-controller-manager.service"; + }; + }; + + services.kubernetes.controllerManager.kubeconfig.server = mkDefault top.apiserverAddress; + }; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/default.nix b/nixos/modules/services/cluster/kubernetes/default.nix new file mode 100644 index 00000000000..35ec99d83c8 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/default.nix @@ -0,0 +1,315 @@ +{ config, lib, options, pkgs, ... 
}: + +with lib; + +let + cfg = config.services.kubernetes; + opt = options.services.kubernetes; + + defaultContainerdSettings = { + version = 2; + root = "/var/lib/containerd"; + state = "/run/containerd"; + oom_score = 0; + + grpc = { + address = "/run/containerd/containerd.sock"; + }; + + plugins."io.containerd.grpc.v1.cri" = { + sandbox_image = "pause:latest"; + + cni = { + bin_dir = "/opt/cni/bin"; + max_conf_num = 0; + }; + + containerd.runtimes.runc = { + runtime_type = "io.containerd.runc.v2"; + options.SystemdCgroup = true; + }; + }; + }; + + mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON { + apiVersion = "v1"; + kind = "Config"; + clusters = [{ + name = "local"; + cluster.certificate-authority = conf.caFile or cfg.caFile; + cluster.server = conf.server; + }]; + users = [{ + inherit name; + user = { + client-certificate = conf.certFile; + client-key = conf.keyFile; + }; + }]; + contexts = [{ + context = { + cluster = "local"; + user = name; + }; + name = "local"; + }]; + current-context = "local"; + }); + + caCert = secret "ca"; + + etcdEndpoints = ["https://${cfg.masterAddress}:2379"]; + + mkCert = { name, CN, hosts ? [], fields ? {}, action ? "", + privateKeyOwner ? "kubernetes" }: rec { + inherit name caCert CN hosts fields action; + cert = secret name; + key = secret "${name}-key"; + privateKeyOptions = { + owner = privateKeyOwner; + group = "nogroup"; + mode = "0600"; + path = key; + }; + }; + + secret = name: "${cfg.secretsPath}/${name}.pem"; + + mkKubeConfigOptions = prefix: { + server = mkOption { + description = "${prefix} kube-apiserver server address."; + type = types.str; + }; + + caFile = mkOption { + description = "${prefix} certificate authority file used to connect to kube-apiserver."; + type = types.nullOr types.path; + default = cfg.caFile; + defaultText = literalExpression "config.${opt.caFile}"; + }; + + certFile = mkOption { + description = "${prefix} client certificate file used to connect to kube-apiserver."; + type = types.nullOr types.path; + default = null; + }; + + keyFile = mkOption { + description = "${prefix} client key file used to connect to kube-apiserver."; + type = types.nullOr types.path; + default = null; + }; + }; +in { + + imports = [ + (mkRemovedOptionModule [ "services" "kubernetes" "addons" "dashboard" ] "Removed due to it being an outdated version") + (mkRemovedOptionModule [ "services" "kubernetes" "verbose" ] "") + ]; + + ###### interface + + options.services.kubernetes = { + roles = mkOption { + description = '' + Kubernetes role that this machine should take. + + The master role will enable etcd, apiserver, scheduler, controller manager, + addon manager, flannel and proxy services. + The node role will enable flannel, containerd, kubelet and proxy services. + ''; + default = []; + type = types.listOf (types.enum ["master" "node"]); + }; + + package = mkOption { + description = "Kubernetes package to use."; + type = types.package; + default = pkgs.kubernetes; + defaultText = literalExpression "pkgs.kubernetes"; + }; + + kubeconfig = mkKubeConfigOptions "Default kubeconfig"; + + apiserverAddress = mkOption { + description = '' + Clusterwide accessible address for the kubernetes apiserver, + including protocol and optional port.
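+ It is used as the default server address in the kubeconfigs generated for the individual cluster components.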
+ ''; + example = "https://kubernetes-apiserver.example.com:6443"; + type = types.str; + }; + + caFile = mkOption { + description = "Default kubernetes certificate authority."; + type = types.nullOr types.path; + default = null; + }; + + dataDir = mkOption { + description = "Kubernetes root directory for managing kubelet files."; + default = "/var/lib/kubernetes"; + type = types.path; + }; + + easyCerts = mkOption { + description = "Automatically set up x509 certificates and keys for the entire cluster."; + default = false; + type = types.bool; + }; + + featureGates = mkOption { + description = "List of feature gates to enable."; + default = []; + type = types.listOf types.str; + }; + + masterAddress = mkOption { + description = "Clusterwide available network address or hostname for the kubernetes master server."; + example = "master.example.com"; + type = types.str; + }; + + path = mkOption { + description = "Packages added to the services' PATH environment variable. Both the bin and sbin subdirectories of each package are added."; + type = types.listOf types.package; + default = []; + }; + + clusterCidr = mkOption { + description = "Kubernetes controller manager and proxy CIDR range for pods in the cluster."; + default = "10.1.0.0/16"; + type = types.nullOr types.str; + }; + + lib = mkOption { + description = "Common functions for the kubernetes modules."; + default = { + inherit mkCert; + inherit mkKubeConfig; + inherit mkKubeConfigOptions; + }; + type = types.attrs; + }; + + secretsPath = mkOption { + description = "Default location for kubernetes secrets. Not a store location."; + type = types.path; + default = cfg.dataDir + "/secrets"; + defaultText = literalExpression '' + config.${opt.dataDir} + "/secrets" + ''; + }; + }; + + ###### implementation + + config = mkMerge [ + + (mkIf cfg.easyCerts { + services.kubernetes.pki.enable = mkDefault true; + services.kubernetes.caFile = caCert; + }) + + (mkIf (elem "master" cfg.roles) { + services.kubernetes.apiserver.enable = mkDefault true; + services.kubernetes.scheduler.enable = mkDefault true; + services.kubernetes.controllerManager.enable = mkDefault true; + services.kubernetes.addonManager.enable = mkDefault true; + services.kubernetes.proxy.enable = mkDefault true; + services.etcd.enable = true; # Cannot mkDefault because of flannel default options + services.kubernetes.kubelet = { + enable = mkDefault true; + taints = mkIf (!(elem "node" cfg.roles)) { + master = { + key = "node-role.kubernetes.io/master"; + value = "true"; + effect = "NoSchedule"; + }; + }; + }; + }) + + + (mkIf (all (el: el == "master") cfg.roles) { + # if this node is only a master, make it unschedulable by default + services.kubernetes.kubelet.unschedulable = mkDefault true; + }) + + (mkIf (elem "node" cfg.roles) { + services.kubernetes.kubelet.enable = mkDefault true; + services.kubernetes.proxy.enable = mkDefault true; + }) + + # Using "services.kubernetes.roles" will automatically enable easyCerts and flannel + (mkIf (cfg.roles != []) { + services.kubernetes.flannel.enable = mkDefault true; + services.flannel.etcd.endpoints = mkDefault etcdEndpoints; + services.kubernetes.easyCerts = mkDefault true; + }) + + (mkIf cfg.apiserver.enable { + services.kubernetes.pki.etcClusterAdminKubeconfig = mkDefault "kubernetes/cluster-admin.kubeconfig"; + services.kubernetes.apiserver.etcd.servers = mkDefault etcdEndpoints; + }) + + (mkIf cfg.kubelet.enable { + virtualisation.containerd = { + enable = mkDefault true; + settings = mapAttrsRecursive (name: mkDefault)
defaultContainerdSettings; + }; + }) + + (mkIf (cfg.apiserver.enable || cfg.controllerManager.enable) { + services.kubernetes.pki.certs = { + serviceAccount = mkCert { + name = "service-account"; + CN = "system:service-account-signer"; + action = '' + systemctl reload \ + kube-apiserver.service \ + kube-controller-manager.service + ''; + }; + }; + }) + + (mkIf ( + cfg.apiserver.enable || + cfg.scheduler.enable || + cfg.controllerManager.enable || + cfg.kubelet.enable || + cfg.proxy.enable || + cfg.addonManager.enable + ) { + systemd.targets.kubernetes = { + description = "Kubernetes"; + wantedBy = [ "multi-user.target" ]; + }; + + systemd.tmpfiles.rules = [ + "d /opt/cni/bin 0755 root root -" + "d /run/kubernetes 0755 kubernetes kubernetes -" + "d /var/lib/kubernetes 0755 kubernetes kubernetes -" + ]; + + users.users.kubernetes = { + uid = config.ids.uids.kubernetes; + description = "Kubernetes user"; + group = "kubernetes"; + home = cfg.dataDir; + createHome = true; + }; + users.groups.kubernetes.gid = config.ids.gids.kubernetes; + + # dns addon is enabled by default + services.kubernetes.addons.dns.enable = mkDefault true; + + services.kubernetes.apiserverAddress = mkDefault ("https://${if cfg.apiserver.advertiseAddress != null + then cfg.apiserver.advertiseAddress + else "${cfg.masterAddress}:${toString cfg.apiserver.securePort}"}"); + }) + ]; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix new file mode 100644 index 00000000000..cb81eaaf016 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/flannel.nix @@ -0,0 +1,100 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.flannel; + + # we want flannel to use kubernetes itself as configuration backend, not direct etcd + storageBackend = "kubernetes"; +in +{ + ###### interface + options.services.kubernetes.flannel = { + enable = mkEnableOption "flannel networking"; + }; + + ###### implementation + config = mkIf cfg.enable { + services.flannel = { + + enable = mkDefault true; + network = mkDefault top.clusterCidr; + inherit storageBackend; + nodeName = config.services.kubernetes.kubelet.hostname; + }; + + services.kubernetes.kubelet = { + networkPlugin = mkDefault "cni"; + cni.config = mkDefault [{ + name = "mynet"; + type = "flannel"; + cniVersion = "0.3.1"; + delegate = { + isDefaultGateway = true; + bridge = "mynet"; + }; + }]; + }; + + networking = { + firewall.allowedUDPPorts = [ + 8285 # flannel udp + 8472 # flannel vxlan + ]; + dhcpcd.denyInterfaces = [ "mynet*" "flannel*" ]; + }; + + services.kubernetes.pki.certs = { + flannelClient = top.lib.mkCert { + name = "flannel-client"; + CN = "flannel-client"; + action = "systemctl restart flannel.service"; + }; + }; + + # give flannel some kubernetes RBAC permissions if applicable + services.kubernetes.addonManager.bootstrapAddons = mkIf ((storageBackend == "kubernetes") && (elem "RBAC" top.apiserver.authorizationMode)) { + + flannel-cr = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRole"; + metadata = { name = "flannel"; }; + rules = [{ + apiGroups = [ "" ]; + resources = [ "pods" ]; + verbs = [ "get" ]; + } + { + apiGroups = [ "" ]; + resources = [ "nodes" ]; + verbs = [ "list" "watch" ]; + } + { + apiGroups = [ "" ]; + resources = [ "nodes/status" ]; + verbs = [ "patch" ]; + }]; + }; + + flannel-crb = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRoleBinding"; + metadata
= { name = "flannel"; }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "ClusterRole"; + name = "flannel"; + }; + subjects = [{ + kind = "User"; + name = "flannel-client"; + }]; + }; + + }; + }; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/kubelet.nix b/nixos/modules/services/cluster/kubernetes/kubelet.nix new file mode 100644 index 00000000000..af3a5062feb --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/kubelet.nix @@ -0,0 +1,398 @@ +{ config, lib, options, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + otop = options.services.kubernetes; + cfg = top.kubelet; + + cniConfig = + if cfg.cni.config != [] && cfg.cni.configDir != null then + throw "Verbatim CNI-config and CNI configDir cannot both be set." + else if cfg.cni.configDir != null then + cfg.cni.configDir + else + (pkgs.buildEnv { + name = "kubernetes-cni-config"; + paths = imap (i: entry: + pkgs.writeTextDir "${toString (10+i)}-${entry.type}.conf" (builtins.toJSON entry) + ) cfg.cni.config; + }); + + infraContainer = pkgs.dockerTools.buildImage { + name = "pause"; + tag = "latest"; + contents = top.package.pause; + config.Cmd = ["/bin/pause"]; + }; + + kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig; + + manifestPath = "kubernetes/manifests"; + + taintOptions = with lib.types; { name, ... }: { + options = { + key = mkOption { + description = "Key of taint."; + default = name; + defaultText = literalDocBook "Name of this submodule."; + type = str; + }; + value = mkOption { + description = "Value of taint."; + type = str; + }; + effect = mkOption { + description = "Effect of taint."; + example = "NoSchedule"; + type = enum ["NoSchedule" "PreferNoSchedule" "NoExecute"]; + }; + }; + }; + + taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (mapAttrsToList (n: v: v) cfg.taints); +in +{ + imports = [ + (mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "applyManifests" ] "") + (mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "cadvisorPort" ] "") + (mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "allowPrivileged" ] "") + ]; + + ###### interface + options.services.kubernetes.kubelet = with lib.types; { + + address = mkOption { + description = "Kubernetes kubelet info server listening address."; + default = "0.0.0.0"; + type = str; + }; + + clusterDns = mkOption { + description = "Use alternative DNS."; + default = "10.1.0.1"; + type = str; + }; + + clusterDomain = mkOption { + description = "Use alternative domain."; + default = config.services.kubernetes.addons.dns.clusterDomain; + defaultText = literalExpression "config.${options.services.kubernetes.addons.dns.clusterDomain}"; + type = str; + }; + + clientCaFile = mkOption { + description = "Kubernetes apiserver CA file for client authentication."; + default = top.caFile; + defaultText = literalExpression "config.${otop.caFile}"; + type = nullOr path; + }; + + cni = { + packages = mkOption { + description = "List of network plugin packages to install."; + type = listOf package; + default = []; + }; + + config = mkOption { + description = "Kubernetes CNI configuration."; + type = listOf attrs; + default = []; + example = literalExpression '' + [{ + "cniVersion": "0.3.1", + "name": "mynet", + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "ipMasq": true, + "ipam": { + "type": "host-local", + "subnet": "10.22.0.0/16", + "routes": [ + { "dst": "0.0.0.0/0" } + ] + } + } { + "cniVersion": "0.3.1", + "type": 
"loopback" + }] + ''; + }; + + configDir = mkOption { + description = "Path to Kubernetes CNI configuration directory."; + type = nullOr path; + default = null; + }; + }; + + containerRuntime = mkOption { + description = "Which container runtime type to use"; + type = enum ["docker" "remote"]; + default = "remote"; + }; + + containerRuntimeEndpoint = mkOption { + description = "Endpoint at which to find the container runtime api interface/socket"; + type = str; + default = "unix:///run/containerd/containerd.sock"; + }; + + enable = mkEnableOption "Kubernetes kubelet."; + + extraOpts = mkOption { + description = "Kubernetes kubelet extra command line options."; + default = ""; + type = separatedString " "; + }; + + featureGates = mkOption { + description = "List set of feature gates"; + default = top.featureGates; + defaultText = literalExpression "config.${otop.featureGates}"; + type = listOf str; + }; + + healthz = { + bind = mkOption { + description = "Kubernetes kubelet healthz listening address."; + default = "127.0.0.1"; + type = str; + }; + + port = mkOption { + description = "Kubernetes kubelet healthz port."; + default = 10248; + type = int; + }; + }; + + hostname = mkOption { + description = "Kubernetes kubelet hostname override."; + default = config.networking.hostName; + defaultText = literalExpression "config.networking.hostName"; + type = str; + }; + + kubeconfig = top.lib.mkKubeConfigOptions "Kubelet"; + + manifests = mkOption { + description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)"; + type = attrsOf attrs; + default = {}; + }; + + networkPlugin = mkOption { + description = "Network plugin to use by Kubernetes."; + type = nullOr (enum ["cni" "kubenet"]); + default = "kubenet"; + }; + + nodeIp = mkOption { + description = "IP address of the node. If set, kubelet will use this IP address for the node."; + default = null; + type = nullOr str; + }; + + registerNode = mkOption { + description = "Whether to auto register kubelet with API server."; + default = true; + type = bool; + }; + + port = mkOption { + description = "Kubernetes kubelet info server listening port."; + default = 10250; + type = int; + }; + + seedDockerImages = mkOption { + description = "List of docker images to preload on system"; + default = []; + type = listOf package; + }; + + taints = mkOption { + description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/)."; + default = {}; + type = attrsOf (submodule [ taintOptions ]); + }; + + tlsCertFile = mkOption { + description = "File containing x509 Certificate for HTTPS."; + default = null; + type = nullOr path; + }; + + tlsKeyFile = mkOption { + description = "File containing x509 private key matching tlsCertFile."; + default = null; + type = nullOr path; + }; + + unschedulable = mkOption { + description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role."; + default = false; + type = bool; + }; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. 
See + <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/> + ''; + default = null; + type = nullOr int; + }; + + }; + + ###### implementation + config = mkMerge [ + (mkIf cfg.enable { + + environment.etc."cni/net.d".source = cniConfig; + + services.kubernetes.kubelet.seedDockerImages = [infraContainer]; + + boot.kernel.sysctl = { + "net.bridge.bridge-nf-call-iptables" = 1; + "net.ipv4.ip_forward" = 1; + "net.bridge.bridge-nf-call-ip6tables" = 1; + }; + + systemd.services.kubelet = { + description = "Kubernetes Kubelet Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "containerd.service" "network.target" "kube-apiserver.service" ]; + path = with pkgs; [ + gitMinimal + openssh + util-linux + iproute2 + ethtool + thin-provisioning-tools + iptables + socat + ] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package ++ top.path; + preStart = '' + ${concatMapStrings (img: '' + echo "Seeding container image: ${img}" + ${if (lib.hasSuffix "gz" img) then + ''${pkgs.gzip}/bin/zcat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import --all-platforms -'' + else + ''${pkgs.coreutils}/bin/cat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import --all-platforms -'' + } + '') cfg.seedDockerImages} + + rm /opt/cni/bin/* || true + ${concatMapStrings (package: '' + echo "Linking cni package: ${package}" + ln -fs ${package}/bin/* /opt/cni/bin + '') cfg.cni.packages} + ''; + serviceConfig = { + Slice = "kubernetes.slice"; + CPUAccounting = true; + MemoryAccounting = true; + Restart = "on-failure"; + RestartSec = "1000ms"; + ExecStart = ''${top.package}/bin/kubelet \ + --address=${cfg.address} \ + --authentication-token-webhook \ + --authentication-token-webhook-cache-ttl="10s" \ + --authorization-mode=Webhook \ + ${optionalString (cfg.clientCaFile != null) + "--client-ca-file=${cfg.clientCaFile}"} \ + ${optionalString (cfg.clusterDns != "") + "--cluster-dns=${cfg.clusterDns}"} \ + ${optionalString (cfg.clusterDomain != "") + "--cluster-domain=${cfg.clusterDomain}"} \ + --cni-conf-dir=${cniConfig} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + --hairpin-mode=hairpin-veth \ + --healthz-bind-address=${cfg.healthz.bind} \ + --healthz-port=${toString cfg.healthz.port} \ + --hostname-override=${cfg.hostname} \ + --kubeconfig=${kubeconfig} \ + ${optionalString (cfg.networkPlugin != null) + "--network-plugin=${cfg.networkPlugin}"} \ + ${optionalString (cfg.nodeIp != null) + "--node-ip=${cfg.nodeIp}"} \ + --pod-infra-container-image=pause \ + ${optionalString (cfg.manifests != {}) + "--pod-manifest-path=/etc/${manifestPath}"} \ + --port=${toString cfg.port} \ + --register-node=${boolToString cfg.registerNode} \ + ${optionalString (taints != "") + "--register-with-taints=${taints}"} \ + --root-dir=${top.dataDir} \ + ${optionalString (cfg.tlsCertFile != null) + "--tls-cert-file=${cfg.tlsCertFile}"} \ + ${optionalString (cfg.tlsKeyFile != null) + "--tls-private-key-file=${cfg.tlsKeyFile}"} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + --container-runtime=${cfg.containerRuntime} \ + --container-runtime-endpoint=${cfg.containerRuntimeEndpoint} \ + --cgroup-driver=systemd \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + }; + unitConfig = { + StartLimitIntervalSec = 0; + }; + }; + + # Always include CNI plugins + services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins
pkgs.cni-plugin-flannel]; + + boot.kernelModules = ["br_netfilter" "overlay"]; + + services.kubernetes.kubelet.hostname = with config.networking; + mkDefault (hostName + optionalString (domain != null) ".${domain}"); + + services.kubernetes.pki.certs = with top.lib; { + kubelet = mkCert { + name = "kubelet"; + CN = top.kubelet.hostname; + action = "systemctl restart kubelet.service"; + + }; + kubeletClient = mkCert { + name = "kubelet-client"; + CN = "system:node:${top.kubelet.hostname}"; + fields = { + O = "system:nodes"; + }; + action = "systemctl restart kubelet.service"; + }; + }; + + services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress; + }) + + (mkIf (cfg.enable && cfg.manifests != {}) { + environment.etc = mapAttrs' (name: manifest: + nameValuePair "${manifestPath}/${name}.json" { + text = builtins.toJSON manifest; + mode = "0755"; + } + ) cfg.manifests; + }) + + (mkIf (cfg.unschedulable && cfg.enable) { + services.kubernetes.kubelet.taints.unschedulable = { + value = "true"; + effect = "NoSchedule"; + }; + }) + + ]; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix new file mode 100644 index 00000000000..7d9198d20e8 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/pki.nix @@ -0,0 +1,406 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.pki; + + csrCA = pkgs.writeText "kube-pki-cacert-csr.json" (builtins.toJSON { + key = { + algo = "rsa"; + size = 2048; + }; + names = singleton cfg.caSpec; + }); + + csrCfssl = pkgs.writeText "kube-pki-cfssl-csr.json" (builtins.toJSON { + key = { + algo = "rsa"; + size = 2048; + }; + CN = top.masterAddress; + hosts = [top.masterAddress] ++ cfg.cfsslAPIExtraSANs; + }); + + cfsslAPITokenBaseName = "apitoken.secret"; + cfsslAPITokenPath = "${config.services.cfssl.dataDir}/${cfsslAPITokenBaseName}"; + certmgrAPITokenPath = "${top.secretsPath}/${cfsslAPITokenBaseName}"; + cfsslAPITokenLength = 32; + + clusterAdminKubeconfig = with cfg.certs.clusterAdmin; + top.lib.mkKubeConfig "cluster-admin" { + server = top.apiserverAddress; + certFile = cert; + keyFile = key; + }; + + remote = with config.services; "https://${kubernetes.masterAddress}:${toString cfssl.port}"; +in +{ + ###### interface + options.services.kubernetes.pki = with lib.types; { + + enable = mkEnableOption "easyCert issuer service"; + + certs = mkOption { + description = "List of certificate specs to feed to cert generator."; + default = {}; + type = attrs; + }; + + genCfsslCACert = mkOption { + description = '' + Whether to automatically generate cfssl CA certificate and key, + if they don't exist. + ''; + default = true; + type = bool; + }; + + genCfsslAPICerts = mkOption { + description = '' + Whether to automatically generate cfssl API webserver TLS cert and key, + if they don't exist. + ''; + default = true; + type = bool; + }; + + cfsslAPIExtraSANs = mkOption { + description = '' + Extra x509 Subject Alternative Names to be added to the cfssl API webserver TLS cert. + ''; + default = []; + example = [ "subdomain.example.com" ]; + type = listOf str; + }; + + genCfsslAPIToken = mkOption { + description = '' + Whether to automatically generate cfssl API-token secret, + if it doesn't exist.
+ ''; + default = true; + type = bool; + }; + + pkiTrustOnBootstrap = mkOption { + description = "Whether to always trust remote cfssl server upon initial PKI bootstrap."; + default = true; + type = bool; + }; + + caCertPathPrefix = mkOption { + description = '' + Path prefix for the CA certificate to be used for cfssl signing. + Suffixes ".pem" and "-key.pem" will be automatically appended for + the public and private keys respectively. + ''; + default = "${config.services.cfssl.dataDir}/ca"; + defaultText = literalExpression ''"''${config.services.cfssl.dataDir}/ca"''; + type = str; + }; + + caSpec = mkOption { + description = "Certificate specification for the auto-generated CA cert."; + default = { + CN = "kubernetes-cluster-ca"; + O = "NixOS"; + OU = "services.kubernetes.pki.caSpec"; + L = "auto-generated"; + }; + type = attrs; + }; + + etcClusterAdminKubeconfig = mkOption { + description = '' + Symlink a kubeconfig with cluster-admin privileges to environment path + (/etc/<path>). + ''; + default = null; + type = nullOr str; + }; + + }; + + ###### implementation + config = mkIf cfg.enable + (let + cfsslCertPathPrefix = "${config.services.cfssl.dataDir}/cfssl"; + cfsslCert = "${cfsslCertPathPrefix}.pem"; + cfsslKey = "${cfsslCertPathPrefix}-key.pem"; + in + { + + services.cfssl = mkIf (top.apiserver.enable) { + enable = true; + address = "0.0.0.0"; + tlsCert = cfsslCert; + tlsKey = cfsslKey; + configFile = toString (pkgs.writeText "cfssl-config.json" (builtins.toJSON { + signing = { + profiles = { + default = { + usages = ["digital signature"]; + auth_key = "default"; + expiry = "720h"; + }; + }; + }; + auth_keys = { + default = { + type = "standard"; + key = "file:${cfsslAPITokenPath}"; + }; + }; + })); + }; + + systemd.services.cfssl.preStart = with pkgs; with config.services.cfssl; mkIf (top.apiserver.enable) + (concatStringsSep "\n" [ + "set -e" + (optionalString cfg.genCfsslCACert '' + if [ ! -f "${cfg.caCertPathPrefix}.pem" ]; then + ${cfssl}/bin/cfssl genkey -initca ${csrCA} | \ + ${cfssl}/bin/cfssljson -bare ${cfg.caCertPathPrefix} + fi + '') + (optionalString cfg.genCfsslAPICerts '' + if [ ! -f "${dataDir}/cfssl.pem" ]; then + ${cfssl}/bin/cfssl gencert -ca "${cfg.caCertPathPrefix}.pem" -ca-key "${cfg.caCertPathPrefix}-key.pem" ${csrCfssl} | \ + ${cfssl}/bin/cfssljson -bare ${cfsslCertPathPrefix} + fi + '') + (optionalString cfg.genCfsslAPIToken '' + if [ ! -f "${cfsslAPITokenPath}" ]; then + head -c ${toString (cfsslAPITokenLength / 2)} /dev/urandom | od -An -t x | tr -d ' ' >"${cfsslAPITokenPath}" + fi + chown cfssl "${cfsslAPITokenPath}" && chmod 400 "${cfsslAPITokenPath}" + '')]); + + systemd.services.kube-certmgr-bootstrap = { + description = "Kubernetes certmgr bootstrapper"; + wantedBy = [ "certmgr.service" ]; + after = [ "cfssl.target" ]; + script = concatStringsSep "\n" ['' + set -e + + # If there's a cfssl (cert issuer) running locally, then don't rely on the user to + # manually paste it in place. Just symlink. + # Otherwise, create the target file, ready for users to insert the token. + + mkdir -p "$(dirname "${certmgrAPITokenPath}")" + if [ -f "${cfsslAPITokenPath}" ]; then + ln -fs "${cfsslAPITokenPath}" "${certmgrAPITokenPath}" + else + touch "${certmgrAPITokenPath}" && chmod 600 "${certmgrAPITokenPath}" + fi + '' + (optionalString (cfg.pkiTrustOnBootstrap) '' + if [ !
-f "${top.caFile}" ] || [ $(cat "${top.caFile}" | wc -c) -lt 1 ]; then + ${pkgs.curl}/bin/curl --fail-early -f -kd '{}' ${remote}/api/v1/cfssl/info | \ + ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile} + fi + '') + ]; + serviceConfig = { + RestartSec = "10s"; + Restart = "on-failure"; + }; + }; + + services.certmgr = { + enable = true; + package = pkgs.certmgr-selfsigned; + svcManager = "command"; + specs = + let + mkSpec = _: cert: { + inherit (cert) action; + authority = { + inherit remote; + file.path = cert.caCert; + root_ca = cert.caCert; + profile = "default"; + auth_key_file = certmgrAPITokenPath; + }; + certificate = { + path = cert.cert; + }; + private_key = cert.privateKeyOptions; + request = { + hosts = [cert.CN] ++ cert.hosts; + inherit (cert) CN; + key = { + algo = "rsa"; + size = 2048; + }; + names = [ cert.fields ]; + }; + }; + in + mapAttrs mkSpec cfg.certs; + }; + + #TODO: Get rid of kube-addon-manager in the future for the following reasons + # - it is basically just a shell script wrapped around kubectl + # - it assumes that it is clusterAdmin or can gain clusterAdmin rights through serviceAccount + # - it is designed to be used with k8s system components only + # - it would be better with a more Nix-oriented way of managing addons + systemd.services.kube-addon-manager = mkIf top.addonManager.enable (mkMerge [{ + environment.KUBECONFIG = with cfg.certs.addonManager; + top.lib.mkKubeConfig "addon-manager" { + server = top.apiserverAddress; + certFile = cert; + keyFile = key; + }; + } + + (optionalAttrs (top.addonManager.bootstrapAddons != {}) { + serviceConfig.PermissionsStartOnly = true; + preStart = with pkgs; + let + files = mapAttrsToList (n: v: writeText "${n}.json" (builtins.toJSON v)) + top.addonManager.bootstrapAddons; + in + '' + export KUBECONFIG=${clusterAdminKubeconfig} + ${kubernetes}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files} + ''; + })]); + + environment.etc.${cfg.etcClusterAdminKubeconfig}.source = mkIf (!isNull cfg.etcClusterAdminKubeconfig) + clusterAdminKubeconfig; + + environment.systemPackages = mkIf (top.kubelet.enable || top.proxy.enable) [ + (pkgs.writeScriptBin "nixos-kubernetes-node-join" '' + set -e + exec 1>&2 + + if [ $# -gt 0 ]; then + echo "Usage: $(basename $0)" + echo "" + echo "No args. Apitoken must be provided on stdin." + echo "To get the apitoken, execute: 'sudo cat ${certmgrAPITokenPath}' on the master node." + exit 1 + fi + + if [ $(id -u) != 0 ]; then + echo "Run as root please." + exit 1 + fi + + read -r token + if [ ''${#token} != ${toString cfsslAPITokenLength} ]; then + echo "Token must be of length ${toString cfsslAPITokenLength}." + exit 1 + fi + + echo $token > ${certmgrAPITokenPath} + chmod 600 ${certmgrAPITokenPath} + + echo "Restarting certmgr..." >&1 + systemctl restart certmgr + + echo "Waiting for certs to appear..." >&1 + + ${optionalString top.kubelet.enable '' + while [ ! -f ${cfg.certs.kubelet.cert} ]; do sleep 1; done + echo "Restarting kubelet..." >&1 + systemctl restart kubelet + ''} + + ${optionalString top.proxy.enable '' + while [ ! -f ${cfg.certs.kubeProxyClient.cert} ]; do sleep 1; done + echo "Restarting kube-proxy..." >&1 + systemctl restart kube-proxy + ''} + + ${optionalString top.flannel.enable '' + while [ ! -f ${cfg.certs.flannelClient.cert} ]; do sleep 1; done + echo "Restarting flannel..." >&1 + systemctl restart flannel + ''} + + echo "Node joined successfully" + '')]; + + # isolate etcd on loopback at the master node + # easyCerts doesn't support multi-master clusters at the moment.
+ services.etcd = with cfg.certs.etcd; { + listenClientUrls = ["https://127.0.0.1:2379"]; + listenPeerUrls = ["https://127.0.0.1:2380"]; + advertiseClientUrls = ["https://etcd.local:2379"]; + initialCluster = ["${top.masterAddress}=https://etcd.local:2380"]; + initialAdvertisePeerUrls = ["https://etcd.local:2380"]; + certFile = mkDefault cert; + keyFile = mkDefault key; + trustedCaFile = mkDefault caCert; + }; + networking.extraHosts = mkIf (config.services.etcd.enable) '' + 127.0.0.1 etcd.${top.addons.dns.clusterDomain} etcd.local + ''; + + services.flannel = with cfg.certs.flannelClient; { + kubeconfig = top.lib.mkKubeConfig "flannel" { + server = top.apiserverAddress; + certFile = cert; + keyFile = key; + }; + }; + + services.kubernetes = { + + apiserver = mkIf top.apiserver.enable (with cfg.certs.apiServer; { + etcd = with cfg.certs.apiserverEtcdClient; { + servers = ["https://etcd.local:2379"]; + certFile = mkDefault cert; + keyFile = mkDefault key; + caFile = mkDefault caCert; + }; + clientCaFile = mkDefault caCert; + tlsCertFile = mkDefault cert; + tlsKeyFile = mkDefault key; + serviceAccountKeyFile = mkDefault cfg.certs.serviceAccount.cert; + serviceAccountSigningKeyFile = mkDefault cfg.certs.serviceAccount.key; + kubeletClientCaFile = mkDefault caCert; + kubeletClientCertFile = mkDefault cfg.certs.apiserverKubeletClient.cert; + kubeletClientKeyFile = mkDefault cfg.certs.apiserverKubeletClient.key; + proxyClientCertFile = mkDefault cfg.certs.apiserverProxyClient.cert; + proxyClientKeyFile = mkDefault cfg.certs.apiserverProxyClient.key; + }); + controllerManager = mkIf top.controllerManager.enable { + serviceAccountKeyFile = mkDefault cfg.certs.serviceAccount.key; + rootCaFile = cfg.certs.controllerManagerClient.caCert; + kubeconfig = with cfg.certs.controllerManagerClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + }; + }; + scheduler = mkIf top.scheduler.enable { + kubeconfig = with cfg.certs.schedulerClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + }; + }; + kubelet = mkIf top.kubelet.enable { + clientCaFile = mkDefault cfg.certs.kubelet.caCert; + tlsCertFile = mkDefault cfg.certs.kubelet.cert; + tlsKeyFile = mkDefault cfg.certs.kubelet.key; + kubeconfig = with cfg.certs.kubeletClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + }; + }; + proxy = mkIf top.proxy.enable { + kubeconfig = with cfg.certs.kubeProxyClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + }; + }; + }; + }); + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/proxy.nix b/nixos/modules/services/cluster/kubernetes/proxy.nix new file mode 100644 index 00000000000..0fd98d1c157 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/proxy.nix @@ -0,0 +1,102 @@ +{ config, lib, options, pkgs, ... 
}: + +with lib; + +let + top = config.services.kubernetes; + otop = options.services.kubernetes; + cfg = top.proxy; +in +{ + imports = [ + (mkRenamedOptionModule [ "services" "kubernetes" "proxy" "address" ] ["services" "kubernetes" "proxy" "bindAddress"]) + ]; + + ###### interface + options.services.kubernetes.proxy = with lib.types; { + + bindAddress = mkOption { + description = "Kubernetes proxy listening address."; + default = "0.0.0.0"; + type = str; + }; + + enable = mkEnableOption "Kubernetes proxy"; + + extraOpts = mkOption { + description = "Kubernetes proxy extra command line options."; + default = ""; + type = separatedString " "; + }; + + featureGates = mkOption { + description = "List of feature gates to enable."; + default = top.featureGates; + defaultText = literalExpression "config.${otop.featureGates}"; + type = listOf str; + }; + + hostname = mkOption { + description = "Kubernetes proxy hostname override."; + default = config.networking.hostName; + defaultText = literalExpression "config.networking.hostName"; + type = str; + }; + + kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes proxy"; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. See + <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/> + ''; + default = null; + type = nullOr int; + }; + + }; + + ###### implementation + config = mkIf cfg.enable { + systemd.services.kube-proxy = { + description = "Kubernetes Proxy Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "kube-apiserver.service" ]; + path = with pkgs; [ iptables conntrack-tools ]; + serviceConfig = { + Slice = "kubernetes.slice"; + ExecStart = ''${top.package}/bin/kube-proxy \ + --bind-address=${cfg.bindAddress} \ + ${optionalString (top.clusterCidr!=null) + "--cluster-cidr=${top.clusterCidr}"} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + --hostname-override=${cfg.hostname} \ + --kubeconfig=${top.lib.mkKubeConfig "kube-proxy" cfg.kubeconfig} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + Restart = "on-failure"; + RestartSec = 5; + }; + unitConfig = { + StartLimitIntervalSec = 0; + }; + }; + + services.kubernetes.proxy.hostname = with config.networking; mkDefault hostName; + + services.kubernetes.pki.certs = { + kubeProxyClient = top.lib.mkCert { + name = "kube-proxy-client"; + CN = "system:kube-proxy"; + action = "systemctl restart kube-proxy.service"; + }; + }; + + services.kubernetes.proxy.kubeconfig.server = mkDefault top.apiserverAddress; + }; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/kubernetes/scheduler.nix b/nixos/modules/services/cluster/kubernetes/scheduler.nix new file mode 100644 index 00000000000..2d95528a6ea --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/scheduler.nix @@ -0,0 +1,101 @@ +{ config, lib, options, pkgs, ...
}: + +with lib; + +let + top = config.services.kubernetes; + otop = options.services.kubernetes; + cfg = top.scheduler; +in +{ + ###### interface + options.services.kubernetes.scheduler = with lib.types; { + + address = mkOption { + description = "Kubernetes scheduler listening address."; + default = "127.0.0.1"; + type = str; + }; + + enable = mkEnableOption "Kubernetes scheduler"; + + extraOpts = mkOption { + description = "Kubernetes scheduler extra command line options."; + default = ""; + type = separatedString " "; + }; + + featureGates = mkOption { + description = "List of feature gates to enable."; + default = top.featureGates; + defaultText = literalExpression "config.${otop.featureGates}"; + type = listOf str; + }; + + kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes scheduler"; + + leaderElect = mkOption { + description = "Whether to start leader election before executing main loop."; + type = bool; + default = true; + }; + + port = mkOption { + description = "Kubernetes scheduler listening port."; + default = 10251; + type = int; + }; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. See + <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/> + ''; + default = null; + type = nullOr int; + }; + + }; + + ###### implementation + config = mkIf cfg.enable { + systemd.services.kube-scheduler = { + description = "Kubernetes Scheduler Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "kube-apiserver.service" ]; + serviceConfig = { + Slice = "kubernetes.slice"; + ExecStart = ''${top.package}/bin/kube-scheduler \ + --bind-address=${cfg.address} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + --kubeconfig=${top.lib.mkKubeConfig "kube-scheduler" cfg.kubeconfig} \ + --leader-elect=${boolToString cfg.leaderElect} \ + --secure-port=${toString cfg.port} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + User = "kubernetes"; + Group = "kubernetes"; + Restart = "on-failure"; + RestartSec = 5; + }; + unitConfig = { + StartLimitIntervalSec = 0; + }; + }; + + services.kubernetes.pki.certs = { + schedulerClient = top.lib.mkCert { + name = "kube-scheduler-client"; + CN = "system:kube-scheduler"; + action = "systemctl restart kube-scheduler.service"; + }; + }; + + services.kubernetes.scheduler.kubeconfig.server = mkDefault top.apiserverAddress; + }; + + meta.buildDocsInSandbox = false; +} diff --git a/nixos/modules/services/cluster/pacemaker/default.nix b/nixos/modules/services/cluster/pacemaker/default.nix new file mode 100644 index 00000000000..7eeadffcc58 --- /dev/null +++ b/nixos/modules/services/cluster/pacemaker/default.nix @@ -0,0 +1,52 @@ +{ config, lib, pkgs, ... }: + +with lib; +let + cfg = config.services.pacemaker; +in +{ + # interface + options.services.pacemaker = { + enable = mkEnableOption "pacemaker"; + + package = mkOption { + type = types.package; + default = pkgs.pacemaker; + defaultText = literalExpression "pkgs.pacemaker"; + description = "Package that should be used for pacemaker."; + }; + }; + + # implementation + config = mkIf cfg.enable { + assertions = [ { + assertion = config.services.corosync.enable; + message = '' + Enabling services.pacemaker requires a services.corosync configuration.
+ ''; + } ]; + + environment.systemPackages = [ cfg.package ]; + + # required by pacemaker + users.users.hacluster = { + isSystemUser = true; + group = "pacemaker"; + home = "/var/lib/pacemaker"; + }; + users.groups.pacemaker = {}; + + systemd.tmpfiles.rules = [ + "d /var/log/pacemaker 0700 hacluster pacemaker -" + ]; + + systemd.packages = [ cfg.package ]; + systemd.services.pacemaker = { + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + StateDirectory = "pacemaker"; + StateDirectoryMode = "0700"; + }; + }; + }; +} diff --git a/nixos/modules/services/cluster/spark/default.nix b/nixos/modules/services/cluster/spark/default.nix new file mode 100644 index 00000000000..e6b44e130a3 --- /dev/null +++ b/nixos/modules/services/cluster/spark/default.nix @@ -0,0 +1,162 @@ +{config, pkgs, lib, ...}: +let + cfg = config.services.spark; +in +with lib; +{ + options = { + services.spark = { + master = { + enable = mkEnableOption "Spark master service"; + bind = mkOption { + type = types.str; + description = "Address the spark master binds to."; + default = "127.0.0.1"; + example = "0.0.0.0"; + }; + restartIfChanged = mkOption { + type = types.bool; + description = '' + Automatically restart master service on config change. + This can be set to false to defer restarts on clusters running critical applications. + Please consider the security implications of inadvertently running an older version, + and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option. + ''; + default = true; + }; + extraEnvironment = mkOption { + type = types.attrsOf types.str; + description = "Extra environment variables to pass to spark master. See spark-standalone documentation."; + default = {}; + example = { + SPARK_MASTER_WEBUI_PORT = "8181"; + SPARK_MASTER_OPTS = "-Dspark.deploy.defaultCores=5"; + }; + }; + }; + worker = { + enable = mkEnableOption "Spark worker service"; + workDir = mkOption { + type = types.path; + description = "Spark worker working directory."; + default = "/var/lib/spark"; + }; + master = mkOption { + type = types.str; + description = "Address of the spark master."; + default = "127.0.0.1:7077"; + }; + restartIfChanged = mkOption { + type = types.bool; + description = '' + Automatically restart worker service on config change. + This can be set to false to defer restarts on clusters running critical applications. + Please consider the security implications of inadvertently running an older version, + and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option. + ''; + default = true; + }; + extraEnvironment = mkOption { + type = types.attrsOf types.str; + description = "Extra environment variables to pass to spark worker."; + default = {}; + example = { + SPARK_WORKER_CORES = "5"; + SPARK_WORKER_MEMORY = "2g"; + }; + }; + }; + confDir = mkOption { + type = types.path; + description = "Spark configuration directory.
Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory."; + default = "${cfg.package}/lib/${cfg.package.untarDir}/conf"; + defaultText = literalExpression ''"''${package}/lib/''${package.untarDir}/conf"''; + }; + logDir = mkOption { + type = types.path; + description = "Spark log directory."; + default = "/var/log/spark"; + }; + package = mkOption { + type = types.package; + description = "Spark package."; + default = pkgs.spark; + defaultText = literalExpression "pkgs.spark"; + example = literalExpression ''pkgs.spark.overrideAttrs (super: rec { + pname = "spark"; + version = "2.4.4"; + + src = pkgs.fetchzip { + url = "mirror://apache/spark/''${pname}-''${version}/''${pname}-''${version}-bin-without-hadoop.tgz"; + sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g"; + }; + })''; + }; + }; + }; + config = lib.mkIf (cfg.worker.enable || cfg.master.enable) { + environment.systemPackages = [ cfg.package ]; + systemd = { + services = { + spark-master = lib.mkIf cfg.master.enable { + path = with pkgs; [ procps openssh nettools ]; + description = "Spark master service."; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + restartIfChanged = cfg.master.restartIfChanged; + environment = cfg.master.extraEnvironment // { + SPARK_MASTER_HOST = cfg.master.bind; + SPARK_CONF_DIR = cfg.confDir; + SPARK_LOG_DIR = cfg.logDir; + }; + serviceConfig = { + Type = "forking"; + User = "spark"; + Group = "spark"; + WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}"; + ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-master.sh"; + ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-master.sh"; + TimeoutSec = 300; + StartLimitBurst = 10; + Restart = "always"; + }; + }; + spark-worker = lib.mkIf cfg.worker.enable { + path = with pkgs; [ procps openssh nettools rsync ]; + description = "Spark worker service."; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + restartIfChanged = cfg.worker.restartIfChanged; + environment = cfg.worker.extraEnvironment // { + SPARK_MASTER = cfg.worker.master; + SPARK_CONF_DIR = cfg.confDir; + SPARK_LOG_DIR = cfg.logDir; + SPARK_WORKER_DIR = cfg.worker.workDir; + }; + serviceConfig = { + Type = "forking"; + User = "spark"; + WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}"; + ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-worker.sh spark://${cfg.worker.master}"; + ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-worker.sh"; + TimeoutSec = 300; + StartLimitBurst = 10; + Restart = "always"; + }; + }; + }; + tmpfiles.rules = [ + "d '${cfg.worker.workDir}' - spark spark - -" + "d '${cfg.logDir}' - spark spark - -" + ]; + }; + users = { + users.spark = { + description = "Spark user."; + group = "spark"; + isSystemUser = true; + }; + groups.spark = { }; + }; + }; +}
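Taken together, the kubernetes modules above are driven from services.kubernetes.roles: a non-empty roles list switches on easyCerts, flannel and the per-component services through the mkDefault wiring in kubernetes/default.nix. A minimal single-node sketch of consuming them (the hostname and address are illustrative, not module defaults):

  {
    services.kubernetes = {
      roles = [ "master" "node" ];                # apiserver, scheduler, controller manager, addon manager, etcd, kubelet, proxy and flannel
      masterAddress = "kube.example.local";       # must resolve to this machine
      apiserver.advertiseAddress = "192.168.1.2";
    };
  }

A worker machine would instead set roles = [ "node" ] with the same masterAddress, then run nixos-kubernetes-node-join and feed it the apitoken read from the master's secrets directory, letting certmgr obtain its certificates from the cfssl service set up in pki.nix.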