Diffstat (limited to 'nixos/modules/services/cluster')
-rw-r--r--  nixos/modules/services/cluster/corosync/default.nix | 112
-rw-r--r--  nixos/modules/services/cluster/hadoop/conf.nix | 44
-rw-r--r--  nixos/modules/services/cluster/hadoop/default.nix | 223
-rw-r--r--  nixos/modules/services/cluster/hadoop/hdfs.nix | 204
-rw-r--r--  nixos/modules/services/cluster/hadoop/yarn.nix | 200
-rw-r--r--  nixos/modules/services/cluster/k3s/default.nix | 128
-rw-r--r--  nixos/modules/services/cluster/kubernetes/addon-manager.nix | 171
-rw-r--r--  nixos/modules/services/cluster/kubernetes/addons/dns.nix | 368
-rw-r--r--  nixos/modules/services/cluster/kubernetes/apiserver.nix | 500
-rw-r--r--  nixos/modules/services/cluster/kubernetes/controller-manager.nix | 176
-rw-r--r--  nixos/modules/services/cluster/kubernetes/default.nix | 315
-rw-r--r--  nixos/modules/services/cluster/kubernetes/flannel.nix | 100
-rw-r--r--  nixos/modules/services/cluster/kubernetes/kubelet.nix | 398
-rw-r--r--  nixos/modules/services/cluster/kubernetes/pki.nix | 406
-rw-r--r--  nixos/modules/services/cluster/kubernetes/proxy.nix | 102
-rw-r--r--  nixos/modules/services/cluster/kubernetes/scheduler.nix | 101
-rw-r--r--  nixos/modules/services/cluster/pacemaker/default.nix | 52
-rw-r--r--  nixos/modules/services/cluster/spark/default.nix | 162
18 files changed, 3762 insertions, 0 deletions
diff --git a/nixos/modules/services/cluster/corosync/default.nix b/nixos/modules/services/cluster/corosync/default.nix
new file mode 100644
index 00000000000..b4144917fee
--- /dev/null
+++ b/nixos/modules/services/cluster/corosync/default.nix
@@ -0,0 +1,112 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.corosync;
+in
+{
+  # interface
+  options.services.corosync = {
+    enable = mkEnableOption "corosync";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.corosync;
+      defaultText = literalExpression "pkgs.corosync";
+      description = "Package that should be used for corosync.";
+    };
+
+    clusterName = mkOption {
+      type = types.str;
+      default = "nixcluster";
+      description = "Name of the corosync cluster.";
+    };
+
+    extraOptions = mkOption {
+      type = with types; listOf str;
+      default = [];
+      description = "Additional options with which to start corosync.";
+    };
+
+    nodelist = mkOption {
+      description = "Corosync nodelist: all cluster members.";
+      default = [];
+      type = with types; listOf (submodule {
+        options = {
+          nodeid = mkOption {
+            type = int;
+            description = "Node ID number";
+          };
+          name = mkOption {
+            type = str;
+            description = "Node name";
+          };
+          ring_addrs = mkOption {
+            type = listOf str;
+            description = "List of addresses, one for each ring.";
+          };
+        };
+      });
+    };
+  };
+
+  # implementation
+  config = mkIf cfg.enable {
+    environment.systemPackages = [ cfg.package ];
+
+    environment.etc."corosync/corosync.conf".text = ''
+      totem {
+        version: 2
+        secauth: on
+        cluster_name: ${cfg.clusterName}
+        transport: knet
+      }
+
+      nodelist {
+        ${concatMapStrings ({ nodeid, name, ring_addrs }: ''
+          node {
+            nodeid: ${toString nodeid}
+            name: ${name}
+            ${concatStrings (imap0 (i: addr: ''
+              ring${toString i}_addr: ${addr}
+            '') ring_addrs)}
+          }
+        '') cfg.nodelist}
+      }
+
+      quorum {
+        # only corosync_votequorum is supported
+        provider: corosync_votequorum
+        wait_for_all: 0
+        ${optionalString (builtins.length cfg.nodelist < 3) ''
+          two_node: 1
+        ''}
+      }
+
+      logging {
+        to_syslog: yes
+      }
+    '';
+
+    environment.etc."corosync/uidgid.d/root".text = ''
+      # allow pacemaker connection by root
+      uidgid {
+        uid: 0
+        gid: 0
+      }
+    '';
+
+    systemd.packages = [ cfg.package ];
+    systemd.services.corosync = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        StateDirectory = "corosync";
+        StateDirectoryMode = "0700";
+      };
+    };
+
+    environment.etc."sysconfig/corosync".text = lib.optionalString (cfg.extraOptions != []) ''
+      COROSYNC_OPTIONS="${lib.escapeShellArgs cfg.extraOptions}"
+    '';
+  };
+}
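
A usage sketch for the module above (node names and addresses are illustrative, not part of the diff). With fewer than three entries in nodelist, the generated corosync.conf also sets two_node: 1:

  services.corosync = {
    enable = true;
    clusterName = "testcluster";
    nodelist = [
      { nodeid = 1; name = "node1"; ring_addrs = [ "192.168.1.1" ]; }
      { nodeid = 2; name = "node2"; ring_addrs = [ "192.168.1.2" ]; }
    ];
  };
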
diff --git a/nixos/modules/services/cluster/hadoop/conf.nix b/nixos/modules/services/cluster/hadoop/conf.nix
new file mode 100644
index 00000000000..e3c26a0d550
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/conf.nix
@@ -0,0 +1,44 @@
+{ cfg, pkgs, lib }:
+let
+  propertyXml = name: value: lib.optionalString (value != null) ''
+    <property>
+      <name>${name}</name>
+      <value>${builtins.toString value}</value>
+    </property>
+  '';
+  siteXml = fileName: properties: pkgs.writeTextDir fileName ''
+    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
+    <!-- generated by NixOS -->
+    <configuration>
+      ${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
+    </configuration>
+  '';
+  cfgLine = name: value: ''
+    ${name}=${builtins.toString value}
+  '';
+  cfgFile = fileName: properties: pkgs.writeTextDir fileName ''
+    # generated by NixOS
+    ${builtins.concatStringsSep "" (pkgs.lib.mapAttrsToList cfgLine properties)}
+  '';
+  userFunctions = ''
+    hadoop_verify_logdir() {
+      echo Skipping verification of log directory
+    }
+  '';
+  hadoopEnv = ''
+    export HADOOP_LOG_DIR=/tmp/hadoop/$USER
+  '';
+in
+pkgs.runCommand "hadoop-conf" {} (with cfg; ''
+  mkdir -p $out/
+  cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
+  cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
+  cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
+  cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
+  cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
+  cp ${cfgFile "container-executor.cfg" containerExecutorCfg}/* $out/
+  cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
+  cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
+  cp ${log4jProperties} $out/log4j.properties
+  ${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") extraConfDirs}
+'')
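
For orientation, the propertyXml helper above turns each attribute of a site attrset into an XML property element; for example the coreSite entry "fs.defaultFS" = "hdfs://localhost" (the example value used later in default.nix) is rendered into core-site.xml as:

  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost</value>
  </property>
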
diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
new file mode 100644
index 00000000000..a4fdea81037
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -0,0 +1,223 @@
+{ config, lib, options, pkgs, ...}:
+let
+  cfg = config.services.hadoop;
+  opt = options.services.hadoop;
+in
+with lib;
+{
+  imports = [ ./yarn.nix ./hdfs.nix ];
+
+  options.services.hadoop = {
+    coreSite = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        {
+          "fs.defaultFS" = "hdfs://localhost";
+        }
+      '';
+      description = ''
+        Hadoop core-site.xml definition
+        <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml"/>
+      '';
+    };
+    coreSiteInternal = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      internal = true;
+      description = ''
+        Internal option to add configs to core-site.xml based on module options
+      '';
+    };
+
+    hdfsSiteDefault = mkOption {
+      default = {
+        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+        "dfs.namenode.http-address" = "0.0.0.0:9870";
+        "dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
+        "dfs.namenode.http-bind-host" = "0.0.0.0";
+      };
+      type = types.attrsOf types.anything;
+      description = ''
+        Default options for hdfs-site.xml
+      '';
+    };
+    hdfsSite = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        {
+          "dfs.nameservices" = "namenode1";
+        }
+      '';
+      description = ''
+        Additional options and overrides for hdfs-site.xml
+        <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml"/>
+      '';
+    };
+    hdfsSiteInternal = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      internal = true;
+      description = ''
+        Internal option to add configs to hdfs-site.xml based on module options
+      '';
+    };
+
+    mapredSiteDefault = mkOption {
+      default = {
+        "mapreduce.framework.name" = "yarn";
+        "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+        "mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+        "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+      };
+      defaultText = literalExpression ''
+        {
+          "mapreduce.framework.name" = "yarn";
+          "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
+          "mapreduce.map.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
+          "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
+        }
+      '';
+      type = types.attrsOf types.anything;
+      description = ''
+        Default options for mapred-site.xml
+      '';
+    };
+    mapredSite = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        {
+          "mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
+        }
+      '';
+      description = ''
+        Additional options and overrides for mapred-site.xml
+        <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml"/>
+      '';
+    };
+
+    yarnSiteDefault = mkOption {
+      default = {
+        "yarn.nodemanager.admin-env" = "PATH=$PATH";
+        "yarn.nodemanager.aux-services" = "mapreduce_shuffle";
+        "yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler";
+        "yarn.nodemanager.bind-host" = "0.0.0.0";
+        "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
+        "yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ";
+        "yarn.nodemanager.linux-container-executor.group" = "hadoop";
+        "yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
+        "yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
+        "yarn.resourcemanager.bind-host" = "0.0.0.0";
+        "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler";
+      };
+      type = types.attrsOf types.anything;
+      description = ''
+        Default options for yarn-site.xml
+      '';
+    };
+    yarnSite = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        {
+          "yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
+        }
+      '';
+      description = ''
+        Additional options and overrides for yarn-site.xml
+        <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml"/>
+      '';
+    };
+    yarnSiteInternal = mkOption {
+      default = {};
+      type = types.attrsOf types.anything;
+      internal = true;
+      description = ''
+        Internal option to add configs to yarn-site.xml based on module options
+      '';
+    };
+
+    httpfsSite = mkOption {
+      default = { };
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        {
+          "hadoop.http.max.threads" = 500;
+        }
+      '';
+      description = ''
+        Hadoop httpfs-site.xml definition
+        <link xlink:href="https://hadoop.apache.org/docs/current/hadoop-hdfs-httpfs/httpfs-default.html"/>
+      '';
+    };
+
+    log4jProperties = mkOption {
+      default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties";
+      defaultText = literalExpression ''
+        "''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}/etc/hadoop/log4j.properties"
+      '';
+      type = types.path;
+      example = literalExpression ''
+        "''${pkgs.hadoop}/lib/''${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties";
+      '';
+      description = "log4j.properties file added to HADOOP_CONF_DIR";
+    };
+
+    containerExecutorCfg = mkOption {
+      default = {
+        # must be the same as yarn.nodemanager.linux-container-executor.group in yarnSite
+        "yarn.nodemanager.linux-container-executor.group"="hadoop";
+        "min.user.id"=1000;
+        "feature.terminal.enabled"=1;
+        "feature.mount-cgroup.enabled" = 1;
+      };
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        options.services.hadoop.containerExecutorCfg.default // {
+          "feature.terminal.enabled" = 0;
+        }
+      '';
+      description = ''
+        Yarn container-executor.cfg definition
+        <link xlink:href="https://hadoop.apache.org/docs/r2.7.2/hadoop-yarn/hadoop-yarn-site/SecureContainer.html"/>
+      '';
+    };
+
+    extraConfDirs = mkOption {
+      default = [];
+      type = types.listOf types.path;
+      example = literalExpression ''
+        [
+          ./extraHDFSConfs
+          ./extraYARNConfs
+        ]
+      '';
+      description = "Directories containing additional config files to be added to HADOOP_CONF_DIR";
+    };
+
+    gatewayRole.enable = mkEnableOption "gateway role for deploying hadoop configs";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.hadoop;
+      defaultText = literalExpression "pkgs.hadoop";
+      description = "";
+    };
+  };
+
+
+  config = mkIf cfg.gatewayRole.enable {
+    users.groups.hadoop = {
+      gid = config.ids.gids.hadoop;
+    };
+    environment = {
+      systemPackages = [ cfg.package ];
+      etc."hadoop-conf".source = let
+        hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+      in "${hadoopConf}";
+      variables.HADOOP_CONF_DIR = "/etc/hadoop-conf/";
+    };
+  };
+}
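
A minimal usage sketch for these options (the defaultFS value is the example from above; the replication factor is illustrative). Enabling gatewayRole deploys the generated configuration to /etc/hadoop-conf and exports HADOOP_CONF_DIR system-wide:

  services.hadoop = {
    gatewayRole.enable = true;
    coreSite."fs.defaultFS" = "hdfs://localhost";
    hdfsSite."dfs.replication" = 1;
  };
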
diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix
new file mode 100644
index 00000000000..325a002ad32
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/hdfs.nix
@@ -0,0 +1,204 @@
+{ config, lib, pkgs, ... }:
+with lib;
+let
+  cfg = config.services.hadoop;
+
+  # Config files for hadoop services
+  hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+
+  # Generator for HDFS service options
+  hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }: {
+    enable = mkEnableOption serviceName;
+    restartIfChanged = mkOption {
+      type = types.bool;
+      description = ''
+        Automatically restart the service on config change.
+        This can be set to false to defer restarts on clusters running critical applications.
+        Please consider the security implications of inadvertently running an older version,
+        and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
+      '';
+      default = false;
+    };
+    extraFlags = mkOption{
+      type = with types; listOf str;
+      default = [];
+      description = "Extra command line flags to pass to ${serviceName}";
+      example = [
+        "-Dcom.sun.management.jmxremote"
+        "-Dcom.sun.management.jmxremote.port=8010"
+      ];
+    };
+    extraEnv = mkOption{
+      type = with types; attrsOf str;
+      default = {};
+      description = "Extra environment variables for ${serviceName}";
+    };
+  } // (optionalAttrs firewallOption {
+    openFirewall = mkOption {
+      type = types.bool;
+      default = false;
+      description = "Open firewall ports for ${serviceName}.";
+    };
+  }) // (optionalAttrs (extraOpts != null) extraOpts);
+
+  # Generator for HDFS service configs
+  hadoopServiceConfig =
+    { name
+    , serviceOptions ? cfg.hdfs."${toLower name}"
+    , description ? "Hadoop HDFS ${name}"
+    , User ? "hdfs"
+    , allowedTCPPorts ? [ ]
+    , preStart ? ""
+    , environment ? { }
+    , extraConfig ? { }
+    }: (
+
+      mkIf serviceOptions.enable ( mkMerge [{
+        systemd.services."hdfs-${toLower name}" = {
+          inherit description preStart;
+          environment = environment // serviceOptions.extraEnv;
+          wantedBy = [ "multi-user.target" ];
+          inherit (serviceOptions) restartIfChanged;
+          serviceConfig = {
+            inherit User;
+            SyslogIdentifier = "hdfs-${toLower name}";
+            ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name} ${escapeShellArgs serviceOptions.extraFlags}";
+            Restart = "always";
+          };
+        };
+
+        services.hadoop.gatewayRole.enable = true;
+
+        networking.firewall.allowedTCPPorts = mkIf
+          ((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall)
+          allowedTCPPorts;
+      } extraConfig])
+    );
+
+in
+{
+  options.services.hadoop.hdfs = {
+
+    namenode = hadoopServiceOption { serviceName = "HDFS NameNode"; } // {
+      formatOnInit = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Format HDFS namenode on first start. This is useful for quickly spinning up
+          ephemeral HDFS clusters with a single namenode.
+          For HA clusters, initialization involves multiple steps across multiple nodes.
+          Follow this guide to initialize an HA cluster manually:
+          <link xlink:href="https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html"/>
+        '';
+      };
+    };
+
+    datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; } // {
+      dataDirs = mkOption {
+        default = null;
+        description = "Tier and path definitions for datanode storage.";
+        type = with types; nullOr (listOf (submodule {
+          options = {
+            type = mkOption {
+              type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ];
+              description = ''
+                Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies.
+              '';
+            };
+            path = mkOption {
+              type = path;
+              example = [ "/var/lib/hadoop/hdfs/dn" ];
+              description = "Determines where on the local filesystem a data node should store its blocks.";
+            };
+          };
+        }));
+      };
+    };
+
+    journalnode = hadoopServiceOption { serviceName = "HDFS JournalNode"; };
+
+    zkfc = hadoopServiceOption {
+      serviceName = "HDFS ZooKeeper failover controller";
+      firewallOption = false;
+    };
+
+    httpfs = hadoopServiceOption { serviceName = "HDFS HTTPFS"; } // {
+      tempPath = mkOption {
+        type = types.path;
+        default = "/tmp/hadoop/httpfs";
+        description = "HTTPFS_TEMP path used by HTTPFS";
+      };
+    };
+
+  };
+
+  config = mkMerge [
+    (hadoopServiceConfig {
+      name = "NameNode";
+      allowedTCPPorts = [
+        9870 # namenode.http-address
+        8020 # namenode.rpc-address
+        8022 # namenode.servicerpc-address
+        8019 # dfs.ha.zkfc.port
+      ];
+      preStart = (mkIf cfg.hdfs.namenode.formatOnInit
+        "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true"
+      );
+    })
+
+    (hadoopServiceConfig {
+      name = "DataNode";
+      # port numbers for datanode changed between hadoop 2 and 3
+      allowedTCPPorts = if versionAtLeast cfg.package.version "3" then [
+        9864 # datanode.http.address
+        9866 # datanode.address
+        9867 # datanode.ipc.address
+      ] else [
+        50075 # datanode.http.address
+        50010 # datanode.address
+        50020 # datanode.ipc.address
+      ];
+      extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" = let d = cfg.hdfs.datanode.dataDirs; in
+        if (d != null) then (concatMapStringsSep "," (x: "[" + x.type + "]file://" + x.path) d) else d;
+    })
+
+    (hadoopServiceConfig {
+      name = "JournalNode";
+      allowedTCPPorts = [
+        8480 # dfs.journalnode.http-address
+        8485 # dfs.journalnode.rpc-address
+      ];
+    })
+
+    (hadoopServiceConfig {
+      name = "zkfc";
+      description = "Hadoop HDFS ZooKeeper failover controller";
+    })
+
+    (hadoopServiceConfig {
+      name = "HTTPFS";
+      environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;
+      preStart = "mkdir -p $HTTPFS_TEMP";
+      User = "httpfs";
+      allowedTCPPorts = [
+        14000 # httpfs.http.port
+      ];
+    })
+
+    (mkIf cfg.gatewayRole.enable {
+      users.users.hdfs = {
+        description = "Hadoop HDFS user";
+        group = "hadoop";
+        uid = config.ids.uids.hdfs;
+      };
+    })
+    (mkIf cfg.hdfs.httpfs.enable {
+      users.users.httpfs = {
+        description = "Hadoop HTTPFS user";
+        group = "hadoop";
+        isSystemUser = true;
+      };
+    })
+
+  ];
+}
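
A sketch of a single-node HDFS setup using the roles defined above (the data directory is illustrative); enabling either role also pulls in the gateway role automatically:

  services.hadoop.hdfs = {
    namenode = {
      enable = true;
      formatOnInit = true;   # only sensible for ephemeral, non-HA clusters
      openFirewall = true;
    };
    datanode = {
      enable = true;
      openFirewall = true;
      dataDirs = [ { type = "DISK"; path = "/var/lib/hadoop/hdfs/dn"; } ];
    };
  };
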
diff --git a/nixos/modules/services/cluster/hadoop/yarn.nix b/nixos/modules/services/cluster/hadoop/yarn.nix
new file mode 100644
index 00000000000..74e16bdec68
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/yarn.nix
@@ -0,0 +1,200 @@
+{ config, lib, pkgs, ...}:
+with lib;
+let
+  cfg = config.services.hadoop;
+  hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+  restartIfChanged  = mkOption {
+    type = types.bool;
+    description = ''
+      Automatically restart the service on config change.
+      This can be set to false to defer restarts on clusters running critical applications.
+      Please consider the security implications of inadvertently running an older version,
+      and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
+    '';
+    default = false;
+  };
+  extraFlags = mkOption{
+    type = with types; listOf str;
+    default = [];
+    description = "Extra command line flags to pass to the service";
+    example = [
+      "-Dcom.sun.management.jmxremote"
+      "-Dcom.sun.management.jmxremote.port=8010"
+    ];
+  };
+  extraEnv = mkOption{
+    type = with types; attrsOf str;
+    default = {};
+    description = "Extra environment variables";
+  };
+in
+{
+  options.services.hadoop.yarn = {
+    resourcemanager = {
+      enable = mkEnableOption "Hadoop YARN ResourceManager";
+      inherit restartIfChanged extraFlags extraEnv;
+
+      openFirewall = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Open firewall ports for resourcemanager
+        '';
+      };
+    };
+    nodemanager = {
+      enable = mkEnableOption "Hadoop YARN NodeManager";
+      inherit restartIfChanged extraFlags extraEnv;
+
+      resource = {
+        cpuVCores = mkOption {
+          description = "Number of vcores that can be allocated for containers.";
+          type = with types; nullOr ints.positive;
+          default = null;
+        };
+        maximumAllocationVCores = mkOption {
+          description = "The maximum virtual CPU cores any container can be allocated.";
+          type = with types; nullOr ints.positive;
+          default = null;
+        };
+        memoryMB = mkOption {
+          description = "Amount of physical memory, in MB, that can be allocated for containers.";
+          type = with types; nullOr ints.positive;
+          default = null;
+        };
+        maximumAllocationMB = mkOption {
+          description = "The maximum physical memory any container can be allocated.";
+          type = with types; nullOr ints.positive;
+          default = null;
+        };
+      };
+
+      useCGroups = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Use cgroups to enforce resource limits on containers
+        '';
+      };
+
+      localDir = mkOption {
+        description = "List of directories to store localized files in.";
+        type = with types; nullOr (listOf path);
+        example = [ "/var/lib/hadoop/yarn/nm" ];
+        default = null;
+      };
+
+      addBinBash = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Add /bin/bash. This is needed by the linux container executor's launch script.
+        '';
+      };
+      openFirewall = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Open firewall ports for nodemanager.
+          Because containers can listen on any ephemeral port, TCP ports 1024–65535 will be opened.
+        '';
+      };
+    };
+  };
+
+  config = mkMerge [
+    (mkIf cfg.gatewayRole.enable {
+      users.users.yarn = {
+        description = "Hadoop YARN user";
+        group = "hadoop";
+        uid = config.ids.uids.yarn;
+      };
+    })
+
+    (mkIf cfg.yarn.resourcemanager.enable {
+      systemd.services.yarn-resourcemanager = {
+        description = "Hadoop YARN ResourceManager";
+        wantedBy = [ "multi-user.target" ];
+        inherit (cfg.yarn.resourcemanager) restartIfChanged;
+        environment = cfg.yarn.resourcemanager.extraEnv;
+
+        serviceConfig = {
+          User = "yarn";
+          SyslogIdentifier = "yarn-resourcemanager";
+          ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
+                      " resourcemanager ${escapeShellArgs cfg.yarn.resourcemanager.extraFlags}";
+          Restart = "always";
+        };
+      };
+
+      services.hadoop.gatewayRole.enable = true;
+
+      networking.firewall.allowedTCPPorts = (mkIf cfg.yarn.resourcemanager.openFirewall [
+        8088 # resourcemanager.webapp.address
+        8030 # resourcemanager.scheduler.address
+        8031 # resourcemanager.resource-tracker.address
+        8032 # resourcemanager.address
+        8033 # resourcemanager.admin.address
+      ]);
+    })
+
+    (mkIf cfg.yarn.nodemanager.enable {
+      # Needed because yarn hardcodes /bin/bash in container start scripts
+      # These scripts can't be patched, they are generated at runtime
+      systemd.tmpfiles.rules = [
+        (mkIf cfg.yarn.nodemanager.addBinBash "L /bin/bash - - - - /run/current-system/sw/bin/bash")
+      ];
+
+      systemd.services.yarn-nodemanager = {
+        description = "Hadoop YARN NodeManager";
+        wantedBy = [ "multi-user.target" ];
+        inherit (cfg.yarn.nodemanager) restartIfChanged;
+        environment = cfg.yarn.nodemanager.extraEnv;
+
+        preStart = ''
+          # create log dir
+          mkdir -p /var/log/hadoop/yarn/nodemanager
+          chown yarn:hadoop /var/log/hadoop/yarn/nodemanager
+
+          # set up setuid container executor binary
+          umount /run/wrappers/yarn-nodemanager/cgroup/cpu || true
+          rm -rf /run/wrappers/yarn-nodemanager/ || true
+          mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop,cgroup/cpu}
+          cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
+          chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor
+          chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor
+          cp ${hadoopConf}/container-executor.cfg /run/wrappers/yarn-nodemanager/etc/hadoop/
+        '';
+
+        serviceConfig = {
+          User = "yarn";
+          SyslogIdentifier = "yarn-nodemanager";
+          PermissionsStartOnly = true;
+          ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
+                      " nodemanager ${escapeShellArgs cfg.yarn.nodemanager.extraFlags}";
+          Restart = "always";
+        };
+      };
+
+      services.hadoop.gatewayRole.enable = true;
+
+      services.hadoop.yarnSiteInternal = with cfg.yarn.nodemanager; {
+        "yarn.nodemanager.local-dirs" = localDir;
+        "yarn.scheduler.maximum-allocation-vcores" = resource.maximumAllocationVCores;
+        "yarn.scheduler.maximum-allocation-mb" = resource.maximumAllocationMB;
+        "yarn.nodemanager.resource.cpu-vcores" = resource.cpuVCores;
+        "yarn.nodemanager.resource.memory-mb" = resource.memoryMB;
+      } // optionalAttrs useCGroups {
+        "yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
+        "yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
+        "yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
+        "yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
+      };
+
+      networking.firewall.allowedTCPPortRanges = [
+        (mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
+      ];
+    })
+
+  ];
+}
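
A corresponding YARN sketch (resource figures and the local directory are illustrative):

  services.hadoop.yarn = {
    resourcemanager = {
      enable = true;
      openFirewall = true;
    };
    nodemanager = {
      enable = true;
      useCGroups = true;
      localDir = [ "/var/lib/hadoop/yarn/nm" ];
      resource = {
        cpuVCores = 4;
        memoryMB = 8192;
      };
    };
  };
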
diff --git a/nixos/modules/services/cluster/k3s/default.nix b/nixos/modules/services/cluster/k3s/default.nix
new file mode 100644
index 00000000000..3a36cfa3f37
--- /dev/null
+++ b/nixos/modules/services/cluster/k3s/default.nix
@@ -0,0 +1,128 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.k3s;
+in
+{
+  # interface
+  options.services.k3s = {
+    enable = mkEnableOption "k3s";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.k3s;
+      defaultText = literalExpression "pkgs.k3s";
+      description = "Package that should be used for k3s";
+    };
+
+    role = mkOption {
+      description = ''
+        Whether k3s should run as a server or agent.
+        Note that the server, by default, also runs as an agent.
+      '';
+      default = "server";
+      type = types.enum [ "server" "agent" ];
+    };
+
+    serverAddr = mkOption {
+      type = types.str;
+      description = "The k3s server to connect to. This option only makes sense for an agent.";
+      example = "https://10.0.0.10:6443";
+      default = "";
+    };
+
+    token = mkOption {
+      type = types.str;
+      description = ''
+        The k3s token to use when connecting to the server. This option only makes sense for an agent.
+        WARNING: This option will store your token unencrypted and world-readable in the Nix store.
+        If this is undesired use the tokenFile option instead.
+      '';
+      default = "";
+    };
+
+    tokenFile = mkOption {
+      type = types.nullOr types.path;
+      description = "File path containing k3s token to use when connecting to the server. This option only makes sense for an agent.";
+      default = null;
+    };
+
+    docker = mkOption {
+      type = types.bool;
+      default = false;
+      description = "Use docker to run containers rather than the built-in containerd.";
+    };
+
+    extraFlags = mkOption {
+      description = "Extra flags to pass to the k3s command.";
+      type = types.str;
+      default = "";
+      example = "--no-deploy traefik --cluster-cidr 10.24.0.0/16";
+    };
+
+    disableAgent = mkOption {
+      type = types.bool;
+      default = false;
+      description = "Only run the server. This option only makes sense for a server.";
+    };
+
+    configPath = mkOption {
+      type = types.nullOr types.path;
+      default = null;
+      description = "File path containing the k3s YAML config. This is useful when the config is generated (for example on boot).";
+    };
+  };
+
+  # implementation
+
+  config = mkIf cfg.enable {
+    assertions = [
+      {
+        assertion = cfg.role == "agent" -> (cfg.configPath != null || cfg.serverAddr != "");
+        message = "serverAddr or configPath (with 'server' key) should be set if role is 'agent'";
+      }
+      {
+        assertion = cfg.role == "agent" -> cfg.configPath != null || cfg.tokenFile != null || cfg.token != "";
+        message = "token or tokenFile or configPath (with 'token' or 'token-file' keys) should be set if role is 'agent'";
+      }
+    ];
+
+    virtualisation.docker = mkIf cfg.docker {
+      enable = mkDefault true;
+    };
+    environment.systemPackages = [ config.services.k3s.package ];
+
+    systemd.services.k3s = {
+      description = "k3s service";
+      after = [ "network.service" "firewall.service" ] ++ (optional cfg.docker "docker.service");
+      wants = [ "network.service" "firewall.service" ];
+      wantedBy = [ "multi-user.target" ];
+      path = optional config.boot.zfs.enabled config.boot.zfs.package;
+      serviceConfig = {
+        # See: https://github.com/rancher/k3s/blob/dddbd16305284ae4bd14c0aade892412310d7edc/install.sh#L197
+        Type = if cfg.role == "agent" then "exec" else "notify";
+        KillMode = "process";
+        Delegate = "yes";
+        Restart = "always";
+        RestartSec = "5s";
+        LimitNOFILE = 1048576;
+        LimitNPROC = "infinity";
+        LimitCORE = "infinity";
+        TasksMax = "infinity";
+        ExecStart = concatStringsSep " \\\n " (
+          [
+            "${cfg.package}/bin/k3s ${cfg.role}"
+          ] ++ (optional cfg.docker "--docker")
+          ++ (optional (cfg.docker && config.systemd.enableUnifiedCgroupHierarchy) "--kubelet-arg=cgroup-driver=systemd")
+          ++ (optional cfg.disableAgent "--disable-agent")
+          ++ (optional (cfg.serverAddr != "") "--server ${cfg.serverAddr}")
+          ++ (optional (cfg.token != "") "--token ${cfg.token}")
+          ++ (optional (cfg.tokenFile != null) "--token-file ${cfg.tokenFile}")
+          ++ (optional (cfg.configPath != null) "--config ${cfg.configPath}")
+          ++ [ cfg.extraFlags ]
+        );
+      };
+    };
+  };
+}
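
Two minimal sketches for the k3s module, one per role (the server address and token path are illustrative):

  # server node
  services.k3s = {
    enable = true;
    role = "server";
  };

  # agent node joining that server
  services.k3s = {
    enable = true;
    role = "agent";
    serverAddr = "https://10.0.0.10:6443";
    tokenFile = "/var/lib/secrets/k3s-token";   # keeps the token out of the world-readable store
  };
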
diff --git a/nixos/modules/services/cluster/kubernetes/addon-manager.nix b/nixos/modules/services/cluster/kubernetes/addon-manager.nix
new file mode 100644
index 00000000000..b677d900ff5
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/addon-manager.nix
@@ -0,0 +1,171 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  cfg = top.addonManager;
+
+  isRBACEnabled = elem "RBAC" top.apiserver.authorizationMode;
+
+  addons = pkgs.runCommand "kubernetes-addons" { } ''
+    mkdir -p $out
+    # since we are mounting the addons to the addon manager, they need to be copied
+    ${concatMapStringsSep ";" (a: "cp -v ${a}/* $out/") (mapAttrsToList (name: addon:
+      pkgs.writeTextDir "${name}.json" (builtins.toJSON addon)
+    ) (cfg.addons))}
+  '';
+in
+{
+  ###### interface
+  options.services.kubernetes.addonManager = with lib.types; {
+
+    bootstrapAddons = mkOption {
+      description = ''
+        Bootstrap addons are like regular addons, but they are applied with cluster-admin rights.
+        They are applied at addon-manager startup only.
+      '';
+      default = { };
+      type = attrsOf attrs;
+      example = literalExpression ''
+        {
+          "my-service" = {
+            "apiVersion" = "v1";
+            "kind" = "Service";
+            "metadata" = {
+              "name" = "my-service";
+              "namespace" = "default";
+            };
+            "spec" = { ... };
+          };
+        }
+      '';
+    };
+
+    addons = mkOption {
+      description = "Kubernetes addons (any kind of Kubernetes resource can be an addon).";
+      default = { };
+      type = attrsOf (either attrs (listOf attrs));
+      example = literalExpression ''
+        {
+          "my-service" = {
+            "apiVersion" = "v1";
+            "kind" = "Service";
+            "metadata" = {
+              "name" = "my-service";
+              "namespace" = "default";
+            };
+            "spec" = { ... };
+          };
+        }
+        // import <nixpkgs/nixos/modules/services/cluster/kubernetes/dns.nix> { cfg = config.services.kubernetes; };
+      '';
+    };
+
+    enable = mkEnableOption "Kubernetes addon manager.";
+  };
+
+  ###### implementation
+  config = mkIf cfg.enable {
+    environment.etc."kubernetes/addons".source = "${addons}/";
+
+    systemd.services.kube-addon-manager = {
+      description = "Kubernetes addon manager";
+      wantedBy = [ "kubernetes.target" ];
+      after = [ "kube-apiserver.service" ];
+      environment.ADDON_PATH = "/etc/kubernetes/addons/";
+      path = [ pkgs.gawk ];
+      serviceConfig = {
+        Slice = "kubernetes.slice";
+        ExecStart = "${top.package}/bin/kube-addons";
+        WorkingDirectory = top.dataDir;
+        User = "kubernetes";
+        Group = "kubernetes";
+        Restart = "on-failure";
+        RestartSec = 10;
+      };
+      unitConfig = {
+        StartLimitIntervalSec = 0;
+      };
+    };
+
+    services.kubernetes.addonManager.bootstrapAddons = mkIf isRBACEnabled
+    (let
+      name = "system:kube-addon-manager";
+      namespace = "kube-system";
+    in
+    {
+
+      kube-addon-manager-r = {
+        apiVersion = "rbac.authorization.k8s.io/v1";
+        kind = "Role";
+        metadata = {
+          inherit name namespace;
+        };
+        rules = [{
+          apiGroups = ["*"];
+          resources = ["*"];
+          verbs = ["*"];
+        }];
+      };
+
+      kube-addon-manager-rb = {
+        apiVersion = "rbac.authorization.k8s.io/v1";
+        kind = "RoleBinding";
+        metadata = {
+          inherit name namespace;
+        };
+        roleRef = {
+          apiGroup = "rbac.authorization.k8s.io";
+          kind = "Role";
+          inherit name;
+        };
+        subjects = [{
+          apiGroup = "rbac.authorization.k8s.io";
+          kind = "User";
+          inherit name;
+        }];
+      };
+
+      kube-addon-manager-cluster-lister-cr = {
+        apiVersion = "rbac.authorization.k8s.io/v1";
+        kind = "ClusterRole";
+        metadata = {
+          name = "${name}:cluster-lister";
+        };
+        rules = [{
+          apiGroups = ["*"];
+          resources = ["*"];
+          verbs = ["list"];
+        }];
+      };
+
+      kube-addon-manager-cluster-lister-crb = {
+        apiVersion = "rbac.authorization.k8s.io/v1";
+        kind = "ClusterRoleBinding";
+        metadata = {
+          name = "${name}:cluster-lister";
+        };
+        roleRef = {
+          apiGroup = "rbac.authorization.k8s.io";
+          kind = "ClusterRole";
+          name = "${name}:cluster-lister";
+        };
+        subjects = [{
+          kind = "User";
+          inherit name;
+        }];
+      };
+    });
+
+    services.kubernetes.pki.certs = {
+      addonManager = top.lib.mkCert {
+        name = "kube-addon-manager";
+        CN = "system:kube-addon-manager";
+        action = "systemctl restart kube-addon-manager.service";
+      };
+    };
+  };
+
+  meta.buildDocsInSandbox = false;
+}
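
A hedged example of handing a resource to the addon manager defined above (the Namespace manifest is illustrative):

  services.kubernetes.addonManager = {
    enable = true;
    addons.example-namespace = {
      apiVersion = "v1";
      kind = "Namespace";
      metadata.name = "example";
    };
  };
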
diff --git a/nixos/modules/services/cluster/kubernetes/addons/dns.nix b/nixos/modules/services/cluster/kubernetes/addons/dns.nix
new file mode 100644
index 00000000000..7bd4991f43f
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/addons/dns.nix
@@ -0,0 +1,368 @@
+{ config, options, pkgs, lib, ... }:
+
+with lib;
+
+let
+  version = "1.7.1";
+  cfg = config.services.kubernetes.addons.dns;
+  ports = {
+    dns = 10053;
+    health = 10054;
+    metrics = 10055;
+  };
+in {
+  options.services.kubernetes.addons.dns = {
+    enable = mkEnableOption "kubernetes dns addon";
+
+    clusterIp = mkOption {
+      description = "Dns addon clusterIP";
+
+      # this default is also what kubernetes uses
+      default = (
+        concatStringsSep "." (
+          take 3 (splitString "." config.services.kubernetes.apiserver.serviceClusterIpRange
+        ))
+      ) + ".254";
+      defaultText = literalDocBook ''
+        The <literal>x.y.z.254</literal> IP of
+        <literal>config.${options.services.kubernetes.apiserver.serviceClusterIpRange}</literal>.
+      '';
+      type = types.str;
+    };
+
+    clusterDomain = mkOption {
+      description = "Dns cluster domain";
+      default = "cluster.local";
+      type = types.str;
+    };
+
+    replicas = mkOption {
+      description = "Number of DNS pod replicas to deploy in the cluster.";
+      default = 2;
+      type = types.int;
+    };
+
+    reconcileMode = mkOption {
+      description = ''
+        Controls the addon manager reconciliation mode for the DNS addon.
+
+        Setting reconcile mode to EnsureExists makes it possible to tailor DNS behavior by editing the coredns ConfigMap.
+
+        See: <link xlink:href="https://github.com/kubernetes/kubernetes/blob/master/cluster/addons/addon-manager/README.md"/>.
+      '';
+      default = "Reconcile";
+      type = types.enum [ "Reconcile" "EnsureExists" ];
+    };
+
+    coredns = mkOption {
+      description = "Docker image to seed for the CoreDNS container.";
+      type = types.attrs;
+      default = {
+        imageName = "coredns/coredns";
+        imageDigest = "sha256:4a6e0769130686518325b21b0c1d0688b54e7c79244d48e1b15634e98e40c6ef";
+        finalImageTag = version;
+        sha256 = "02r440xcdsgi137k5lmmvp0z5w5fmk8g9mysq5pnysq1wl8sj6mw";
+      };
+    };
+
+    corefile = mkOption {
+      description = ''
+        Custom coredns corefile configuration.
+
+        See: <link xlink:href="https://coredns.io/manual/toc/#configuration"/>.
+      '';
+      type = types.str;
+      default = ''
+        .:${toString ports.dns} {
+          errors
+          health :${toString ports.health}
+          kubernetes ${cfg.clusterDomain} in-addr.arpa ip6.arpa {
+            pods insecure
+            fallthrough in-addr.arpa ip6.arpa
+          }
+          prometheus :${toString ports.metrics}
+          forward . /etc/resolv.conf
+          cache 30
+          loop
+          reload
+          loadbalance
+        }'';
+      defaultText = literalExpression ''
+        '''
+          .:${toString ports.dns} {
+            errors
+            health :${toString ports.health}
+            kubernetes ''${config.services.kubernetes.addons.dns.clusterDomain} in-addr.arpa ip6.arpa {
+              pods insecure
+              fallthrough in-addr.arpa ip6.arpa
+            }
+            prometheus :${toString ports.metrics}
+            forward . /etc/resolv.conf
+            cache 30
+            loop
+            reload
+            loadbalance
+          }
+        '''
+      '';
+    };
+  };
+
+  config = mkIf cfg.enable {
+    services.kubernetes.kubelet.seedDockerImages =
+      singleton (pkgs.dockerTools.pullImage cfg.coredns);
+
+    services.kubernetes.addonManager.bootstrapAddons = {
+      coredns-cr = {
+        apiVersion = "rbac.authorization.k8s.io/v1";
+        kind = "ClusterRole";
+        metadata = {
+          labels = {
+            "addonmanager.kubernetes.io/mode" = "Reconcile";
+            k8s-app = "kube-dns";
+            "kubernetes.io/cluster-service" = "true";
+            "kubernetes.io/bootstrapping" = "rbac-defaults";
+          };
+          name = "system:coredns";
+        };
+        rules = [
+          {
+            apiGroups = [ "" ];
+            resources = [ "endpoints" "services" "pods" "namespaces" ];
+            verbs = [ "list" "watch" ];
+          }
+          {
+            apiGroups = [ "" ];
+            resources = [ "nodes" ];
+            verbs = [ "get" ];
+          }
+        ];
+      };
+
+      coredns-crb = {
+        apiVersion = "rbac.authorization.k8s.io/v1";
+        kind = "ClusterRoleBinding";
+        metadata = {
+          annotations = {
+            "rbac.authorization.kubernetes.io/autoupdate" = "true";
+          };
+          labels = {
+            "addonmanager.kubernetes.io/mode" = "Reconcile";
+            k8s-app = "kube-dns";
+            "kubernetes.io/cluster-service" = "true";
+            "kubernetes.io/bootstrapping" = "rbac-defaults";
+          };
+          name = "system:coredns";
+        };
+        roleRef = {
+          apiGroup = "rbac.authorization.k8s.io";
+          kind = "ClusterRole";
+          name = "system:coredns";
+        };
+        subjects = [
+          {
+            kind = "ServiceAccount";
+            name = "coredns";
+            namespace = "kube-system";
+          }
+        ];
+      };
+    };
+
+    services.kubernetes.addonManager.addons = {
+      coredns-sa = {
+        apiVersion = "v1";
+        kind = "ServiceAccount";
+        metadata = {
+          labels = {
+            "addonmanager.kubernetes.io/mode" = "Reconcile";
+            k8s-app = "kube-dns";
+            "kubernetes.io/cluster-service" = "true";
+          };
+          name = "coredns";
+          namespace = "kube-system";
+        };
+      };
+
+      coredns-cm = {
+        apiVersion = "v1";
+        kind = "ConfigMap";
+        metadata = {
+          labels = {
+            "addonmanager.kubernetes.io/mode" = cfg.reconcileMode;
+            k8s-app = "kube-dns";
+            "kubernetes.io/cluster-service" = "true";
+          };
+          name = "coredns";
+          namespace = "kube-system";
+        };
+        data = {
+          Corefile = cfg.corefile;
+        };
+      };
+
+      coredns-deploy = {
+        apiVersion = "apps/v1";
+        kind = "Deployment";
+        metadata = {
+          labels = {
+            "addonmanager.kubernetes.io/mode" = cfg.reconcileMode;
+            k8s-app = "kube-dns";
+            "kubernetes.io/cluster-service" = "true";
+            "kubernetes.io/name" = "CoreDNS";
+          };
+          name = "coredns";
+          namespace = "kube-system";
+        };
+        spec = {
+          replicas = cfg.replicas;
+          selector = {
+            matchLabels = { k8s-app = "kube-dns"; };
+          };
+          strategy = {
+            rollingUpdate = { maxUnavailable = 1; };
+            type = "RollingUpdate";
+          };
+          template = {
+            metadata = {
+              labels = {
+                k8s-app = "kube-dns";
+              };
+            };
+            spec = {
+              containers = [
+                {
+                  args = [ "-conf" "/etc/coredns/Corefile" ];
+                  image = with cfg.coredns; "${imageName}:${finalImageTag}";
+                  imagePullPolicy = "Never";
+                  livenessProbe = {
+                    failureThreshold = 5;
+                    httpGet = {
+                      path = "/health";
+                      port = ports.health;
+                      scheme = "HTTP";
+                    };
+                    initialDelaySeconds = 60;
+                    successThreshold = 1;
+                    timeoutSeconds = 5;
+                  };
+                  name = "coredns";
+                  ports = [
+                    {
+                      containerPort = ports.dns;
+                      name = "dns";
+                      protocol = "UDP";
+                    }
+                    {
+                      containerPort = ports.dns;
+                      name = "dns-tcp";
+                      protocol = "TCP";
+                    }
+                    {
+                      containerPort = ports.metrics;
+                      name = "metrics";
+                      protocol = "TCP";
+                    }
+                  ];
+                  resources = {
+                    limits = {
+                      memory = "170Mi";
+                    };
+                    requests = {
+                      cpu = "100m";
+                      memory = "70Mi";
+                    };
+                  };
+                  securityContext = {
+                    allowPrivilegeEscalation = false;
+                    capabilities = {
+                      drop = [ "all" ];
+                    };
+                    readOnlyRootFilesystem = true;
+                  };
+                  volumeMounts = [
+                    {
+                      mountPath = "/etc/coredns";
+                      name = "config-volume";
+                      readOnly = true;
+                    }
+                  ];
+                }
+              ];
+              dnsPolicy = "Default";
+              nodeSelector = {
+                "beta.kubernetes.io/os" = "linux";
+              };
+              serviceAccountName = "coredns";
+              tolerations = [
+                {
+                  effect = "NoSchedule";
+                  key = "node-role.kubernetes.io/master";
+                }
+                {
+                  key = "CriticalAddonsOnly";
+                  operator = "Exists";
+                }
+              ];
+              volumes = [
+                {
+                  configMap = {
+                    items = [
+                      {
+                        key = "Corefile";
+                        path = "Corefile";
+                      }
+                    ];
+                    name = "coredns";
+                  };
+                  name = "config-volume";
+                }
+              ];
+            };
+          };
+        };
+      };
+
+      coredns-svc = {
+        apiVersion = "v1";
+        kind = "Service";
+        metadata = {
+          annotations = {
+            "prometheus.io/port" = toString ports.metrics;
+            "prometheus.io/scrape" = "true";
+          };
+          labels = {
+            "addonmanager.kubernetes.io/mode" = "Reconcile";
+            k8s-app = "kube-dns";
+            "kubernetes.io/cluster-service" = "true";
+            "kubernetes.io/name" = "CoreDNS";
+          };
+          name = "kube-dns";
+          namespace = "kube-system";
+        };
+        spec = {
+          clusterIP = cfg.clusterIp;
+          ports = [
+            {
+              name = "dns";
+              port = 53;
+              targetPort = ports.dns;
+              protocol = "UDP";
+            }
+            {
+              name = "dns-tcp";
+              port = 53;
+              targetPort = ports.dns;
+              protocol = "TCP";
+            }
+          ];
+          selector = { k8s-app = "kube-dns"; };
+        };
+      };
+    };
+
+    services.kubernetes.kubelet.clusterDns = mkDefault cfg.clusterIp;
+  };
+
+  meta.buildDocsInSandbox = false;
+}
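
A usage sketch for the DNS addon; clusterIp defaults to the .254 address inside the apiserver's serviceClusterIpRange, so enabling it is usually enough (the values below restate the defaults):

  services.kubernetes.addons.dns = {
    enable = true;
    clusterDomain = "cluster.local";
    replicas = 2;
  };
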
diff --git a/nixos/modules/services/cluster/kubernetes/apiserver.nix b/nixos/modules/services/cluster/kubernetes/apiserver.nix
new file mode 100644
index 00000000000..a192e93badc
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/apiserver.nix
@@ -0,0 +1,500 @@
+{ config, lib, options, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  otop = options.services.kubernetes;
+  cfg = top.apiserver;
+
+  isRBACEnabled = elem "RBAC" cfg.authorizationMode;
+
+  apiserverServiceIP = (concatStringsSep "." (
+    take 3 (splitString "." cfg.serviceClusterIpRange
+  )) + ".1");
+in
+{
+
+  imports = [
+    (mkRenamedOptionModule [ "services" "kubernetes" "apiserver" "admissionControl" ] [ "services" "kubernetes" "apiserver" "enableAdmissionPlugins" ])
+    (mkRenamedOptionModule [ "services" "kubernetes" "apiserver" "address" ] ["services" "kubernetes" "apiserver" "bindAddress"])
+    (mkRenamedOptionModule [ "services" "kubernetes" "apiserver" "port" ] ["services" "kubernetes" "apiserver" "insecurePort"])
+    (mkRemovedOptionModule [ "services" "kubernetes" "apiserver" "publicAddress" ] "")
+    (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "servers" ] [ "services" "kubernetes" "apiserver" "etcd" "servers" ])
+    (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "keyFile" ] [ "services" "kubernetes" "apiserver" "etcd" "keyFile" ])
+    (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "certFile" ] [ "services" "kubernetes" "apiserver" "etcd" "certFile" ])
+    (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "caFile" ] [ "services" "kubernetes" "apiserver" "etcd" "caFile" ])
+  ];
+
+  ###### interface
+  options.services.kubernetes.apiserver = with lib.types; {
+
+    advertiseAddress = mkOption {
+      description = ''
+        Kubernetes apiserver IP address on which to advertise the apiserver
+        to members of the cluster. This address must be reachable by the rest
+        of the cluster.
+      '';
+      default = null;
+      type = nullOr str;
+    };
+
+    allowPrivileged = mkOption {
+      description = "Whether to allow privileged containers on Kubernetes.";
+      default = false;
+      type = bool;
+    };
+
+    authorizationMode = mkOption {
+      description = ''
+        Kubernetes apiserver authorization mode (AlwaysAllow/AlwaysDeny/ABAC/Webhook/RBAC/Node). See
+        <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/authorization/"/>
+      '';
+      default = ["RBAC" "Node"]; # Enabling RBAC by default, although kubernetes default is AllowAllow
+      type = listOf (enum ["AlwaysAllow" "AlwaysDeny" "ABAC" "Webhook" "RBAC" "Node"]);
+    };
+
+    authorizationPolicy = mkOption {
+      description = ''
+        Kubernetes apiserver authorization policy file. See
+        <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/authorization/"/>
+      '';
+      default = [];
+      type = listOf attrs;
+    };
+
+    basicAuthFile = mkOption {
+      description = ''
+        Kubernetes apiserver basic authentication file. See
+        <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/authentication"/>
+      '';
+      default = null;
+      type = nullOr path;
+    };
+
+    bindAddress = mkOption {
+      description = ''
+        The IP address on which to listen for the --secure-port port.
+        The associated interface(s) must be reachable by the rest
+        of the cluster, and by CLI/web clients.
+      '';
+      default = "0.0.0.0";
+      type = str;
+    };
+
+    clientCaFile = mkOption {
+      description = "Kubernetes apiserver CA file for client auth.";
+      default = top.caFile;
+      defaultText = literalExpression "config.${otop.caFile}";
+      type = nullOr path;
+    };
+
+    disableAdmissionPlugins = mkOption {
+      description = ''
+        Kubernetes admission control plugins to disable. See
+        <link xlink:href="https://kubernetes.io/docs/admin/admission-controllers/"/>
+      '';
+      default = [];
+      type = listOf str;
+    };
+
+    enable = mkEnableOption "Kubernetes apiserver";
+
+    enableAdmissionPlugins = mkOption {
+      description = ''
+        Kubernetes admission control plugins to enable. See
+        <link xlink:href="https://kubernetes.io/docs/admin/admission-controllers/"/>
+      '';
+      default = [
+        "NamespaceLifecycle" "LimitRanger" "ServiceAccount"
+        "ResourceQuota" "DefaultStorageClass" "DefaultTolerationSeconds"
+        "NodeRestriction"
+      ];
+      example = [
+        "NamespaceLifecycle" "NamespaceExists" "LimitRanger"
+        "SecurityContextDeny" "ServiceAccount" "ResourceQuota"
+        "PodSecurityPolicy" "NodeRestriction" "DefaultStorageClass"
+      ];
+      type = listOf str;
+    };
+
+    etcd = {
+      servers = mkOption {
+        description = "List of etcd servers.";
+        default = ["http://127.0.0.1:2379"];
+        type = types.listOf types.str;
+      };
+
+      keyFile = mkOption {
+        description = "Etcd key file.";
+        default = null;
+        type = types.nullOr types.path;
+      };
+
+      certFile = mkOption {
+        description = "Etcd cert file.";
+        default = null;
+        type = types.nullOr types.path;
+      };
+
+      caFile = mkOption {
+        description = "Etcd ca file.";
+        default = top.caFile;
+        defaultText = literalExpression "config.${otop.caFile}";
+        type = types.nullOr types.path;
+      };
+    };
+
+    extraOpts = mkOption {
+      description = "Kubernetes apiserver extra command line options.";
+      default = "";
+      type = separatedString " ";
+    };
+
+    extraSANs = mkOption {
+      description = "Extra x509 Subject Alternative Names to be added to the kubernetes apiserver tls cert.";
+      default = [];
+      type = listOf str;
+    };
+
+    featureGates = mkOption {
+      description = "List set of feature gates";
+      default = top.featureGates;
+      defaultText = literalExpression "config.${otop.featureGates}";
+      type = listOf str;
+    };
+
+    insecureBindAddress = mkOption {
+      description = "The IP address on which to serve the --insecure-port.";
+      default = "127.0.0.1";
+      type = str;
+    };
+
+    insecurePort = mkOption {
+      description = "Kubernetes apiserver insecure listening port. (0 = disabled)";
+      default = 0;
+      type = int;
+    };
+
+    kubeletClientCaFile = mkOption {
+      description = "Path to a cert file for connecting to kubelet.";
+      default = top.caFile;
+      defaultText = literalExpression "config.${otop.caFile}";
+      type = nullOr path;
+    };
+
+    kubeletClientCertFile = mkOption {
+      description = "Client certificate to use for connections to kubelet.";
+      default = null;
+      type = nullOr path;
+    };
+
+    kubeletClientKeyFile = mkOption {
+      description = "Key to use for connections to kubelet.";
+      default = null;
+      type = nullOr path;
+    };
+
+    preferredAddressTypes = mkOption {
+      description = "List of the preferred NodeAddressTypes to use for kubelet connections.";
+      type = nullOr str;
+      default = null;
+    };
+
+    proxyClientCertFile = mkOption {
+      description = "Client certificate to use for connections to proxy.";
+      default = null;
+      type = nullOr path;
+    };
+
+    proxyClientKeyFile = mkOption {
+      description = "Key to use for connections to proxy.";
+      default = null;
+      type = nullOr path;
+    };
+
+    runtimeConfig = mkOption {
+      description = ''
+        API runtime configuration. See
+        <link xlink:href="https://kubernetes.io/docs/tasks/administer-cluster/cluster-management/"/>
+      '';
+      default = "authentication.k8s.io/v1beta1=true";
+      example = "api/all=false,api/v1=true";
+      type = str;
+    };
+
+    storageBackend = mkOption {
+      description = ''
+        Kubernetes apiserver storage backend.
+      '';
+      default = "etcd3";
+      type = enum ["etcd2" "etcd3"];
+    };
+
+    securePort = mkOption {
+      description = "Kubernetes apiserver secure port.";
+      default = 6443;
+      type = int;
+    };
+
+    apiAudiences = mkOption {
+      description = ''
+        Kubernetes apiserver ServiceAccount token API audiences. Tokens used
+        against the API must be bound to at least one of these audiences.
+      '';
+      default = "api,https://kubernetes.default.svc";
+      type = str;
+    };
+
+    serviceAccountIssuer = mkOption {
+      description = ''
+        Kubernetes apiserver ServiceAccount issuer.
+      '';
+      default = "https://kubernetes.default.svc";
+      type = str;
+    };
+
+    serviceAccountSigningKeyFile = mkOption {
+      description = ''
+        Path to the file that contains the current private key of the service
+        account token issuer. The issuer will sign issued ID tokens with this
+        private key.
+      '';
+      type = path;
+    };
+
+    serviceAccountKeyFile = mkOption {
+      description = ''
+        File containing PEM-encoded x509 RSA or ECDSA private or public keys,
+        used to verify ServiceAccount tokens. The specified file can contain
+        multiple keys, and the flag can be specified multiple times with
+        different files. If unspecified, --tls-private-key-file is used.
+        Must be specified when --service-account-signing-key is provided
+      '';
+      type = path;
+    };
+
+    serviceClusterIpRange = mkOption {
+      description = ''
+        A CIDR notation IP range from which to assign service cluster IPs.
+        This must not overlap with any IP ranges assigned to nodes for pods.
+      '';
+      default = "10.0.0.0/24";
+      type = str;
+    };
+
+    tlsCertFile = mkOption {
+      description = "Kubernetes apiserver certificate file.";
+      default = null;
+      type = nullOr path;
+    };
+
+    tlsKeyFile = mkOption {
+      description = "Kubernetes apiserver private key file.";
+      default = null;
+      type = nullOr path;
+    };
+
+    tokenAuthFile = mkOption {
+      description = ''
+        Kubernetes apiserver token authentication file. See
+        <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/authentication"/>
+      '';
+      default = null;
+      type = nullOr path;
+    };
+
+    verbosity = mkOption {
+      description = ''
+        Optional glog verbosity level for logging statements. See
+        <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/>
+      '';
+      default = null;
+      type = nullOr int;
+    };
+
+    webhookConfig = mkOption {
+      description = ''
+        Kubernetes apiserver Webhook config file. It uses the kubeconfig file format.
+        See <link xlink:href="https://kubernetes.io/docs/reference/access-authn-authz/webhook/"/>
+      '';
+      default = null;
+      type = nullOr path;
+    };
+
+  };
+
+
+  ###### implementation
+  config = mkMerge [
+
+    (mkIf cfg.enable {
+        systemd.services.kube-apiserver = {
+          description = "Kubernetes APIServer Service";
+          wantedBy = [ "kubernetes.target" ];
+          after = [ "network.target" ];
+          serviceConfig = {
+            Slice = "kubernetes.slice";
+            ExecStart = ''${top.package}/bin/kube-apiserver \
+              --allow-privileged=${boolToString cfg.allowPrivileged} \
+              --authorization-mode=${concatStringsSep "," cfg.authorizationMode} \
+                ${optionalString (elem "ABAC" cfg.authorizationMode)
+                  "--authorization-policy-file=${
+                    pkgs.writeText "kube-auth-policy.jsonl"
+                    (concatMapStringsSep "\n" (l: builtins.toJSON l) cfg.authorizationPolicy)
+                  }"
+                } \
+                ${optionalString (elem "Webhook" cfg.authorizationMode)
+                  "--authorization-webhook-config-file=${cfg.webhookConfig}"
+                } \
+              --bind-address=${cfg.bindAddress} \
+              ${optionalString (cfg.advertiseAddress != null)
+                "--advertise-address=${cfg.advertiseAddress}"} \
+              ${optionalString (cfg.clientCaFile != null)
+                "--client-ca-file=${cfg.clientCaFile}"} \
+              --disable-admission-plugins=${concatStringsSep "," cfg.disableAdmissionPlugins} \
+              --enable-admission-plugins=${concatStringsSep "," cfg.enableAdmissionPlugins} \
+              --etcd-servers=${concatStringsSep "," cfg.etcd.servers} \
+              ${optionalString (cfg.etcd.caFile != null)
+                "--etcd-cafile=${cfg.etcd.caFile}"} \
+              ${optionalString (cfg.etcd.certFile != null)
+                "--etcd-certfile=${cfg.etcd.certFile}"} \
+              ${optionalString (cfg.etcd.keyFile != null)
+                "--etcd-keyfile=${cfg.etcd.keyFile}"} \
+              ${optionalString (cfg.featureGates != [])
+                "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \
+              ${optionalString (cfg.basicAuthFile != null)
+                "--basic-auth-file=${cfg.basicAuthFile}"} \
+              ${optionalString (cfg.kubeletClientCaFile != null)
+                "--kubelet-certificate-authority=${cfg.kubeletClientCaFile}"} \
+              ${optionalString (cfg.kubeletClientCertFile != null)
+                "--kubelet-client-certificate=${cfg.kubeletClientCertFile}"} \
+              ${optionalString (cfg.kubeletClientKeyFile != null)
+                "--kubelet-client-key=${cfg.kubeletClientKeyFile}"} \
+              ${optionalString (cfg.preferredAddressTypes != null)
+                "--kubelet-preferred-address-types=${cfg.preferredAddressTypes}"} \
+              ${optionalString (cfg.proxyClientCertFile != null)
+                "--proxy-client-cert-file=${cfg.proxyClientCertFile}"} \
+              ${optionalString (cfg.proxyClientKeyFile != null)
+                "--proxy-client-key-file=${cfg.proxyClientKeyFile}"} \
+              --insecure-bind-address=${cfg.insecureBindAddress} \
+              --insecure-port=${toString cfg.insecurePort} \
+              ${optionalString (cfg.runtimeConfig != "")
+                "--runtime-config=${cfg.runtimeConfig}"} \
+              --secure-port=${toString cfg.securePort} \
+              --api-audiences=${toString cfg.apiAudiences} \
+              --service-account-issuer=${toString cfg.serviceAccountIssuer} \
+              --service-account-signing-key-file=${cfg.serviceAccountSigningKeyFile} \
+              --service-account-key-file=${cfg.serviceAccountKeyFile} \
+              --service-cluster-ip-range=${cfg.serviceClusterIpRange} \
+              --storage-backend=${cfg.storageBackend} \
+              ${optionalString (cfg.tlsCertFile != null)
+                "--tls-cert-file=${cfg.tlsCertFile}"} \
+              ${optionalString (cfg.tlsKeyFile != null)
+                "--tls-private-key-file=${cfg.tlsKeyFile}"} \
+              ${optionalString (cfg.tokenAuthFile != null)
+                "--token-auth-file=${cfg.tokenAuthFile}"} \
+              ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
+              ${cfg.extraOpts}
+            '';
+            WorkingDirectory = top.dataDir;
+            User = "kubernetes";
+            Group = "kubernetes";
+            AmbientCapabilities = "cap_net_bind_service";
+            Restart = "on-failure";
+            RestartSec = 5;
+          };
+
+          unitConfig = {
+            StartLimitIntervalSec = 0;
+          };
+        };
+
+        services.etcd = {
+          clientCertAuth = mkDefault true;
+          peerClientCertAuth = mkDefault true;
+          listenClientUrls = mkDefault ["https://0.0.0.0:2379"];
+          listenPeerUrls = mkDefault ["https://0.0.0.0:2380"];
+          advertiseClientUrls = mkDefault ["https://${top.masterAddress}:2379"];
+          initialCluster = mkDefault ["${top.masterAddress}=https://${top.masterAddress}:2380"];
+          name = mkDefault top.masterAddress;
+          initialAdvertisePeerUrls = mkDefault ["https://${top.masterAddress}:2380"];
+        };
+
+        services.kubernetes.addonManager.bootstrapAddons = mkIf isRBACEnabled {
+
+          apiserver-kubelet-api-admin-crb = {
+            apiVersion = "rbac.authorization.k8s.io/v1";
+            kind = "ClusterRoleBinding";
+            metadata = {
+              name = "system:kube-apiserver:kubelet-api-admin";
+            };
+            roleRef = {
+              apiGroup = "rbac.authorization.k8s.io";
+              kind = "ClusterRole";
+              name = "system:kubelet-api-admin";
+            };
+            subjects = [{
+              kind = "User";
+              name = "system:kube-apiserver";
+            }];
+          };
+
+        };
+
+      services.kubernetes.pki.certs = with top.lib; {
+        apiServer = mkCert {
+          name = "kube-apiserver";
+          CN = "kubernetes";
+          hosts = [
+                    "kubernetes.default.svc"
+                    "kubernetes.default.svc.${top.addons.dns.clusterDomain}"
+                    cfg.advertiseAddress
+                    top.masterAddress
+                    apiserverServiceIP
+                    "127.0.0.1"
+                  ] ++ cfg.extraSANs;
+          action = "systemctl restart kube-apiserver.service";
+        };
+        apiserverProxyClient = mkCert {
+          name = "kube-apiserver-proxy-client";
+          CN = "front-proxy-client";
+          action = "systemctl restart kube-apiserver.service";
+        };
+        apiserverKubeletClient = mkCert {
+          name = "kube-apiserver-kubelet-client";
+          CN = "system:kube-apiserver";
+          action = "systemctl restart kube-apiserver.service";
+        };
+        apiserverEtcdClient = mkCert {
+          name = "kube-apiserver-etcd-client";
+          CN = "etcd-client";
+          action = "systemctl restart kube-apiserver.service";
+        };
+        clusterAdmin = mkCert {
+          name = "cluster-admin";
+          CN = "cluster-admin";
+          fields = {
+            O = "system:masters";
+          };
+          privateKeyOwner = "root";
+        };
+        etcd = mkCert {
+          name = "etcd";
+          CN = top.masterAddress;
+          hosts = [
+                    "etcd.local"
+                    "etcd.${top.addons.dns.clusterDomain}"
+                    top.masterAddress
+                    cfg.advertiseAddress
+                  ];
+          privateKeyOwner = "etcd";
+          action = "systemctl restart etcd.service";
+        };
+      };
+
+    })
+
+  ];
+
+  meta.buildDocsInSandbox = false;
+}
diff --git a/nixos/modules/services/cluster/kubernetes/controller-manager.nix b/nixos/modules/services/cluster/kubernetes/controller-manager.nix
new file mode 100644
index 00000000000..7c317e94dee
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/controller-manager.nix
@@ -0,0 +1,176 @@
+{ config, lib, options, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  otop = options.services.kubernetes;
+  cfg = top.controllerManager;
+in
+{
+  imports = [
+    (mkRenamedOptionModule [ "services" "kubernetes" "controllerManager" "address" ] ["services" "kubernetes" "controllerManager" "bindAddress"])
+    (mkRenamedOptionModule [ "services" "kubernetes" "controllerManager" "port" ] ["services" "kubernetes" "controllerManager" "insecurePort"])
+  ];
+
+  ###### interface
+  options.services.kubernetes.controllerManager = with lib.types; {
+
+    allocateNodeCIDRs = mkOption {
+      description = "Whether to automatically allocate CIDR ranges for cluster nodes.";
+      default = true;
+      type = bool;
+    };
+
+    bindAddress = mkOption {
+      description = "Kubernetes controller manager listening address.";
+      default = "127.0.0.1";
+      type = str;
+    };
+
+    clusterCidr = mkOption {
+      description = "Kubernetes CIDR Range for Pods in cluster.";
+      default = top.clusterCidr;
+      defaultText = literalExpression "config.${otop.clusterCidr}";
+      type = str;
+    };
+
+    enable = mkEnableOption "Kubernetes controller manager";
+
+    extraOpts = mkOption {
+      description = "Kubernetes controller manager extra command line options.";
+      default = "";
+      type = separatedString " ";
+    };
+
+    featureGates = mkOption {
+      description = "List set of feature gates";
+      default = top.featureGates;
+      defaultText = literalExpression "config.${otop.featureGates}";
+      type = listOf str;
+    };
+
+    insecurePort = mkOption {
+      description = "Kubernetes controller manager insecure listening port.";
+      default = 0;
+      type = int;
+    };
+
+    kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes controller manager";
+
+    leaderElect = mkOption {
+      description = "Whether to start leader election before executing main loop.";
+      type = bool;
+      default = true;
+    };
+
+    rootCaFile = mkOption {
+      description = ''
+        Kubernetes controller manager certificate authority file included in
+        service account's token secret.
+      '';
+      default = top.caFile;
+      defaultText = literalExpression "config.${otop.caFile}";
+      type = nullOr path;
+    };
+
+    securePort = mkOption {
+      description = "Kubernetes controller manager secure listening port.";
+      default = 10252;
+      type = int;
+    };
+
+    serviceAccountKeyFile = mkOption {
+      description = ''
+        Kubernetes controller manager PEM-encoded private RSA key file used to
+        sign service account tokens
+      '';
+      default = null;
+      type = nullOr path;
+    };
+
+    tlsCertFile = mkOption {
+      description = "Kubernetes controller-manager certificate file.";
+      default = null;
+      type = nullOr path;
+    };
+
+    tlsKeyFile = mkOption {
+      description = "Kubernetes controller-manager private key file.";
+      default = null;
+      type = nullOr path;
+    };
+
+    verbosity = mkOption {
+      description = ''
+        Optional glog verbosity level for logging statements. See
+        <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/>
+      '';
+      default = null;
+      type = nullOr int;
+    };
+
+  };
+
+  ###### implementation
+  config = mkIf cfg.enable {
+    systemd.services.kube-controller-manager = {
+      description = "Kubernetes Controller Manager Service";
+      wantedBy = [ "kubernetes.target" ];
+      after = [ "kube-apiserver.service" ];
+      serviceConfig = {
+        RestartSec = "30s";
+        Restart = "on-failure";
+        Slice = "kubernetes.slice";
+        ExecStart = ''${top.package}/bin/kube-controller-manager \
+          --allocate-node-cidrs=${boolToString cfg.allocateNodeCIDRs} \
+          --bind-address=${cfg.bindAddress} \
+          ${optionalString (cfg.clusterCidr!=null)
+            "--cluster-cidr=${cfg.clusterCidr}"} \
+          ${optionalString (cfg.featureGates != [])
+            "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \
+          --kubeconfig=${top.lib.mkKubeConfig "kube-controller-manager" cfg.kubeconfig} \
+          --leader-elect=${boolToString cfg.leaderElect} \
+          ${optionalString (cfg.rootCaFile!=null)
+            "--root-ca-file=${cfg.rootCaFile}"} \
+          --port=${toString cfg.insecurePort} \
+          --secure-port=${toString cfg.securePort} \
+          ${optionalString (cfg.serviceAccountKeyFile!=null)
+            "--service-account-private-key-file=${cfg.serviceAccountKeyFile}"} \
+          ${optionalString (cfg.tlsCertFile!=null)
+            "--tls-cert-file=${cfg.tlsCertFile}"} \
+          ${optionalString (cfg.tlsKeyFile!=null)
+            "--tls-private-key-file=${cfg.tlsKeyFile}"} \
+          ${optionalString (elem "RBAC" top.apiserver.authorizationMode)
+            "--use-service-account-credentials"} \
+          ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
+          ${cfg.extraOpts}
+        '';
+        WorkingDirectory = top.dataDir;
+        User = "kubernetes";
+        Group = "kubernetes";
+      };
+      unitConfig = {
+        StartLimitIntervalSec = 0;
+      };
+      path = top.path;
+    };
+
+    services.kubernetes.pki.certs = with top.lib; {
+      controllerManager = mkCert {
+        name = "kube-controller-manager";
+        CN = "kube-controller-manager";
+        action = "systemctl restart kube-controller-manager.service";
+      };
+      controllerManagerClient = mkCert {
+        name = "kube-controller-manager-client";
+        CN = "system:kube-controller-manager";
+        action = "systemctl restart kube-controller-manager.service";
+      };
+    };
+
+    services.kubernetes.controllerManager.kubeconfig.server = mkDefault top.apiserverAddress;
+  };
+
+  meta.buildDocsInSandbox = false;
+}
diff --git a/nixos/modules/services/cluster/kubernetes/default.nix b/nixos/modules/services/cluster/kubernetes/default.nix
new file mode 100644
index 00000000000..35ec99d83c8
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/default.nix
@@ -0,0 +1,315 @@
+{ config, lib, options, pkgs, ... }:
+
+with lib;
+
+let
+  cfg = config.services.kubernetes;
+  opt = options.services.kubernetes;
+
+  defaultContainerdSettings = {
+    version = 2;
+    root = "/var/lib/containerd";
+    state = "/run/containerd";
+    oom_score = 0;
+
+    grpc = {
+      address = "/run/containerd/containerd.sock";
+    };
+
+    plugins."io.containerd.grpc.v1.cri" = {
+      sandbox_image = "pause:latest";
+
+      cni = {
+        bin_dir = "/opt/cni/bin";
+        max_conf_num = 0;
+      };
+
+      containerd.runtimes.runc = {
+        runtime_type = "io.containerd.runc.v2";
+        options.SystemdCgroup = true;
+      };
+    };
+  };
+
+  mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON {
+    apiVersion = "v1";
+    kind = "Config";
+    clusters = [{
+      name = "local";
+      cluster.certificate-authority = conf.caFile or cfg.caFile;
+      cluster.server = conf.server;
+    }];
+    users = [{
+      inherit name;
+      user = {
+        client-certificate = conf.certFile;
+        client-key = conf.keyFile;
+      };
+    }];
+    contexts = [{
+      context = {
+        cluster = "local";
+        user = name;
+      };
+      name = "local";
+    }];
+    current-context = "local";
+  });
+
+  caCert = secret "ca";
+
+  etcdEndpoints = ["https://${cfg.masterAddress}:2379"];
+
+  mkCert = { name, CN, hosts ? [], fields ? {}, action ? "",
+             privateKeyOwner ? "kubernetes" }: rec {
+    inherit name caCert CN hosts fields action;
+    cert = secret name;
+    key = secret "${name}-key";
+    privateKeyOptions = {
+      owner = privateKeyOwner;
+      group = "nogroup";
+      mode = "0600";
+      path = key;
+    };
+  };
+
+  secret = name: "${cfg.secretsPath}/${name}.pem";
+
+  mkKubeConfigOptions = prefix: {
+    server = mkOption {
+      description = "${prefix} kube-apiserver server address.";
+      type = types.str;
+    };
+
+    caFile = mkOption {
+      description = "${prefix} certificate authority file used to connect to kube-apiserver.";
+      type = types.nullOr types.path;
+      default = cfg.caFile;
+      defaultText = literalExpression "config.${opt.caFile}";
+    };
+
+    certFile = mkOption {
+      description = "${prefix} client certificate file used to connect to kube-apiserver.";
+      type = types.nullOr types.path;
+      default = null;
+    };
+
+    keyFile = mkOption {
+      description = "${prefix} client key file used to connect to kube-apiserver.";
+      type = types.nullOr types.path;
+      default = null;
+    };
+  };
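+
+  # These options are instantiated once per component (kubelet, proxy,
+  # scheduler, controller manager). A hedged example of overriding one of them
+  # by hand, with placeholder server/paths (easyCerts sets these itself):
+  #
+  #   services.kubernetes.controllerManager.kubeconfig = {
+  #     server = "https://kube.example.local:6443";
+  #     certFile = "/var/lib/kubernetes/secrets/kube-controller-manager-client.pem";
+  #     keyFile = "/var/lib/kubernetes/secrets/kube-controller-manager-client-key.pem";
+  #   };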
+in {
+
+  imports = [
+    (mkRemovedOptionModule [ "services" "kubernetes" "addons" "dashboard" ] "Removed due to it being an outdated version")
+    (mkRemovedOptionModule [ "services" "kubernetes" "verbose" ] "")
+  ];
+
+  ###### interface
+
+  options.services.kubernetes = {
+    roles = mkOption {
+      description = ''
+        Kubernetes role that this machine should take.
+
+        Master role will enable etcd, apiserver, scheduler, controller manager,
+        addon manager, flannel and proxy services.
+        Node role will enable flannel, containerd, kubelet and proxy services.
+      '';
+      default = [];
+      type = types.listOf (types.enum ["master" "node"]);
+    };
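+
+    # Minimal illustrative single-machine cluster using the roles mechanism
+    # (the hostname is a placeholder; it must resolve to this machine):
+    #
+    #   services.kubernetes = {
+    #     roles = [ "master" "node" ];
+    #     masterAddress = "kube.example.local";
+    #   };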
+
+    package = mkOption {
+      description = "Kubernetes package to use.";
+      type = types.package;
+      default = pkgs.kubernetes;
+      defaultText = literalExpression "pkgs.kubernetes";
+    };
+
+    kubeconfig = mkKubeConfigOptions "Default kubeconfig";
+
+    apiserverAddress = mkOption {
+      description = ''
+        Clusterwide accessible address for the kubernetes apiserver,
+        including protocol and optional port.
+      '';
+      example = "https://kubernetes-apiserver.example.com:6443";
+      type = types.str;
+    };
+
+    caFile = mkOption {
+      description = "Default kubernetes certificate authority";
+      type = types.nullOr types.path;
+      default = null;
+    };
+
+    dataDir = mkOption {
+      description = "Kubernetes root directory for managing kubelet files.";
+      default = "/var/lib/kubernetes";
+      type = types.path;
+    };
+
+    easyCerts = mkOption {
+      description = "Automatically setup x509 certificates and keys for the entire cluster.";
+      default = false;
+      type = types.bool;
+    };
+
+    featureGates = mkOption {
+      description = "List set of feature gates.";
+      default = [];
+      type = types.listOf types.str;
+    };
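+
+    # Each listed gate is rendered as "<name>=true" on the component command
+    # lines; the gate name below is purely illustrative:
+    #
+    #   services.kubernetes.featureGates = [ "SomeAlphaFeature" ];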
+
+    masterAddress = mkOption {
+      description = "Clusterwide available network address or hostname for the kubernetes master server.";
+      example = "master.example.com";
+      type = types.str;
+    };
+
+    path = mkOption {
+      description = "Packages added to the services' PATH environment variable. Both the bin and sbin subdirectories of each package are added.";
+      type = types.listOf types.package;
+      default = [];
+    };
+
+    clusterCidr = mkOption {
+      description = "Kubernetes controller manager and proxy CIDR Range for Pods in cluster.";
+      default = "10.1.0.0/16";
+      type = types.nullOr types.str;
+    };
+
+    lib = mkOption {
+      description = "Common functions for the kubernetes modules.";
+      default = {
+        inherit mkCert;
+        inherit mkKubeConfig;
+        inherit mkKubeConfigOptions;
+      };
+      type = types.attrs;
+    };
+
+    secretsPath = mkOption {
+      description = "Default location for kubernetes secrets. Not a store location.";
+      type = types.path;
+      default = cfg.dataDir + "/secrets";
+      defaultText = literalExpression ''
+        config.${opt.dataDir} + "/secrets"
+      '';
+    };
+  };
+
+  ###### implementation
+
+  config = mkMerge [
+
+    (mkIf cfg.easyCerts {
+      services.kubernetes.pki.enable = mkDefault true;
+      services.kubernetes.caFile = caCert;
+    })
+
+    (mkIf (elem "master" cfg.roles) {
+      services.kubernetes.apiserver.enable = mkDefault true;
+      services.kubernetes.scheduler.enable = mkDefault true;
+      services.kubernetes.controllerManager.enable = mkDefault true;
+      services.kubernetes.addonManager.enable = mkDefault true;
+      services.kubernetes.proxy.enable = mkDefault true;
+      services.etcd.enable = true; # Cannot mkDefault because of flannel default options
+      services.kubernetes.kubelet = {
+        enable = mkDefault true;
+        taints = mkIf (!(elem "node" cfg.roles)) {
+          master = {
+            key = "node-role.kubernetes.io/master";
+            value = "true";
+            effect = "NoSchedule";
+          };
+        };
+      };
+    })
+
+
+    (mkIf (all (el: el == "master") cfg.roles) {
+      # if this node is only a master make it unschedulable by default
+      services.kubernetes.kubelet.unschedulable = mkDefault true;
+    })
+
+    (mkIf (elem "node" cfg.roles) {
+      services.kubernetes.kubelet.enable = mkDefault true;
+      services.kubernetes.proxy.enable = mkDefault true;
+    })
+
+    # Using "services.kubernetes.roles" will automatically enable easyCerts and flannel
+    (mkIf (cfg.roles != []) {
+      services.kubernetes.flannel.enable = mkDefault true;
+      services.flannel.etcd.endpoints = mkDefault etcdEndpoints;
+      services.kubernetes.easyCerts = mkDefault true;
+    })
+
+    (mkIf cfg.apiserver.enable {
+      services.kubernetes.pki.etcClusterAdminKubeconfig = mkDefault "kubernetes/cluster-admin.kubeconfig";
+      services.kubernetes.apiserver.etcd.servers = mkDefault etcdEndpoints;
+    })
+
+    (mkIf cfg.kubelet.enable {
+      virtualisation.containerd = {
+        enable = mkDefault true;
+        settings = mapAttrsRecursive (name: mkDefault) defaultContainerdSettings;
+      };
+    })
+
+    (mkIf (cfg.apiserver.enable || cfg.controllerManager.enable) {
+      services.kubernetes.pki.certs = {
+        serviceAccount = mkCert {
+          name = "service-account";
+          CN = "system:service-account-signer";
+          action = ''
+            systemctl reload \
+              kube-apiserver.service \
+              kube-controller-manager.service
+          '';
+        };
+      };
+    })
+
+    (mkIf (
+        cfg.apiserver.enable ||
+        cfg.scheduler.enable ||
+        cfg.controllerManager.enable ||
+        cfg.kubelet.enable ||
+        cfg.proxy.enable ||
+        cfg.addonManager.enable
+    ) {
+      systemd.targets.kubernetes = {
+        description = "Kubernetes";
+        wantedBy = [ "multi-user.target" ];
+      };
+
+      systemd.tmpfiles.rules = [
+        "d /opt/cni/bin 0755 root root -"
+        "d /run/kubernetes 0755 kubernetes kubernetes -"
+        "d /var/lib/kubernetes 0755 kubernetes kubernetes -"
+      ];
+
+      users.users.kubernetes = {
+        uid = config.ids.uids.kubernetes;
+        description = "Kubernetes user";
+        group = "kubernetes";
+        home = cfg.dataDir;
+        createHome = true;
+      };
+      users.groups.kubernetes.gid = config.ids.gids.kubernetes;
+
+      # dns addon is enabled by default
+      services.kubernetes.addons.dns.enable = mkDefault true;
+
+      services.kubernetes.apiserverAddress = mkDefault ("https://${if cfg.apiserver.advertiseAddress != null
+                          then cfg.apiserver.advertiseAddress
+                          else "${cfg.masterAddress}:${toString cfg.apiserver.securePort}"}");
+    })
+  ];
+
+  meta.buildDocsInSandbox = false;
+}
diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix
new file mode 100644
index 00000000000..cb81eaaf016
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/flannel.nix
@@ -0,0 +1,100 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  cfg = top.flannel;
+
+  # we want flannel to use kubernetes itself as configuration backend, not direct etcd
+  storageBackend = "kubernetes";
+in
+{
+  ###### interface
+  options.services.kubernetes.flannel = {
+    enable = mkEnableOption "enable flannel networking";
+  };
+
+  ###### implementation
+  config = mkIf cfg.enable {
+    services.flannel = {
+
+      enable = mkDefault true;
+      network = mkDefault top.clusterCidr;
+      inherit storageBackend;
+      nodeName = config.services.kubernetes.kubelet.hostname;
+    };
+
+    services.kubernetes.kubelet = {
+      networkPlugin = mkDefault "cni";
+      cni.config = mkDefault [{
+        name = "mynet";
+        type = "flannel";
+        cniVersion = "0.3.1";
+        delegate = {
+          isDefaultGateway = true;
+          bridge = "mynet";
+        };
+      }];
+    };
+
+    networking = {
+      firewall.allowedUDPPorts = [
+        8285  # flannel udp
+        8472  # flannel vxlan
+      ];
+      dhcpcd.denyInterfaces = [ "mynet*" "flannel*" ];
+    };
+
+    services.kubernetes.pki.certs = {
+      flannelClient = top.lib.mkCert {
+        name = "flannel-client";
+        CN = "flannel-client";
+        action = "systemctl restart flannel.service";
+      };
+    };
+
+    # give flannel some kubernetes RBAC permissions if applicable
+    services.kubernetes.addonManager.bootstrapAddons = mkIf ((storageBackend == "kubernetes") && (elem "RBAC" top.apiserver.authorizationMode)) {
+
+      flannel-cr = {
+        apiVersion = "rbac.authorization.k8s.io/v1";
+        kind = "ClusterRole";
+        metadata = { name = "flannel"; };
+        rules = [{
+          apiGroups = [ "" ];
+          resources = [ "pods" ];
+          verbs = [ "get" ];
+        }
+        {
+          apiGroups = [ "" ];
+          resources = [ "nodes" ];
+          verbs = [ "list" "watch" ];
+        }
+        {
+          apiGroups = [ "" ];
+          resources = [ "nodes/status" ];
+          verbs = [ "patch" ];
+        }];
+      };
+
+      flannel-crb = {
+        apiVersion = "rbac.authorization.k8s.io/v1";
+        kind = "ClusterRoleBinding";
+        metadata = { name = "flannel"; };
+        roleRef = {
+          apiGroup = "rbac.authorization.k8s.io";
+          kind = "ClusterRole";
+          name = "flannel";
+        };
+        subjects = [{
+          kind = "User";
+          name = "flannel-client";
+        }];
+      };
+
+    };
+  };
+
+  meta.buildDocsInSandbox = false;
+}
diff --git a/nixos/modules/services/cluster/kubernetes/kubelet.nix b/nixos/modules/services/cluster/kubernetes/kubelet.nix
new file mode 100644
index 00000000000..af3a5062feb
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/kubelet.nix
@@ -0,0 +1,398 @@
+{ config, lib, options, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  otop = options.services.kubernetes;
+  cfg = top.kubelet;
+
+  cniConfig =
+    if cfg.cni.config != [] && cfg.cni.configDir != null then
+      throw "Verbatim CNI-config and CNI configDir cannot both be set."
+    else if cfg.cni.configDir != null then
+      cfg.cni.configDir
+    else
+      (pkgs.buildEnv {
+        name = "kubernetes-cni-config";
+        paths = imap (i: entry:
+          pkgs.writeTextDir "${toString (10+i)}-${entry.type}.conf" (builtins.toJSON entry)
+        ) cfg.cni.config;
+      });
+
+  infraContainer = pkgs.dockerTools.buildImage {
+    name = "pause";
+    tag = "latest";
+    contents = top.package.pause;
+    config.Cmd = ["/bin/pause"];
+  };
+
+  kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
+
+  manifestPath = "kubernetes/manifests";
+
+  taintOptions = with lib.types; { name, ... }: {
+    options = {
+      key = mkOption {
+        description = "Key of taint.";
+        default = name;
+        defaultText = literalDocBook "Name of this submodule.";
+        type = str;
+      };
+      value = mkOption {
+        description = "Value of taint.";
+        type = str;
+      };
+      effect = mkOption {
+        description = "Effect of taint.";
+        example = "NoSchedule";
+        type = enum ["NoSchedule" "PreferNoSchedule" "NoExecute"];
+      };
+    };
+  };
+
+  taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (mapAttrsToList (n: v: v) cfg.taints);
+in
+{
+  imports = [
+    (mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "applyManifests" ] "")
+    (mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "cadvisorPort" ] "")
+    (mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "allowPrivileged" ] "")
+  ];
+
+  ###### interface
+  options.services.kubernetes.kubelet = with lib.types; {
+
+    address = mkOption {
+      description = "Kubernetes kubelet info server listening address.";
+      default = "0.0.0.0";
+      type = str;
+    };
+
+    clusterDns = mkOption {
+      description = "Use alternative DNS.";
+      default = "10.1.0.1";
+      type = str;
+    };
+
+    clusterDomain = mkOption {
+      description = "Use alternative domain.";
+      default = config.services.kubernetes.addons.dns.clusterDomain;
+      defaultText = literalExpression "config.${options.services.kubernetes.addons.dns.clusterDomain}";
+      type = str;
+    };
+
+    clientCaFile = mkOption {
+      description = "Kubernetes apiserver CA file for client authentication.";
+      default = top.caFile;
+      defaultText = literalExpression "config.${otop.caFile}";
+      type = nullOr path;
+    };
+
+    cni = {
+      packages = mkOption {
+        description = "List of network plugin packages to install.";
+        type = listOf package;
+        default = [];
+      };
+
+      config = mkOption {
+        description = "Kubernetes CNI configuration.";
+        type = listOf attrs;
+        default = [];
+        example = literalExpression ''
+          [{
+            "cniVersion": "0.3.1",
+            "name": "mynet",
+            "type": "bridge",
+            "bridge": "cni0",
+            "isGateway": true,
+            "ipMasq": true,
+            "ipam": {
+                "type": "host-local",
+                "subnet": "10.22.0.0/16",
+                "routes": [
+                    { "dst": "0.0.0.0/0" }
+                ]
+            }
+          } {
+            "cniVersion": "0.3.1",
+            "type": "loopback"
+          }]
+        '';
+      };
+
+      configDir = mkOption {
+        description = "Path to Kubernetes CNI configuration directory.";
+        type = nullOr path;
+        default = null;
+      };
+    };
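+
+    # Note: cni.config and cni.configDir are mutually exclusive; setting both
+    # makes the cniConfig binding above throw during evaluation.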
+
+    containerRuntime = mkOption {
+      description = "Which container runtime type to use";
+      type = enum ["docker" "remote"];
+      default = "remote";
+    };
+
+    containerRuntimeEndpoint = mkOption {
+      description = "Endpoint at which to find the container runtime api interface/socket";
+      type = str;
+      default = "unix:///run/containerd/containerd.sock";
+    };
+
+    enable = mkEnableOption "Kubernetes kubelet.";
+
+    extraOpts = mkOption {
+      description = "Kubernetes kubelet extra command line options.";
+      default = "";
+      type = separatedString " ";
+    };
+
+    featureGates = mkOption {
+      description = "List set of feature gates";
+      default = top.featureGates;
+      defaultText = literalExpression "config.${otop.featureGates}";
+      type = listOf str;
+    };
+
+    healthz = {
+      bind = mkOption {
+        description = "Kubernetes kubelet healthz listening address.";
+        default = "127.0.0.1";
+        type = str;
+      };
+
+      port = mkOption {
+        description = "Kubernetes kubelet healthz port.";
+        default = 10248;
+        type = int;
+      };
+    };
+
+    hostname = mkOption {
+      description = "Kubernetes kubelet hostname override.";
+      default = config.networking.hostName;
+      defaultText = literalExpression "config.networking.hostName";
+      type = str;
+    };
+
+    kubeconfig = top.lib.mkKubeConfigOptions "Kubelet";
+
+    manifests = mkOption {
+      description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)";
+      type = attrsOf attrs;
+      default = {};
+    };
+
+    networkPlugin = mkOption {
+      description = "Network plugin to use by Kubernetes.";
+      type = nullOr (enum ["cni" "kubenet"]);
+      default = "kubenet";
+    };
+
+    nodeIp = mkOption {
+      description = "IP address of the node. If set, kubelet will use this IP address for the node.";
+      default = null;
+      type = nullOr str;
+    };
+
+    registerNode = mkOption {
+      description = "Whether to auto register kubelet with API server.";
+      default = true;
+      type = bool;
+    };
+
+    port = mkOption {
+      description = "Kubernetes kubelet info server listening port.";
+      default = 10250;
+      type = int;
+    };
+
+    seedDockerImages = mkOption {
+      description = "List of docker images to preload on system";
+      default = [];
+      type = listOf package;
+    };
+
+    taints = mkOption {
+      description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/).";
+      default = {};
+      type = attrsOf (submodule [ taintOptions ]);
+    };
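+
+    # Illustrative taint; the attribute name becomes the taint key by default,
+    # and the value/effect here are placeholders:
+    #
+    #   services.kubernetes.kubelet.taints.dedicated = {
+    #     value = "infra";
+    #     effect = "NoSchedule";
+    #   };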
+
+    tlsCertFile = mkOption {
+      description = "File containing x509 Certificate for HTTPS.";
+      default = null;
+      type = nullOr path;
+    };
+
+    tlsKeyFile = mkOption {
+      description = "File containing x509 private key matching tlsCertFile.";
+      default = null;
+      type = nullOr path;
+    };
+
+    unschedulable = mkOption {
+      description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role.";
+      default = false;
+      type = bool;
+    };
+
+    verbosity = mkOption {
+      description = ''
+        Optional glog verbosity level for logging statements. See
+        <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/>
+      '';
+      default = null;
+      type = nullOr int;
+    };
+
+  };
+
+  ###### implementation
+  config = mkMerge [
+    (mkIf cfg.enable {
+
+      environment.etc."cni/net.d".source = cniConfig;
+
+      services.kubernetes.kubelet.seedDockerImages = [infraContainer];
+
+      boot.kernel.sysctl = {
+        "net.bridge.bridge-nf-call-iptables"  = 1;
+        "net.ipv4.ip_forward"                 = 1;
+        "net.bridge.bridge-nf-call-ip6tables" = 1;
+      };
+
+      systemd.services.kubelet = {
+        description = "Kubernetes Kubelet Service";
+        wantedBy = [ "kubernetes.target" ];
+        after = [ "containerd.service" "network.target" "kube-apiserver.service" ];
+        path = with pkgs; [
+          gitMinimal
+          openssh
+          util-linux
+          iproute2
+          ethtool
+          thin-provisioning-tools
+          iptables
+          socat
+        ] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package ++ top.path;
+        preStart = ''
+          ${concatMapStrings (img: ''
+            echo "Seeding container image: ${img}"
+            ${if (lib.hasSuffix "gz" img) then
+              ''${pkgs.gzip}/bin/zcat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import --all-platforms -''
+            else
+              ''${pkgs.coreutils}/bin/cat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import --all-platforms -''
+            }
+          '') cfg.seedDockerImages}
+
+          rm /opt/cni/bin/* || true
+          ${concatMapStrings (package: ''
+            echo "Linking cni package: ${package}"
+            ln -fs ${package}/bin/* /opt/cni/bin
+          '') cfg.cni.packages}
+        '';
+        serviceConfig = {
+          Slice = "kubernetes.slice";
+          CPUAccounting = true;
+          MemoryAccounting = true;
+          Restart = "on-failure";
+          RestartSec = "1000ms";
+          ExecStart = ''${top.package}/bin/kubelet \
+            --address=${cfg.address} \
+            --authentication-token-webhook \
+            --authentication-token-webhook-cache-ttl="10s" \
+            --authorization-mode=Webhook \
+            ${optionalString (cfg.clientCaFile != null)
+              "--client-ca-file=${cfg.clientCaFile}"} \
+            ${optionalString (cfg.clusterDns != "")
+              "--cluster-dns=${cfg.clusterDns}"} \
+            ${optionalString (cfg.clusterDomain != "")
+              "--cluster-domain=${cfg.clusterDomain}"} \
+            --cni-conf-dir=${cniConfig} \
+            ${optionalString (cfg.featureGates != [])
+              "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \
+            --hairpin-mode=hairpin-veth \
+            --healthz-bind-address=${cfg.healthz.bind} \
+            --healthz-port=${toString cfg.healthz.port} \
+            --hostname-override=${cfg.hostname} \
+            --kubeconfig=${kubeconfig} \
+            ${optionalString (cfg.networkPlugin != null)
+              "--network-plugin=${cfg.networkPlugin}"} \
+            ${optionalString (cfg.nodeIp != null)
+              "--node-ip=${cfg.nodeIp}"} \
+            --pod-infra-container-image=pause \
+            ${optionalString (cfg.manifests != {})
+              "--pod-manifest-path=/etc/${manifestPath}"} \
+            --port=${toString cfg.port} \
+            --register-node=${boolToString cfg.registerNode} \
+            ${optionalString (taints != "")
+              "--register-with-taints=${taints}"} \
+            --root-dir=${top.dataDir} \
+            ${optionalString (cfg.tlsCertFile != null)
+              "--tls-cert-file=${cfg.tlsCertFile}"} \
+            ${optionalString (cfg.tlsKeyFile != null)
+              "--tls-private-key-file=${cfg.tlsKeyFile}"} \
+            ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
+            --container-runtime=${cfg.containerRuntime} \
+            --container-runtime-endpoint=${cfg.containerRuntimeEndpoint} \
+            --cgroup-driver=systemd \
+            ${cfg.extraOpts}
+          '';
+          WorkingDirectory = top.dataDir;
+        };
+        unitConfig = {
+          StartLimitIntervalSec = 0;
+        };
+      };
+
+      # Always include CNI plugins
+      services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins pkgs.cni-plugin-flannel];
+
+      boot.kernelModules = ["br_netfilter" "overlay"];
+
+      services.kubernetes.kubelet.hostname = with config.networking;
+        mkDefault (hostName + optionalString (domain != null) ".${domain}");
+
+      services.kubernetes.pki.certs = with top.lib; {
+        kubelet = mkCert {
+          name = "kubelet";
+          CN = top.kubelet.hostname;
+          action = "systemctl restart kubelet.service";
+
+        };
+        kubeletClient = mkCert {
+          name = "kubelet-client";
+          CN = "system:node:${top.kubelet.hostname}";
+          fields = {
+            O = "system:nodes";
+          };
+          action = "systemctl restart kubelet.service";
+        };
+      };
+
+      services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress;
+    })
+
+    (mkIf (cfg.enable && cfg.manifests != {}) {
+      environment.etc = mapAttrs' (name: manifest:
+        nameValuePair "${manifestPath}/${name}.json" {
+          text = builtins.toJSON manifest;
+          mode = "0755";
+        }
+      ) cfg.manifests;
+    })
+
+    (mkIf (cfg.unschedulable && cfg.enable) {
+      services.kubernetes.kubelet.taints.unschedulable = {
+        value = "true";
+        effect = "NoSchedule";
+      };
+    })
+
+  ];
+
+  meta.buildDocsInSandbox = false;
+}
diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix
new file mode 100644
index 00000000000..7d9198d20e8
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/pki.nix
@@ -0,0 +1,406 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  cfg = top.pki;
+
+  csrCA = pkgs.writeText "kube-pki-cacert-csr.json" (builtins.toJSON {
+    key = {
+        algo = "rsa";
+        size = 2048;
+    };
+    names = singleton cfg.caSpec;
+  });
+
+  csrCfssl = pkgs.writeText "kube-pki-cfssl-csr.json" (builtins.toJSON {
+    key = {
+        algo = "rsa";
+        size = 2048;
+    };
+    CN = top.masterAddress;
+    hosts = [top.masterAddress] ++ cfg.cfsslAPIExtraSANs;
+  });
+
+  cfsslAPITokenBaseName = "apitoken.secret";
+  cfsslAPITokenPath = "${config.services.cfssl.dataDir}/${cfsslAPITokenBaseName}";
+  certmgrAPITokenPath = "${top.secretsPath}/${cfsslAPITokenBaseName}";
+  cfsslAPITokenLength = 32;
+
+  clusterAdminKubeconfig = with cfg.certs.clusterAdmin;
+    top.lib.mkKubeConfig "cluster-admin" {
+        server = top.apiserverAddress;
+        certFile = cert;
+        keyFile = key;
+    };
+
+  remote = with config.services; "https://${kubernetes.masterAddress}:${toString cfssl.port}";
+in
+{
+  ###### interface
+  options.services.kubernetes.pki = with lib.types; {
+
+    enable = mkEnableOption "easyCert issuer service";
+
+    certs = mkOption {
+      description = "List of certificate specs to feed to cert generator.";
+      default = {};
+      type = attrs;
+    };
+
+    genCfsslCACert = mkOption {
+      description = ''
+        Whether to automatically generate cfssl CA certificate and key,
+        if they don't exist.
+      '';
+      default = true;
+      type = bool;
+    };
+
+    genCfsslAPICerts = mkOption {
+      description = ''
+        Whether to automatically generate cfssl API webserver TLS cert and key,
+        if they don't exist.
+      '';
+      default = true;
+      type = bool;
+    };
+
+    cfsslAPIExtraSANs = mkOption {
+      description = ''
+        Extra x509 Subject Alternative Names to be added to the cfssl API webserver TLS cert.
+      '';
+      default = [];
+      example = [ "subdomain.example.com" ];
+      type = listOf str;
+    };
+
+    genCfsslAPIToken = mkOption {
+      description = ''
+        Whether to automatically generate the cfssl API token secret,
+        if it doesn't exist.
+      '';
+      default = true;
+      type = bool;
+    };
+
+    pkiTrustOnBootstrap = mkOption {
+      description = "Whether to always trust remote cfssl server upon initial PKI bootstrap.";
+      default = true;
+      type = bool;
+    };
+
+    caCertPathPrefix = mkOption {
+      description = ''
+        Path prefix for the CA certificate to be used for cfssl signing.
+        Suffixes ".pem" and "-key.pem" will be automatically appended for
+        the public and private keys respectively.
+      '';
+      default = "${config.services.cfssl.dataDir}/ca";
+      defaultText = literalExpression ''"''${config.services.cfssl.dataDir}/ca"'';
+      type = str;
+    };
+
+    caSpec = mkOption {
+      description = "Certificate specification for the auto-generated CAcert.";
+      default = {
+        CN = "kubernetes-cluster-ca";
+        O = "NixOS";
+        OU = "services.kubernetes.pki.caSpec";
+        L = "auto-generated";
+      };
+      type = attrs;
+    };
+
+    etcClusterAdminKubeconfig = mkOption {
+      description = ''
+        Symlink a kubeconfig with cluster-admin privileges to environment path
+        (/etc/&lt;path&gt;).
+      '';
+      default = null;
+      type = nullOr str;
+    };
+
+  };
+
+  ###### implementation
+  config = mkIf cfg.enable
+  (let
+    cfsslCertPathPrefix = "${config.services.cfssl.dataDir}/cfssl";
+    cfsslCert = "${cfsslCertPathPrefix}.pem";
+    cfsslKey = "${cfsslCertPathPrefix}-key.pem";
+  in
+  {
+
+    services.cfssl = mkIf (top.apiserver.enable) {
+      enable = true;
+      address = "0.0.0.0";
+      tlsCert = cfsslCert;
+      tlsKey = cfsslKey;
+      configFile = toString (pkgs.writeText "cfssl-config.json" (builtins.toJSON {
+        signing = {
+          profiles = {
+            default = {
+              usages = ["digital signature"];
+              auth_key = "default";
+              expiry = "720h";
+            };
+          };
+        };
+        auth_keys = {
+          default = {
+            type = "standard";
+            key = "file:${cfsslAPITokenPath}";
+          };
+        };
+      }));
+    };
+
+    systemd.services.cfssl.preStart = with pkgs; with config.services.cfssl; mkIf (top.apiserver.enable)
+    (concatStringsSep "\n" [
+      "set -e"
+      (optionalString cfg.genCfsslCACert ''
+        if [ ! -f "${cfg.caCertPathPrefix}.pem" ]; then
+          ${cfssl}/bin/cfssl genkey -initca ${csrCA} | \
+            ${cfssl}/bin/cfssljson -bare ${cfg.caCertPathPrefix}
+        fi
+      '')
+      (optionalString cfg.genCfsslAPICerts ''
+        if [ ! -f "${dataDir}/cfssl.pem" ]; then
+          ${cfssl}/bin/cfssl gencert -ca "${cfg.caCertPathPrefix}.pem" -ca-key "${cfg.caCertPathPrefix}-key.pem" ${csrCfssl} | \
+            ${cfssl}/bin/cfssljson -bare ${cfsslCertPathPrefix}
+        fi
+      '')
+      (optionalString cfg.genCfsslAPIToken ''
+        if [ ! -f "${cfsslAPITokenPath}" ]; then
+          head -c ${toString (cfsslAPITokenLength / 2)} /dev/urandom | od -An -t x | tr -d ' ' >"${cfsslAPITokenPath}"
+        fi
+        chown cfssl "${cfsslAPITokenPath}" && chmod 400 "${cfsslAPITokenPath}"
+      '')]);
+
+    systemd.services.kube-certmgr-bootstrap = {
+      description = "Kubernetes certmgr bootstrapper";
+      wantedBy = [ "certmgr.service" ];
+      after = [ "cfssl.target" ];
+      script = concatStringsSep "\n" [''
+        set -e
+
+        # If there's a cfssl (cert issuer) running locally, then don't rely on user to
+        # manually paste it in place. Just symlink.
+        # Otherwise, create the target file, ready for users to insert the token.
+
+        mkdir -p "$(dirname "${certmgrAPITokenPath}")"
+        if [ -f "${cfsslAPITokenPath}" ]; then
+          ln -fs "${cfsslAPITokenPath}" "${certmgrAPITokenPath}"
+        else
+          touch "${certmgrAPITokenPath}" && chmod 600 "${certmgrAPITokenPath}"
+        fi
+      ''
+      (optionalString (cfg.pkiTrustOnBootstrap) ''
+        if [ ! -f "${top.caFile}" ] || [ $(cat "${top.caFile}" | wc -c) -lt 1 ]; then
+          ${pkgs.curl}/bin/curl --fail-early -f -kd '{}' ${remote}/api/v1/cfssl/info | \
+            ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}
+        fi
+      '')
+      ];
+      serviceConfig = {
+        RestartSec = "10s";
+        Restart = "on-failure";
+      };
+    };
+
+    services.certmgr = {
+      enable = true;
+      package = pkgs.certmgr-selfsigned;
+      svcManager = "command";
+      specs =
+        let
+          mkSpec = _: cert: {
+            inherit (cert) action;
+            authority = {
+              inherit remote;
+              file.path = cert.caCert;
+              root_ca = cert.caCert;
+              profile = "default";
+              auth_key_file = certmgrAPITokenPath;
+            };
+            certificate = {
+              path = cert.cert;
+            };
+            private_key = cert.privateKeyOptions;
+            request = {
+              hosts = [cert.CN] ++ cert.hosts;
+              inherit (cert) CN;
+              key = {
+                algo = "rsa";
+                size = 2048;
+              };
+              names = [ cert.fields ];
+            };
+          };
+        in
+          mapAttrs mkSpec cfg.certs;
+      };
+
+      #TODO: Get rid of kube-addon-manager in the future for the following reasons
+      # - it is basically just a shell script wrapped around kubectl
+      # - it assumes that it is clusterAdmin or can gain clusterAdmin rights through serviceAccount
+      # - it is designed to be used with k8s system components only
+      # - it would be better with a more Nix-oriented way of managing addons
+      systemd.services.kube-addon-manager = mkIf top.addonManager.enable (mkMerge [{
+        environment.KUBECONFIG = with cfg.certs.addonManager;
+          top.lib.mkKubeConfig "addon-manager" {
+            server = top.apiserverAddress;
+            certFile = cert;
+            keyFile = key;
+          };
+        }
+
+        (optionalAttrs (top.addonManager.bootstrapAddons != {}) {
+          serviceConfig.PermissionsStartOnly = true;
+          preStart = with pkgs;
+          let
+            files = mapAttrsToList (n: v: writeText "${n}.json" (builtins.toJSON v))
+              top.addonManager.bootstrapAddons;
+          in
+          ''
+            export KUBECONFIG=${clusterAdminKubeconfig}
+            ${kubernetes}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files}
+          '';
+        })]);
+
+      environment.etc.${cfg.etcClusterAdminKubeconfig}.source = mkIf (!isNull cfg.etcClusterAdminKubeconfig)
+        clusterAdminKubeconfig;
+
+      environment.systemPackages = mkIf (top.kubelet.enable || top.proxy.enable) [
+      (pkgs.writeScriptBin "nixos-kubernetes-node-join" ''
+        set -e
+        exec 1>&2
+
+        if [ $# -gt 0 ]; then
+          echo "Usage: $(basename $0)"
+          echo ""
+          echo "No args. Apitoken must be provided on stdin."
+          echo "To get the apitoken, execute: 'sudo cat ${certmgrAPITokenPath}' on the master node."
+          exit 1
+        fi
+
+        if [ $(id -u) != 0 ]; then
+          echo "Run as root please."
+          exit 1
+        fi
+
+        read -r token
+        if [ ''${#token} != ${toString cfsslAPITokenLength} ]; then
+          echo "Token must be of length ${toString cfsslAPITokenLength}."
+          exit 1
+        fi
+
+        echo $token > ${certmgrAPITokenPath}
+        chmod 600 ${certmgrAPITokenPath}
+
+        echo "Restarting certmgr..." >&1
+        systemctl restart certmgr
+
+        echo "Waiting for certs to appear..." >&1
+
+        ${optionalString top.kubelet.enable ''
+          while [ ! -f ${cfg.certs.kubelet.cert} ]; do sleep 1; done
+          echo "Restarting kubelet..." >&1
+          systemctl restart kubelet
+        ''}
+
+        ${optionalString top.proxy.enable ''
+          while [ ! -f ${cfg.certs.kubeProxyClient.cert} ]; do sleep 1; done
+          echo "Restarting kube-proxy..." >&1
+          systemctl restart kube-proxy
+        ''}
+
+        ${optionalString top.flannel.enable ''
+          while [ ! -f ${cfg.certs.flannelClient.cert} ]; do sleep 1; done
+          echo "Restarting flannel..." >&1
+          systemctl restart flannel
+        ''}
+
+        echo "Node joined succesfully"
+      '')];
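+
+      # Illustrative join flow with the default secretsPath
+      # (/var/lib/kubernetes/secrets):
+      #   on the master:   sudo cat /var/lib/kubernetes/secrets/apitoken.secret
+      #   on the new node: echo <token> | sudo nixos-kubernetes-node-join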
+
+      # isolate etcd on loopback at the master node
+      # easyCerts doesn't support multimaster clusters anyway atm.
+      services.etcd = with cfg.certs.etcd; {
+        listenClientUrls = ["https://127.0.0.1:2379"];
+        listenPeerUrls = ["https://127.0.0.1:2380"];
+        advertiseClientUrls = ["https://etcd.local:2379"];
+        initialCluster = ["${top.masterAddress}=https://etcd.local:2380"];
+        initialAdvertisePeerUrls = ["https://etcd.local:2380"];
+        certFile = mkDefault cert;
+        keyFile = mkDefault key;
+        trustedCaFile = mkDefault caCert;
+      };
+      networking.extraHosts = mkIf (config.services.etcd.enable) ''
+        127.0.0.1 etcd.${top.addons.dns.clusterDomain} etcd.local
+      '';
+
+      services.flannel = with cfg.certs.flannelClient; {
+        kubeconfig = top.lib.mkKubeConfig "flannel" {
+          server = top.apiserverAddress;
+          certFile = cert;
+          keyFile = key;
+        };
+      };
+
+      services.kubernetes = {
+
+        apiserver = mkIf top.apiserver.enable (with cfg.certs.apiServer; {
+          etcd = with cfg.certs.apiserverEtcdClient; {
+            servers = ["https://etcd.local:2379"];
+            certFile = mkDefault cert;
+            keyFile = mkDefault key;
+            caFile = mkDefault caCert;
+          };
+          clientCaFile = mkDefault caCert;
+          tlsCertFile = mkDefault cert;
+          tlsKeyFile = mkDefault key;
+          serviceAccountKeyFile = mkDefault cfg.certs.serviceAccount.cert;
+          serviceAccountSigningKeyFile = mkDefault cfg.certs.serviceAccount.key;
+          kubeletClientCaFile = mkDefault caCert;
+          kubeletClientCertFile = mkDefault cfg.certs.apiserverKubeletClient.cert;
+          kubeletClientKeyFile = mkDefault cfg.certs.apiserverKubeletClient.key;
+          proxyClientCertFile = mkDefault cfg.certs.apiserverProxyClient.cert;
+          proxyClientKeyFile = mkDefault cfg.certs.apiserverProxyClient.key;
+        });
+        controllerManager = mkIf top.controllerManager.enable {
+          serviceAccountKeyFile = mkDefault cfg.certs.serviceAccount.key;
+          rootCaFile = cfg.certs.controllerManagerClient.caCert;
+          kubeconfig = with cfg.certs.controllerManagerClient; {
+            certFile = mkDefault cert;
+            keyFile = mkDefault key;
+          };
+        };
+        scheduler = mkIf top.scheduler.enable {
+          kubeconfig = with cfg.certs.schedulerClient; {
+            certFile = mkDefault cert;
+            keyFile = mkDefault key;
+          };
+        };
+        kubelet = mkIf top.kubelet.enable {
+          clientCaFile = mkDefault cfg.certs.kubelet.caCert;
+          tlsCertFile = mkDefault cfg.certs.kubelet.cert;
+          tlsKeyFile = mkDefault cfg.certs.kubelet.key;
+          kubeconfig = with cfg.certs.kubeletClient; {
+            certFile = mkDefault cert;
+            keyFile = mkDefault key;
+          };
+        };
+        proxy = mkIf top.proxy.enable {
+          kubeconfig = with cfg.certs.kubeProxyClient; {
+            certFile = mkDefault cert;
+            keyFile = mkDefault key;
+          };
+        };
+      };
+    });
+
+  meta.buildDocsInSandbox = false;
+}
diff --git a/nixos/modules/services/cluster/kubernetes/proxy.nix b/nixos/modules/services/cluster/kubernetes/proxy.nix
new file mode 100644
index 00000000000..0fd98d1c157
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/proxy.nix
@@ -0,0 +1,102 @@
+{ config, lib, options, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  otop = options.services.kubernetes;
+  cfg = top.proxy;
+in
+{
+  imports = [
+    (mkRenamedOptionModule [ "services" "kubernetes" "proxy" "address" ] ["services" "kubernetes" "proxy" "bindAddress"])
+  ];
+
+  ###### interface
+  options.services.kubernetes.proxy = with lib.types; {
+
+    bindAddress = mkOption {
+      description = "Kubernetes proxy listening address.";
+      default = "0.0.0.0";
+      type = str;
+    };
+
+    enable = mkEnableOption "Kubernetes proxy";
+
+    extraOpts = mkOption {
+      description = "Kubernetes proxy extra command line options.";
+      default = "";
+      type = separatedString " ";
+    };
+
+    featureGates = mkOption {
+      description = "List set of feature gates";
+      default = top.featureGates;
+      defaultText = literalExpression "config.${otop.featureGates}";
+      type = listOf str;
+    };
+
+    hostname = mkOption {
+      description = "Kubernetes proxy hostname override.";
+      default = config.networking.hostName;
+      defaultText = literalExpression "config.networking.hostName";
+      type = str;
+    };
+
+    kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes proxy";
+
+    verbosity = mkOption {
+      description = ''
+        Optional glog verbosity level for logging statements. See
+        <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/>
+      '';
+      default = null;
+      type = nullOr int;
+    };
+
+  };
+
+  ###### implementation
+  config = mkIf cfg.enable {
+    systemd.services.kube-proxy = {
+      description = "Kubernetes Proxy Service";
+      wantedBy = [ "kubernetes.target" ];
+      after = [ "kube-apiserver.service" ];
+      path = with pkgs; [ iptables conntrack-tools ];
+      serviceConfig = {
+        Slice = "kubernetes.slice";
+        ExecStart = ''${top.package}/bin/kube-proxy \
+          --bind-address=${cfg.bindAddress} \
+          ${optionalString (top.clusterCidr!=null)
+            "--cluster-cidr=${top.clusterCidr}"} \
+          ${optionalString (cfg.featureGates != [])
+            "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \
+          --hostname-override=${cfg.hostname} \
+          --kubeconfig=${top.lib.mkKubeConfig "kube-proxy" cfg.kubeconfig} \
+          ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
+          ${cfg.extraOpts}
+        '';
+        WorkingDirectory = top.dataDir;
+        Restart = "on-failure";
+        RestartSec = 5;
+      };
+      unitConfig = {
+        StartLimitIntervalSec = 0;
+      };
+    };
+
+    services.kubernetes.proxy.hostname = with config.networking; mkDefault hostName;
+
+    services.kubernetes.pki.certs = {
+      kubeProxyClient = top.lib.mkCert {
+        name = "kube-proxy-client";
+        CN = "system:kube-proxy";
+        action = "systemctl restart kube-proxy.service";
+      };
+    };
+
+    services.kubernetes.proxy.kubeconfig.server = mkDefault top.apiserverAddress;
+  };
+
+  meta.buildDocsInSandbox = false;
+}
diff --git a/nixos/modules/services/cluster/kubernetes/scheduler.nix b/nixos/modules/services/cluster/kubernetes/scheduler.nix
new file mode 100644
index 00000000000..2d95528a6ea
--- /dev/null
+++ b/nixos/modules/services/cluster/kubernetes/scheduler.nix
@@ -0,0 +1,101 @@
+{ config, lib, options, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  otop = options.services.kubernetes;
+  cfg = top.scheduler;
+in
+{
+  ###### interface
+  options.services.kubernetes.scheduler = with lib.types; {
+
+    address = mkOption {
+      description = "Kubernetes scheduler listening address.";
+      default = "127.0.0.1";
+      type = str;
+    };
+
+    enable = mkEnableOption "Kubernetes scheduler";
+
+    extraOpts = mkOption {
+      description = "Kubernetes scheduler extra command line options.";
+      default = "";
+      type = separatedString " ";
+    };
+
+    featureGates = mkOption {
+      description = "List set of feature gates";
+      default = top.featureGates;
+      defaultText = literalExpression "config.${otop.featureGates}";
+      type = listOf str;
+    };
+
+    kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes scheduler";
+
+    leaderElect = mkOption {
+      description = "Whether to start leader election before executing main loop.";
+      type = bool;
+      default = true;
+    };
+
+    port = mkOption {
+      description = "Kubernetes scheduler listening port.";
+      default = 10251;
+      type = int;
+    };
+
+    verbosity = mkOption {
+      description = ''
+        Optional glog verbosity level for logging statements. See
+        <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/>
+      '';
+      default = null;
+      type = nullOr int;
+    };
+
+  };
+
+  ###### implementation
+  config = mkIf cfg.enable {
+    systemd.services.kube-scheduler = {
+      description = "Kubernetes Scheduler Service";
+      wantedBy = [ "kubernetes.target" ];
+      after = [ "kube-apiserver.service" ];
+      serviceConfig = {
+        Slice = "kubernetes.slice";
+        ExecStart = ''${top.package}/bin/kube-scheduler \
+          --bind-address=${cfg.address} \
+          ${optionalString (cfg.featureGates != [])
+            "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \
+          --kubeconfig=${top.lib.mkKubeConfig "kube-scheduler" cfg.kubeconfig} \
+          --leader-elect=${boolToString cfg.leaderElect} \
+          --secure-port=${toString cfg.port} \
+          ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
+          ${cfg.extraOpts}
+        '';
+        WorkingDirectory = top.dataDir;
+        User = "kubernetes";
+        Group = "kubernetes";
+        Restart = "on-failure";
+        RestartSec = 5;
+      };
+      unitConfig = {
+        StartLimitIntervalSec = 0;
+      };
+    };
+
+    services.kubernetes.pki.certs = {
+      schedulerClient = top.lib.mkCert {
+        name = "kube-scheduler-client";
+        CN = "system:kube-scheduler";
+        action = "systemctl restart kube-scheduler.service";
+      };
+    };
+
+    services.kubernetes.scheduler.kubeconfig.server = mkDefault top.apiserverAddress;
+  };
+
+  meta.buildDocsInSandbox = false;
+}
diff --git a/nixos/modules/services/cluster/pacemaker/default.nix b/nixos/modules/services/cluster/pacemaker/default.nix
new file mode 100644
index 00000000000..7eeadffcc58
--- /dev/null
+++ b/nixos/modules/services/cluster/pacemaker/default.nix
@@ -0,0 +1,52 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.pacemaker;
+in
+{
+  # interface
+  options.services.pacemaker = {
+    enable = mkEnableOption "pacemaker";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.pacemaker;
+      defaultText = literalExpression "pkgs.pacemaker";
+      description = "Package that should be used for pacemaker.";
+    };
+  };
+
+  # implementation
+  config = mkIf cfg.enable {
+    assertions = [ {
+      assertion = config.services.corosync.enable;
+      message = ''
+        Enabling services.pacemaker requires a services.corosync configuration.
+      '';
+    } ];
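+
+    # A minimal sketch (hypothetical configuration): corosync must be enabled
+    # alongside pacemaker, as the assertion above requires:
+    #   services.corosync.enable = true;
+    #   services.pacemaker.enable = true;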
+
+    environment.systemPackages = [ cfg.package ];
+
+    # required by pacemaker
+    users.users.hacluster = {
+      isSystemUser = true;
+      group = "pacemaker";
+      home = "/var/lib/pacemaker";
+    };
+    users.groups.pacemaker = {};
+
+    systemd.tmpfiles.rules = [
+      "d /var/log/pacemaker 0700 hacluster pacemaker -"
+    ];
+
+    systemd.packages = [ cfg.package ];
+    systemd.services.pacemaker = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        StateDirectory = "pacemaker";
+        StateDirectoryMode = "0700";
+      };
+    };
+  };
+}
diff --git a/nixos/modules/services/cluster/spark/default.nix b/nixos/modules/services/cluster/spark/default.nix
new file mode 100644
index 00000000000..e6b44e130a3
--- /dev/null
+++ b/nixos/modules/services/cluster/spark/default.nix
@@ -0,0 +1,162 @@
+{config, pkgs, lib, ...}:
+let
+  cfg = config.services.spark;
+in
+with lib;
+{
+  options = {
+    services.spark = {
+      master = {
+        enable = mkEnableOption "Spark master service";
+        bind = mkOption {
+          type = types.str;
+          description = "Address the spark master binds to.";
+          default = "127.0.0.1";
+          example = "0.0.0.0";
+        };
+        restartIfChanged = mkOption {
+          type = types.bool;
+          description = ''
+            Automatically restart master service on config change.
+            This can be set to false to defer restarts on clusters running critical applications.
+            Please consider the security implications of inadvertently running an older version,
+            and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
+          '';
+          default = true;
+        };
+        extraEnvironment = mkOption {
+          type = types.attrsOf types.str;
+          description = "Extra environment variables to pass to spark master. See spark-standalone documentation.";
+          default = {};
+          example = {
+            SPARK_MASTER_WEBUI_PORT = "8181";
+            SPARK_MASTER_OPTS = "-Dspark.deploy.defaultCores=5";
+          };
+        };
+      };
+      worker = {
+        enable = mkEnableOption "Spark worker service";
+        workDir = mkOption {
+          type = types.path;
+          description = "Spark worker work dir.";
+          default = "/var/lib/spark";
+        };
+        master = mkOption {
+          type = types.str;
+          description = "Address of the spark master.";
+          default = "127.0.0.1:7077";
+        };
+        restartIfChanged = mkOption {
+          type = types.bool;
+          description = ''
+            Automatically restart worker service on config change.
+            This can be set to false to defer restarts on clusters running critical applications.
+            Please consider the security implications of inadvertently running an older version,
+            and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
+          '';
+          default = true;
+        };
+        extraEnvironment = mkOption {
+          type = types.attrsOf types.str;
+          description = "Extra environment variables to pass to spark worker.";
+          default = {};
+          example = {
+            SPARK_WORKER_CORES = "5";
+            SPARK_WORKER_MEMORY = "2g";
+          };
+        };
+      };
+      confDir = mkOption {
+        type = types.path;
+        description = "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory.";
+        default = "${cfg.package}/lib/${cfg.package.untarDir}/conf";
+        defaultText = literalExpression ''"''${package}/lib/''${package.untarDir}/conf"'';
+      };
+      logDir = mkOption {
+        type = types.path;
+        description = "Spark log directory.";
+        default = "/var/log/spark";
+      };
+      package = mkOption {
+        type = types.package;
+        description = "Spark package.";
+        default = pkgs.spark;
+        defaultText = literalExpression "pkgs.spark";
+        example = literalExpression ''pkgs.spark.overrideAttrs (super: rec {
+          pname = "spark";
+          version = "2.4.4";
+
+          src = pkgs.fetchzip {
+            url    = "mirror://apache/spark/''${pname}-''${version}/''${pname}-''${version}-bin-without-hadoop.tgz";
+            sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g";
+          };
+        })'';
+      };
+    };
+  };
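+
+  # A minimal sketch of a single-node standalone cluster (hypothetical values;
+  # master and worker could equally run on separate hosts):
+  #   services.spark.master.enable = true;
+  #   services.spark.worker.enable = true;
+  #   services.spark.worker.master = "127.0.0.1:7077";
+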
+  config = lib.mkIf (cfg.worker.enable || cfg.master.enable) {
+    environment.systemPackages = [ cfg.package ];
+    systemd = {
+      services = {
+        spark-master = lib.mkIf cfg.master.enable {
+          path = with pkgs; [ procps openssh nettools ];
+          description = "spark master service.";
+          after = [ "network.target" ];
+          wantedBy = [ "multi-user.target" ];
+          restartIfChanged = cfg.master.restartIfChanged;
+          environment = cfg.master.extraEnvironment // {
+            SPARK_MASTER_HOST = cfg.master.bind;
+            SPARK_CONF_DIR = cfg.confDir;
+            SPARK_LOG_DIR = cfg.logDir;
+          };
+          serviceConfig = {
+            Type = "forking";
+            User = "spark";
+            Group = "spark";
+            WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
+            ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-master.sh";
+            ExecStop  = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-master.sh";
+            TimeoutSec = 300;
+            StartLimitBurst = 10;
+            Restart = "always";
+          };
+        };
+        spark-worker = lib.mkIf cfg.worker.enable {
+          path = with pkgs; [ procps openssh nettools rsync ];
+          description = "spark master service.";
+          after = [ "network.target" ];
+          wantedBy = [ "multi-user.target" ];
+          restartIfChanged = cfg.worker.restartIfChanged;
+          environment = cfg.worker.extraEnvironment // {
+            SPARK_MASTER = cfg.worker.master;
+            SPARK_CONF_DIR = cfg.confDir;
+            SPARK_LOG_DIR = cfg.logDir;
+            SPARK_WORKER_DIR = cfg.worker.workDir;
+          };
+          serviceConfig = {
+            Type = "forking";
+            User = "spark";
+            WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
+            ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-worker.sh spark://${cfg.worker.master}";
+            ExecStop  = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-worker.sh";
+            TimeoutSec = 300;
+            StartLimitBurst = 10;
+            Restart = "always";
+          };
+        };
+      };
+      tmpfiles.rules = [
+        "d '${cfg.worker.workDir}' - spark spark - -"
+        "d '${cfg.logDir}' - spark spark - -"
+      ];
+    };
+    users = {
+      users.spark = {
+        description = "spark user.";
+        group = "spark";
+        isSystemUser = true;
+      };
+      groups.spark = { };
+    };
+  };
+}