author     Christian Albrecht <christian.albrecht@mayflower.de>   2019-03-01 08:44:45 +0100
committer  Christian Albrecht <christian.albrecht@mayflower.de>   2019-03-03 19:39:02 +0100
commit     62f03750e48ae7658ea18d7ac75833279da02a5a (patch)
tree       7ea701d69b681a45ebc5e2b9aec90224f8de8fee /nixos/modules/services/cluster/kubernetes
parent     f9e2f76a590d11cbeaa10e3953ddc96110bf1b3b (diff)
nixos/kubernetes: Stabilize services startup across machines
by adding targets and curl wait loops to services to ensure services
are not started before the services they depend on are reachable.

Extra targets cfssl-online.target and kube-apiserver-online.target
synchronize starts across machines, and node-online.target ensures
docker is restarted and ready to deploy containers only after flannel
has negotiated the network CIDR with the apiserver.

Since flannel needs to be started before the addon-manager in order to
configure the docker interface, it has to have its own RBAC bootstrap
service.

The curl wait loops within the other services exist to ensure that each
service can do its work immediately once started, instead of cluttering
the log with messages about failing conditions.

Because kubernetes.target is only reached after the cluster has started,
it can be used as a wait condition in the tests.

In kube-certmgr-bootstrap, the mkdir calls are needed so that the
service does not fail to start.

The following is the relevant part of systemctl list-dependencies

default.target
● ├─certmgr.service
● ├─cfssl.service
● ├─docker.service
● ├─etcd.service
● ├─flannel.service
● ├─kubernetes.target
● │ ├─kube-addon-manager.service
● │ ├─kube-proxy.service
● │ ├─kube-apiserver-online.target
● │ │ ├─flannel-rbac-bootstrap.service
● │ │ ├─kube-apiserver-online.service
● │ │ ├─kube-apiserver.service
● │ │ ├─kube-controller-manager.service
● │ │ └─kube-scheduler.service
● │ └─node-online.target
● │   ├─node-online.service
● │   ├─flannel.target
● │   │ ├─flannel.service
● │   │ └─mk-docker-opts.service
● │   └─kubelet.target
● │     └─kubelet.service
● ├─network-online.target
● │ └─cfssl-online.target
● │   ├─certmgr.service
● │   ├─cfssl-online.service
● │   └─kube-certmgr-bootstrap.service
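
For illustration, the ordering above can be inspected on a running node with
the same command, and kubernetes.target can serve as a readiness gate in
scripts (a minimal sketch; the exact wait used in the NixOS tests may differ):

  # Reproduce the dependency tree above for the cluster target
  systemctl list-dependencies kubernetes.target

  # Poll until the whole cluster has come up, e.g. before running smoke tests
  until systemctl is-active --quiet kubernetes.target; do
    sleep 1
  done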
Diffstat (limited to 'nixos/modules/services/cluster/kubernetes')
-rw-r--r--  nixos/modules/services/cluster/kubernetes/addon-manager.nix      |   2
-rw-r--r--  nixos/modules/services/cluster/kubernetes/apiserver.nix          |  26
-rw-r--r--  nixos/modules/services/cluster/kubernetes/controller-manager.nix |  11
-rw-r--r--  nixos/modules/services/cluster/kubernetes/default.nix            |  13
-rw-r--r--  nixos/modules/services/cluster/kubernetes/flannel.nix            | 121
-rw-r--r--  nixos/modules/services/cluster/kubernetes/kubelet.nix            |  40
-rw-r--r--  nixos/modules/services/cluster/kubernetes/pki.nix                |  65
-rw-r--r--  nixos/modules/services/cluster/kubernetes/proxy.nix              |  10
-rw-r--r--  nixos/modules/services/cluster/kubernetes/scheduler.nix          |  11
9 files changed, 246 insertions, 53 deletions
diff --git a/nixos/modules/services/cluster/kubernetes/addon-manager.nix b/nixos/modules/services/cluster/kubernetes/addon-manager.nix
index 17f2dde31a7..abd9e99ba02 100644
--- a/nixos/modules/services/cluster/kubernetes/addon-manager.nix
+++ b/nixos/modules/services/cluster/kubernetes/addon-manager.nix
@@ -72,7 +72,7 @@ in
     systemd.services.kube-addon-manager = {
       description = "Kubernetes addon manager";
       wantedBy = [ "kubernetes.target" ];
-      after = [ "kube-apiserver.service" ];
+      after = [ "kube-apiserver-online.target" "node-online.target" ];
       environment.ADDON_PATH = "/etc/kubernetes/addons/";
       path = [ pkgs.gawk ];
       serviceConfig = {
diff --git a/nixos/modules/services/cluster/kubernetes/apiserver.nix b/nixos/modules/services/cluster/kubernetes/apiserver.nix
index 08f929060aa..567d31f06ef 100644
--- a/nixos/modules/services/cluster/kubernetes/apiserver.nix
+++ b/nixos/modules/services/cluster/kubernetes/apiserver.nix
@@ -293,8 +293,9 @@ in
     in {
         systemd.services.kube-apiserver = {
           description = "Kubernetes APIServer Service";
-          wantedBy = [ "kubernetes.target" ];
-          after = [ "network.target" ];
+          wantedBy = [ "kube-apiserver-online.target" ];
+          after = [ "certmgr.service" ];
+          before = [ "kube-apiserver-online.target" ];
           serviceConfig = {
             Slice = "kubernetes.slice";
             ExecStart = ''${top.package}/bin/kube-apiserver \
@@ -459,7 +460,28 @@ in
       };
 
     }))
+    {
+      systemd.targets.kube-apiserver-online = {
+        wantedBy = [ "kubernetes.target" ];
+        before = [ "kubernetes.target" ];
+      };
 
+      systemd.services.kube-apiserver-online = mkIf top.flannel.enable {
+        description = "apiserver control plane is online";
+        wantedBy = [ "kube-apiserver-online.target" ];
+        after = [ "kube-scheduler.service" "kube-controller-manager.service" ];
+        before = [ "kube-apiserver-online.target" ];
+        preStart = ''
+          ${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
+            sleep = 3;
+            path = "/healthz";
+            cacert = top.caFile;
+            inherit cert key;
+          })}
+        '';
+        script = "echo apiserver control plane is online";
+      };
+    }
   ];
 
 }
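
For reference, the wait loop that kube-apiserver-online runs amounts to a
client-certificate probe of the health endpoint, roughly like the sketch below
(the address and certificate paths are placeholders; the module fills them in
from cfg.apiserverAddress, top.caFile and top.pki.certs.flannelClient):

  # Succeeds (exit 0) only once the apiserver answers /healthz over TLS,
  # which is the condition kube-apiserver-online.service waits for.
  # APISERVER stands for cfg.apiserverAddress; the .pem paths are placeholders.
  curl --fail-early -fs \
    --cacert ca.pem --cert flannel-client.pem --key flannel-client-key.pem \
    "$APISERVER/healthz"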
diff --git a/nixos/modules/services/cluster/kubernetes/controller-manager.nix b/nixos/modules/services/cluster/kubernetes/controller-manager.nix
index 27b28311adb..20f471215db 100644
--- a/nixos/modules/services/cluster/kubernetes/controller-manager.nix
+++ b/nixos/modules/services/cluster/kubernetes/controller-manager.nix
@@ -116,8 +116,17 @@ in
 
     systemd.services.kube-controller-manager = {
       description = "Kubernetes Controller Manager Service";
-      wantedBy = [ "kubernetes.target" ];
+      wantedBy = [ "kube-apiserver-online.target" ];
       after = [ "kube-apiserver.service" ];
+      before = [ "kube-apiserver-online.target" ];
+      preStart = ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.controllerManagerClient; {
+          sleep = 1;
+          path = "/api";
+          cacert = top.caFile;
+          inherit cert key;
+        })}
+      '';
       serviceConfig = {
         RestartSec = "30s";
         Restart = "on-failure";
diff --git a/nixos/modules/services/cluster/kubernetes/default.nix b/nixos/modules/services/cluster/kubernetes/default.nix
index 375e33e91b5..f1f544afc4d 100644
--- a/nixos/modules/services/cluster/kubernetes/default.nix
+++ b/nixos/modules/services/cluster/kubernetes/default.nix
@@ -73,6 +73,18 @@ let
     };
   };
 
+  mkWaitCurl = { address ? cfg.apiserverAddress, sleep ? 2, path ? "", args ? "-o /dev/null",
+                 cacert ? null, cert ? null, key ? null, }: ''
+    while ! ${pkgs.curl}/bin/curl --fail-early -fs \
+      ${if cacert != null then "--cacert ${cacert}" else ""} \
+      ${if cert != null then "--cert ${cert}" else ""} \
+      ${if key != null then "--key ${key}" else ""} \
+      ${address}${path} ${args} ; do
+        sleep ${toString sleep}
+        echo Waiting to be able to reach ${address}${path}
+    done
+  '';
+
   kubeConfigDefaults = {
     server = mkDefault cfg.kubeconfig.server;
     caFile = mkDefault cfg.kubeconfig.caFile;
@@ -162,6 +174,7 @@ in {
         inherit mkCert;
         inherit mkKubeConfig;
         inherit mkKubeConfigOptions;
+        inherit mkWaitCurl;
       };
       type = types.attrs;
     };
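
For clarity, a call such as mkWaitCurl { path = "/healthz"; cacert = ...;
cert = ...; key = ...; } interpolates into roughly the following shell loop,
using the defaults sleep = 2 and args = "-o /dev/null" (in the real unit curl
comes from the Nix store; the shell variables below stand for the values
interpolated by Nix):

  while ! curl --fail-early -fs \
      --cacert "$cacert" --cert "$cert" --key "$key" \
      "$address/healthz" -o /dev/null; do
    sleep 2
    echo "Waiting to be able to reach $address/healthz"
  done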
diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix
index ef06acb6de3..4aa547c9d3e 100644
--- a/nixos/modules/services/cluster/kubernetes/flannel.nix
+++ b/nixos/modules/services/cluster/kubernetes/flannel.nix
@@ -27,7 +27,12 @@ in
   };
 
   ###### implementation
-  config = mkIf cfg.enable {
+  config = mkIf cfg.enable (let
+    flannelBootstrapPaths = mkIf top.apiserver.enable [
+      top.pki.certs.clusterAdmin.cert
+      top.pki.certs.clusterAdmin.key
+    ];
+  in {
     services.flannel = {
 
       enable = mkDefault true;
@@ -48,8 +53,10 @@ in
       }];
     };
 
-    systemd.services."mk-docker-opts" = {
+    systemd.services.mk-docker-opts = {
       description = "Pre-Docker Actions";
+      wantedBy = [ "flannel.target" ];
+      before = [ "flannel.target" ];
       path = with pkgs; [ gawk gnugrep ];
       script = ''
         ${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker
@@ -68,6 +75,17 @@ in
       };
     };
 
+    systemd.targets.flannel = {
+      wantedBy = [ "node-online.target" ];
+      before = [ "node-online.target" ];
+    };
+
+    systemd.services.flannel = {
+      wantedBy = [ "flannel.target" ];
+      after = [ "kubelet.target" ];
+      before = [ "flannel.target" ];
+    };
+
     systemd.services.docker = {
       environment.DOCKER_OPTS = "-b none";
       serviceConfig.EnvironmentFile = "-/run/flannel/docker";
@@ -93,44 +111,69 @@ in
     };
 
     # give flannel some kubernetes rbac permissions if applicable
-    services.kubernetes.addonManager.bootstrapAddons = mkIf ((storageBackend == "kubernetes") && (elem "RBAC" top.apiserver.authorizationMode)) {
-
-      flannel-cr = {
-        apiVersion = "rbac.authorization.k8s.io/v1beta1";
-        kind = "ClusterRole";
-        metadata = { name = "flannel"; };
-        rules = [{
-          apiGroups = [ "" ];
-          resources = [ "pods" ];
-          verbs = [ "get" ];
-        }
-        {
-          apiGroups = [ "" ];
-          resources = [ "nodes" ];
-          verbs = [ "list" "watch" ];
-        }
-        {
-          apiGroups = [ "" ];
-          resources = [ "nodes/status" ];
-          verbs = [ "patch" ];
-        }];
-      };
-
-      flannel-crb = {
-        apiVersion = "rbac.authorization.k8s.io/v1beta1";
-        kind = "ClusterRoleBinding";
-        metadata = { name = "flannel"; };
-        roleRef = {
-          apiGroup = "rbac.authorization.k8s.io";
-          kind = "ClusterRole";
-          name = "flannel";
+    systemd.services.flannel-rbac-bootstrap = mkIf (top.apiserver.enable && (elem "RBAC" top.apiserver.authorizationMode)) {
+
+      wantedBy = [ "kube-apiserver-online.target" ];
+      after = [ "kube-apiserver-online.target" ];
+      before = [ "flannel.service" ];
+      path = with pkgs; [ kubectl ];
+      preStart = let
+        files = mapAttrsToList (n: v: pkgs.writeText "${n}.json" (builtins.toJSON v)) {
+          flannel-cr = {
+            apiVersion = "rbac.authorization.k8s.io/v1beta1";
+            kind = "ClusterRole";
+            metadata = { name = "flannel"; };
+            rules = [{
+              apiGroups = [ "" ];
+              resources = [ "pods" ];
+              verbs = [ "get" ];
+            }
+            {
+              apiGroups = [ "" ];
+              resources = [ "nodes" ];
+              verbs = [ "list" "watch" ];
+            }
+            {
+              apiGroups = [ "" ];
+              resources = [ "nodes/status" ];
+              verbs = [ "patch" ];
+            }];
+          };
+
+          flannel-crb = {
+            apiVersion = "rbac.authorization.k8s.io/v1beta1";
+            kind = "ClusterRoleBinding";
+            metadata = { name = "flannel"; };
+            roleRef = {
+              apiGroup = "rbac.authorization.k8s.io";
+              kind = "ClusterRole";
+              name = "flannel";
+            };
+            subjects = [{
+              kind = "User";
+              name = "flannel-client";
+            }];
+          };
         };
-        subjects = [{
-          kind = "User";
-          name = "flannel-client";
-        }];
-      };
+      in ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.clusterAdmin; {
+          path = "/";
+          cacert = top.caFile;
+          inherit cert key;
+        })}
+
+        kubectl -s ${top.apiserverAddress} --certificate-authority=${top.caFile} --client-certificate=${top.pki.certs.clusterAdmin.cert} --client-key=${top.pki.certs.clusterAdmin.key} apply -f ${concatStringsSep " \\\n -f " files}
+      '';
+      script = "echo Ok";
+      unitConfig.ConditionPathExists = flannelBootstrapPaths;
+    };
 
+    systemd.paths.flannel-rbac-bootstrap = mkIf top.apiserver.enable {
+      wantedBy = [ "flannel-rbac-bootstrap.service" ];
+      pathConfig = {
+        PathExists = flannelBootstrapPaths;
+        PathChanged = flannelBootstrapPaths;
+      };
     };
-  };
+  });
 }
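
Once flannel-rbac-bootstrap has applied these objects, they can be checked by
hand with the same cluster-admin credentials used above (the server address
and .pem paths are placeholders for top.apiserverAddress, top.caFile and
top.pki.certs.clusterAdmin):

  # List the ClusterRole and ClusterRoleBinding created for the flannel-client user
  kubectl -s "$APISERVER" \
    --certificate-authority=ca.pem \
    --client-certificate=cluster-admin.pem \
    --client-key=cluster-admin-key.pem \
    get clusterrole/flannel clusterrolebinding/flannel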
diff --git a/nixos/modules/services/cluster/kubernetes/kubelet.nix b/nixos/modules/services/cluster/kubernetes/kubelet.nix
index 86402cba7c4..b3f3c036564 100644
--- a/nixos/modules/services/cluster/kubernetes/kubelet.nix
+++ b/nixos/modules/services/cluster/kubernetes/kubelet.nix
@@ -252,8 +252,9 @@ in
 
       systemd.services.kubelet = {
         description = "Kubernetes Kubelet Service";
-        wantedBy = [ "kubernetes.target" ];
-        after = [ "network.target" "docker.service" "kube-apiserver.service" ];
+        wantedBy = [ "kubelet.target" ];
+        after = [ "kube-apiserver-online.target" ];
+        before = [ "kubelet.target" ];
         path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path;
         preStart = ''
           ${concatMapStrings (img: ''
@@ -325,6 +326,30 @@ in
         };
       };
 
+      systemd.services.docker.before = [ "kubelet.service" ];
+
+      systemd.services.node-online = {
+        wantedBy = [ "node-online.target" ];
+        after = [ "flannel.target" "kubelet.target" ];
+        before = [ "node-online.target" ];
+        # It is complicated: flannel needs kubelet to run the pause container before
+        # it negotiates the node CIDR with the apiserver and afterwards configures and
+        # restarts dockerd. Until then, prevent creating any pods, because they would
+        # have to be recreated anyway once flannel has changed the docker0 network.
+        script = let
+          docker-env = "/run/flannel/docker";
+          flannel-date = "stat --print=%Y ${docker-env}";
+          docker-date = "systemctl show --property=ActiveEnterTimestamp --value docker";
+        in ''
+          while ! test -f ${docker-env} ; do sleep 1 ; done
+          while test `${flannel-date}` -gt `date +%s --date="$(${docker-date})"` ; do
+            sleep 1
+          done
+        '';
+        serviceConfig.Type = "oneshot";
+        serviceConfig.Slice = "kubernetes.slice";
+      };
+
       # Always include cni plugins
       services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins];
 
@@ -369,5 +394,16 @@ in
       };
     })
 
+    {
+      systemd.targets.kubelet = {
+        wantedBy = [ "node-online.target" ];
+        before = [ "node-online.target" ];
+      };
+
+      systemd.targets.node-online = {
+        wantedBy = [ "kubernetes.target" ];
+        before = [ "kubernetes.target" ];
+      };
+    }
   ];
 }
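
For reference, the two timestamps that the node-online wait loop compares can
be inspected by hand once flannel has written its Docker options file:

  # Epoch-second mtime of the environment file flannel generates for dockerd
  stat --print=%Y /run/flannel/docker

  # Epoch seconds at which dockerd last became active; node-online waits until
  # this is newer than the file above, i.e. docker has already been restarted
  # with flannel's subnet settings.
  date +%s --date="$(systemctl show --property=ActiveEnterTimestamp --value docker)"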
diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix
index 8ad17d4dfb4..d08d7892bb5 100644
--- a/nixos/modules/services/cluster/kubernetes/pki.nix
+++ b/nixos/modules/services/cluster/kubernetes/pki.nix
@@ -119,6 +119,7 @@ in
     cfsslCertPathPrefix = "${config.services.cfssl.dataDir}/cfssl";
     cfsslCert = "${cfsslCertPathPrefix}.pem";
     cfsslKey = "${cfsslCertPathPrefix}-key.pem";
+    cfsslPort = toString config.services.cfssl.port;
 
     certmgrPaths = [
       top.caFile
@@ -191,13 +192,39 @@ in
         chown cfssl "${cfsslAPITokenPath}" && chmod 400 "${cfsslAPITokenPath}"
       '')]);
 
+    systemd.targets.cfssl-online = {
+      wantedBy = [ "network-online.target" ];
+      after = [ "cfssl.service" "network-online.target" "cfssl-online.service" ];
+    };
+
+    systemd.services.cfssl-online = {
+      description = "Wait for ${remote} to be reachable.";
+      wantedBy = [ "cfssl-online.target" ];
+      before = [ "cfssl-online.target" ];
+      preStart = ''
+        ${top.lib.mkWaitCurl {
+          address = remote;
+          path = "/api/v1/cfssl/info";
+          args = "-kd '{}' -o /dev/null";
+        }}
+      '';
+      script = "echo Ok";
+      serviceConfig = {
+        TimeoutSec = "300";
+      };
+    };
+
     systemd.services.kube-certmgr-bootstrap = {
       description = "Kubernetes certmgr bootstrapper";
-      wantedBy = [ "certmgr.service" ];
-      after = [ "cfssl.target" ];
+      wantedBy = [ "cfssl-online.target" ];
+      after = [ "cfssl-online.target" ];
+      before = [ "certmgr.service" ];
       script = concatStringsSep "\n" [''
         set -e
 
+        mkdir -p $(dirname ${certmgrAPITokenPath})
+        mkdir -p $(dirname ${top.caFile})
+
         # If there's a cfssl (cert issuer) running locally, then don't rely on user to
         # manually paste it in place. Just symlink.
         # otherwise, create the target file, ready for users to insert the token
@@ -209,14 +236,18 @@ in
         fi
       ''
       (optionalString (cfg.pkiTrustOnBootstrap) ''
-        if [ ! -f "${top.caFile}" ] || [ $(cat "${top.caFile}" | wc -c) -lt 1 ]; then
-          ${pkgs.curl}/bin/curl --fail-early -f -kd '{}' ${remote}/api/v1/cfssl/info | \
-            ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}
+        if [ ! -s "${top.caFile}" ]; then
+          ${top.lib.mkWaitCurl {
+            address = "https://${top.masterAddress}:${cfsslPort}";
+            path = "/api/v1/cfssl/info";
+            args = "-kd '{}' -o - | ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}";
+          }}
         fi
       '')
       ];
       serviceConfig = {
-        RestartSec = "10s";
+        TimeoutSec = "300";
+        RestartSec = "1s";
         Restart = "on-failure";
       };
     };
@@ -254,6 +285,14 @@ in
       };
 
       systemd.services.certmgr = {
+        wantedBy = [ "cfssl-online.target" ];
+        after = [ "cfssl-online.target" "kube-certmgr-bootstrap.service" ];
+        preStart = ''
+          while ! test -s ${certmgrAPITokenPath} ; do
+            sleep 1
+            echo Waiting for ${certmgrAPITokenPath}
+          done
+        '';
         unitConfig.ConditionPathExists = certmgrPaths;
       };
 
@@ -289,6 +328,12 @@ in
           ''
             export KUBECONFIG=${clusterAdminKubeconfig}
             ${kubectl}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files}
+
+            ${top.lib.mkWaitCurl (with top.pki.certs.addonManager; {
+              path = "/api/v1/namespaces/kube-system/serviceaccounts/default";
+              cacert = top.caFile;
+              inherit cert key;
+            })}
           '';
         })
         {
@@ -384,6 +429,14 @@ in
       };
 
       systemd.services.flannel = {
+        preStart = ''
+          ${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
+            path = "/api/v1/nodes";
+            cacert = top.caFile;
+            inherit cert key;
+            args = "-o - | grep podCIDR >/dev/null";
+          })}
+        '';
         unitConfig.ConditionPathExists = flannelPaths;
       };
 
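
The CA bootstrap above is equivalent to fetching the signing certificate from
the remote cfssl instance by hand, roughly as follows ($MASTER and the port
are placeholders for top.masterAddress and config.services.cfssl.port; 8888 is
only an example value):

  # Ask cfssl for its info (which includes the CA certificate) and extract it
  # with cfssljson, as kube-certmgr-bootstrap does when the CA file is missing
  # or empty.
  curl --fail-early -fsk -d '{}' "https://$MASTER:8888/api/v1/cfssl/info" \
    | cfssljson -stdout > ca.pem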
diff --git a/nixos/modules/services/cluster/kubernetes/proxy.nix b/nixos/modules/services/cluster/kubernetes/proxy.nix
index 83cd3e23100..073756d58ab 100644
--- a/nixos/modules/services/cluster/kubernetes/proxy.nix
+++ b/nixos/modules/services/cluster/kubernetes/proxy.nix
@@ -49,8 +49,16 @@ in
     systemd.services.kube-proxy = {
       description = "Kubernetes Proxy Service";
       wantedBy = [ "kubernetes.target" ];
-      after = [ "kube-apiserver.service" ];
+      after = [ "node-online.target" ];
+      before = [ "kubernetes.target" ];
       path = with pkgs; [ iptables conntrack_tools ];
+      preStart = ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.kubeProxyClient; {
+          path = "/api/v1/nodes/${top.kubelet.hostname}";
+          cacert = top.caFile;
+          inherit cert key;
+        })}
+      '';
       serviceConfig = {
         Slice = "kubernetes.slice";
         ExecStart = ''${top.package}/bin/kube-proxy \
diff --git a/nixos/modules/services/cluster/kubernetes/scheduler.nix b/nixos/modules/services/cluster/kubernetes/scheduler.nix
index 0305b9aefe5..d3302a15402 100644
--- a/nixos/modules/services/cluster/kubernetes/scheduler.nix
+++ b/nixos/modules/services/cluster/kubernetes/scheduler.nix
@@ -59,8 +59,17 @@ in
   config = mkIf cfg.enable {
     systemd.services.kube-scheduler = {
       description = "Kubernetes Scheduler Service";
-      wantedBy = [ "kubernetes.target" ];
+      wantedBy = [ "kube-apiserver-online.target" ];
       after = [ "kube-apiserver.service" ];
+      before = [ "kube-apiserver-online.target" ];
+      preStart = ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.schedulerClient; {
+          sleep = 1;
+          path = "/api";
+          cacert = top.caFile;
+          inherit cert key;
+        })}
+      '';
       serviceConfig = {
         Slice = "kubernetes.slice";
         ExecStart = ''${top.package}/bin/kube-scheduler \