summary refs log tree commit diff
path: root/pkgs/build-support/docker/default.nix
diff options
context:
space:
mode:
authorGraham Christensen <graham.christensen@target.com>2018-09-25 10:53:42 -0400
committerGraham Christensen <graham.christensen@target.com>2018-09-26 17:54:14 -0400
commit4fe900619080f0b1a804abbccaab500d819ead10 (patch)
tree2313ef2722f9d6d10edf3bb3f4bc8b2c08e3107c /pkgs/build-support/docker/default.nix
parentfd045173cef84e65a8cb133ded28c99167cb0901 (diff)
downloadnixpkgs-4fe900619080f0b1a804abbccaab500d819ead10.tar
nixpkgs-4fe900619080f0b1a804abbccaab500d819ead10.tar.gz
nixpkgs-4fe900619080f0b1a804abbccaab500d819ead10.tar.bz2
nixpkgs-4fe900619080f0b1a804abbccaab500d819ead10.tar.lz
nixpkgs-4fe900619080f0b1a804abbccaab500d819ead10.tar.xz
nixpkgs-4fe900619080f0b1a804abbccaab500d819ead10.tar.zst
nixpkgs-4fe900619080f0b1a804abbccaab500d819ead10.zip
dockerTools.buildLayeredImage: init
Create a many-layered Docker Image.

Implements much less than buildImage:

 - Doesn't support specific uids/gids
 - Doesn't support running commands after building
 - Doesn't require qemu
 - Doesn't create mutable copies of the files in the path
 - Doesn't support parent images

If you want those features, I recommend using buildLayeredImage as an
input to buildImage.

Notably, it does support:

 - Caching low level, common paths based on a graph traversal
   algorithm, see referencesByPopularity in
   0a80233487993256e811f566b1c80a40394c03d6
 - Configurable number of layers. If you're not using AUFS or not
   extending the image, you can specify a larger number of layers at
   build time:

       pkgs.dockerTools.buildLayeredImage {
         name = "hello";
         maxLayers = 128;
         config.Cmd = [ "${pkgs.gitFull}/bin/git" ];
       };

 - Parallelized creation of the layers, improving build speed.
 - The contents of the image includes the closure of the configuration,
   so you don't have to specify paths in contents and config.

   With buildImage, paths referred to by the config were not included
   automatically in the image. Thus, if you wanted to call Git, you
   had to specify it twice:

       pkgs.dockerTools.buildImage {
         name = "hello";
         contents = [ pkgs.gitFull ];
         config.Cmd = [ "${pkgs.gitFull}/bin/git" ];
       };

   buildLayeredImage on the other hand includes the runtime closure of
   the config when calculating the contents of the image:

       pkgs.dockerTools.buildLayeredImage {
         name = "hello";
         config.Cmd = [ "${pkgs.gitFull}/bin/git" ];
       };

Minor Problems

 - If any of the store paths change, every layer will be rebuilt in
   the nix-build. However, because the layers are bit-for-bit
   reproducible, when these images are loaded into Docker they will
   match existing layers and not be imported or uploaded twice.

Common Questions

 - Aren't Docker layers ordered?

   No. People who have used a Dockerfile before assume Docker's
   Layers are inherently ordered. However, this is not true -- Docker
   layers are content-addressable and are not explicitly layered until
   they are composed in to an Image.

 - What happens if I have more than maxLayers of store paths?

   The first (maxLayers-2) most "popular" paths will have their own
   individual layers, then layer #(maxLayers-1) will contain all the
   remaining "unpopular" paths, and finally layer #(maxLayers) will
   contain the Image configuration.
Diffstat (limited to 'pkgs/build-support/docker/default.nix')
-rw-r--r--pkgs/build-support/docker/default.nix175
1 files changed, 175 insertions, 0 deletions
diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix
index 6b5a06486e7..73639a521b6 100644
--- a/pkgs/build-support/docker/default.nix
+++ b/pkgs/build-support/docker/default.nix
@@ -1,4 +1,5 @@
 {
+  symlinkJoin,
   coreutils,
   docker,
   e2fsprogs,
@@ -19,6 +20,7 @@
   utillinux,
   vmTools,
   writeReferencesToFile,
+  referencesByPopularity,
   writeScript,
   writeText,
 }:
@@ -272,6 +274,81 @@ rec {
       perl ${pkgs.pathsFromGraph} closure-* > $out/storePaths
     '';
 
+  # Create up to $maxLayers Docker layers, one layer per store path,
+  # unless there are more paths than $maxLayers. In that case, create
+  # $maxLayers-1 layers for the most popular paths, and smush the
+  # remaining store paths into one final layer.
+  mkManyPureLayers = {
+    name,
+    # Closure file; its paths, ordered by referencesByPopularity, become the layers.
+    closure,
+    configJson, # Image config JSON; a copy without `config` and with a fixed date becomes generic.json.
+    # Docker has a 42-layer maximum; we pick 24 to ensure there is plenty
+    # of room for extension.
+    maxLayers ? 24
+  }:
+    runCommand "${name}-granular-docker-layers" {
+      inherit maxLayers;
+      paths = referencesByPopularity closure;
+      buildInputs = [ jshon rsync tarsum ];
+      enableParallelBuilding = true;
+    }
+    ''
+      # Delete impurities for store path layers, so they don't get
+      # shared and taint other projects.
+      cat ${configJson} \
+        | jshon -d config \
+        | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json
+
+      # WARNING!
+      # The following code is fiddly w.r.t. ensuring every layer is
+      # created, and that no paths are missed. If you change the
+      # following head and tail call lines, double-check that your
+      # code behaves properly when the number of layers equals:
+      #      maxLayers-1, maxLayers, and maxLayers+1
+      head -n $((maxLayers - 1)) $paths | cat -n | xargs -P$NIX_BUILD_CORES -n2 ${./store-path-to-layer.sh}
+      if [ $(cat $paths | wc -l) -ge $maxLayers ]; then
+        tail -n+$maxLayers $paths | xargs ${./store-path-to-layer.sh} $maxLayers
+      fi
+
+      echo "Finished building layer '$name'"
+
+      mv ./layers $out
+    '';
+
+  # Create a "Customisation" layer which adds symlinks at the root of the
+  # image to the root paths of the closure, plus the config data (what
+  # command to run, its environment). $out gets layer.tar, json and VERSION.
+  mkCustomisationLayer = {
+    name,
+    # Store path whose tree is copied into the layer and tarred up.
+    contents,
+    baseJson, # JSON document that gets the tarball's `checksum` field added.
+    uid ? 0, gid ? 0, # Owner and group recorded for the entries of layer.tar.
+  }:
+    runCommand "${name}-customisation-layer" {
+      buildInputs = [ jshon rsync tarsum ];
+    }
+    ''
+      cp -r ${contents}/ ./layer
+
+      # Tar up the layer and throw it into 'layer.tar'.
+      echo "Packing layer..."
+      mkdir $out
+      tar -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf $out/layer.tar .
+
+      # Compute a checksum of the tarball.
+      echo "Computing layer checksum..."
+      tarhash=$(tarsum < $out/layer.tar)
+
+      # Add a 'checksum' field to the JSON, with the value set to the
+      # checksum of the tarball.
+      cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
+
+      # Indicate to docker that we're using schema version 1.0.
+      echo -n "1.0" > $out/VERSION
+    '';
+
   # Create a "layer" (set of files).
   mkPureLayer = {
     # Name of the layer
@@ -413,6 +490,104 @@ rec {
       '';
     };
 
+  buildLayeredImage = { # Build a many-layered Docker image; $out is a gzipped tarball.
+    # Image name.
+    name,
+    # Image tag; the hash part of the Nix output path is used if null.
+    tag ? null,
+    # Files to put on the image (a nix store path or list of paths).
+    contents ? [],
+    # Docker config; e.g. what command to run on the container.
+    config ? {},
+    # Time of creation of the image. Passing "now" will make the
+    # created date be the time of building.
+    created ? "1970-01-01T00:00:01Z",
+    # Docker's lowest maximum layer limit is 42 layers, for an old
+    # version of the AUFS graph driver. We pick 24 to ensure there is
+    # plenty of room for extension. I believe the actual maximum is
+    # 128.
+    maxLayers ? 24
+  }:
+    let
+      uid = 0;
+      gid = 0;
+      baseName = baseNameOf name;
+      contentsEnv = symlinkJoin { name = "bulk-layers"; paths = (if builtins.isList contents then contents else [ contents ]); };
+
+      configJson = let
+          pure = writeText "${baseName}-config.json" (builtins.toJSON {
+            inherit created config;
+            architecture = "amd64";
+            os = "linux";
+          });
+          impure = runCommand "${baseName}-standard-dynamic-date.json"
+            { buildInputs = [ jq ]; }
+            ''
+               jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out
+            '';
+        in if created == "now" then impure else pure;
+
+      bulkLayers = mkManyPureLayers {
+          name = baseName;
+          closure = writeText "closure" "${contentsEnv} ${configJson}";
+          # One layer will be taken up by the customisationLayer, so
+          # ask for one fewer layer here.
+          maxLayers = maxLayers - 1;
+          inherit configJson;
+        };
+      customisationLayer = mkCustomisationLayer {
+          name = baseName;
+          contents = contentsEnv;
+          baseJson = configJson;
+          inherit uid gid;
+        };
+      result = runCommand "docker-image-${baseName}.tar.gz" {
+        buildInputs = [ jshon pigz coreutils findutils jq ];
+        # Image name and tag must be lowercase.
+        imageName = lib.toLower name;
+        imageTag = if tag == null then "" else lib.toLower tag;
+        baseJson = configJson;
+      } ''
+        ${lib.optionalString (tag == null) ''
+          outName="$(basename "$out")"
+          outHash=$(echo "$outName" | cut -d - -f 1)
+
+          imageTag=$outHash
+        ''}
+
+        find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
+        echo ${customisationLayer} >> layer-list
+
+        mkdir image
+        imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}")
+        manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]")
+        for layer in $(cat layer-list); do
+          layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1)
+          layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1)
+          ln -s "$layer" "./image/$layerID"
+
+          manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= [\"$layerID/layer.tar\"] + .")
+          imageJson=$(echo "$imageJson" | jq ".history |= [{\"created\": \"$(jq -r .created ${configJson})\"}] + .")
+          imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= [\"sha256:$layerChecksum\"] + .")
+        done
+        imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1)
+        echo "$imageJson" > "image/$imageJsonChecksum.json"
+        manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"")
+        echo "$manifestJson" > image/manifest.json
+
+        jshon -n object \
+          -n object -s "$layerID" -i "$imageTag" \
+          -i "$imageName" > image/repositories
+
+        echo "Cooking the image..."
+        tar -C image --dereference --hard-dereference --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0  --mode=a-w --xform s:'^./':: -c . | pigz -nT > $out
+
+        echo "Finished."
+      '';
+
+    in
+    result;
+
   # 1. extract the base image
   # 2. create the layer
   # 3. add layer deps to the layer itself, diffing with the base image