summary refs log tree commit diff
path: root/pkgs/build-support/docker
diff options
context:
space:
mode:
Diffstat (limited to 'pkgs/build-support/docker')
-rw-r--r--pkgs/build-support/docker/default.nix393
-rwxr-xr-xpkgs/build-support/docker/store-path-to-layer.sh54
-rw-r--r--pkgs/build-support/docker/stream_layered_image.py308
3 files changed, 451 insertions, 304 deletions
diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix
index 83f4a9e0c01..7ff325382a6 100644
--- a/pkgs/build-support/docker/default.nix
+++ b/pkgs/build-support/docker/default.nix
@@ -11,6 +11,7 @@
   jq,
   jshon,
   lib,
+  makeWrapper,
   moreutils,
   nix,
   pigz,
@@ -29,6 +30,7 @@
   writeReferencesToFile,
   writeScript,
   writeText,
+  writePython3,
 }:
 
 # WARNING: this API is unstable and may be subject to backwards-incompatible changes in the future.
@@ -204,24 +206,17 @@ rec {
         mkdir image
         tar -C image -xpf "$fromImage"
 
-        # If the image name isn't set, read it from the image repository json.
-        if [[ -z "$fromImageName" ]]; then
-          fromImageName=$(jshon -k < image/repositories | head -n 1)
-          echo "From-image name wasn't set. Read $fromImageName."
-        fi
-
-        # If the tag isn't set, use the name as an index into the json
-        # and read the first key found.
-        if [[ -z "$fromImageTag" ]]; then
-          fromImageTag=$(jshon -e $fromImageName -k < image/repositories \
-                         | head -n1)
-          echo "From-image tag wasn't set. Read $fromImageTag."
+        if [[ -n "$fromImageName" ]] && [[ -n "$fromImageTag" ]]; then
+          parentID="$(
+            cat "image/manifest.json" |
+              jq -r '.[] | select(.RepoTags | contains([$desiredTag])) | .Config | rtrimstr(".json")' \
+                --arg desiredTag "$fromImageName:$fromImageTag"
+          )"
+        else
+          echo "From-image name or tag wasn't set. Reading the first ID."
+          parentID="$(cat "image/manifest.json" | jq -r '.[0].Config | rtrimstr(".json")')"
         fi
 
-        # Use the name and tag to get the parent ID field.
-        parentID=$(jshon -e $fromImageName -e $fromImageTag -u \
-                   < image/repositories)
-
         cat ./image/manifest.json  | jq -r '.[0].Layers | .[]' > layer-list
       else
         touch layer-list
@@ -305,106 +300,6 @@ rec {
       ${text}
     '';
 
-  # Create $maxLayers worth of Docker Layers, one layer per store path
-  # unless there are more paths than $maxLayers. In that case, create
-  # $maxLayers-1 for the most popular layers, and smush the remainaing
-  # store paths in to one final layer.
-  #
-  # NOTE: the `closures` parameter is a list of closures to include.
-  # The TOP LEVEL store paths themselves will never be present in the
-  # resulting image. At this time (2019-12-16) none of these layers
-  # are appropriate to include, as they are all created as
-  # implementation details of dockerTools.
-  mkManyPureLayers = {
-    name,
-    # Files to add to the layer.
-    closures,
-    configJson,
-    # Docker has a 125-layer maximum, we pick 100 to ensure there is
-    # plenty of room for extension.
-    # https://github.com/moby/moby/blob/b3e9f7b13b0f0c414fa6253e1f17a86b2cff68b5/layer/layer_store.go#L23-L26
-    maxLayers ? 100
-  }:
-    let
-      storePathToLayer = substituteAll
-      { shell = runtimeShell;
-        isExecutable = true;
-        src = ./store-path-to-layer.sh;
-      };
-
-      overallClosure = writeText "closure" (lib.concatStringsSep " " closures);
-    in
-    runCommand "${name}-granular-docker-layers" {
-      inherit maxLayers;
-      paths = referencesByPopularity overallClosure;
-      nativeBuildInputs = [ jshon rsync tarsum moreutils ];
-      enableParallelBuilding = true;
-    }
-    ''
-      mkdir layers
-
-      # Delete impurities for store path layers, so they don't get
-      # shared and taint other projects.
-      cat ${configJson} \
-        | jshon -d config \
-        | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json
-
-      # WARNING!
-      # The following code is fiddly w.r.t. ensuring every layer is
-      # created, and that no paths are missed. If you change the
-      # following head and tail call lines, double-check that your
-      # code behaves properly when the number of layers equals:
-      #      maxLayers-1, maxLayers, and maxLayers+1, 0
-      paths() {
-        cat $paths ${lib.concatMapStringsSep " " (path: "| (grep -v ${path} || true)") (closures ++ [ overallClosure ])}
-      }
-
-      paths | head -n $((maxLayers - 1)) | cat -n | xargs -r -P$NIX_BUILD_CORES -n2 ${storePathToLayer}
-      if [ $(paths | wc -l) -ge $maxLayers ]; then
-        paths | tail -n+$maxLayers | xargs ${storePathToLayer} $maxLayers
-      fi
-
-      echo "Finished building layer '$name'"
-
-      mv ./layers $out
-    '';
-
-  # Create a "Customisation" layer which adds symlinks at the root of
-  # the image to the root paths of the closure. Also add the config
-  # data like what command to run and the environment to run it in.
-  mkCustomisationLayer = {
-    name,
-    # Files to add to the layer.
-    contents,
-    baseJson,
-    extraCommands,
-    uid ? 0, gid ? 0,
-  }:
-    runCommand "${name}-customisation-layer" {
-      nativeBuildInputs = [ jshon rsync tarsum ];
-      inherit extraCommands;
-    }
-    ''
-      cp -r ${contents}/ ./layer
-
-      if [[ -n $extraCommands ]]; then
-        chmod ug+w layer
-        (cd layer; eval "$extraCommands")
-      fi
-
-      # Tar up the layer and throw it into 'layer.tar', while calculating its checksum.
-      echo "Packing layer..."
-      mkdir $out
-      tarhash=$(tar --transform='s|^\./||' -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf - . | tee $out/layer.tar | tarsum)
-
-      # Add a 'checksum' field to the JSON, with the value set to the
-      # checksum of the tarball.
-      cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
-
-      # Indicate to docker that we're using schema version 1.0.
-      echo -n "1.0" > $out/VERSION
-    '';
-
   # Create a "layer" (set of files).
   mkPureLayer = {
     # Name of the layer
@@ -438,7 +333,7 @@ rec {
 
       chmod ug+w layer
 
-      if [[ -n $extraCommands ]]; then
+      if [[ -n "$extraCommands" ]]; then
         (cd layer; eval "$extraCommands")
       fi
 
@@ -541,131 +436,14 @@ rec {
       '';
     };
 
-  buildLayeredImage = {
-    # Image Name
-    name,
-    # Image tag, the Nix's output hash will be used if null
-    tag ? null,
-    # Files to put on the image (a nix store path or list of paths).
-    contents ? [],
-    # Docker config; e.g. what command to run on the container.
-    config ? {},
-    # Time of creation of the image. Passing "now" will make the
-    # created date be the time of building.
-    created ? "1970-01-01T00:00:01Z",
-    # Optional bash script to run on the files prior to fixturizing the layer.
-    extraCommands ? "", uid ? 0, gid ? 0,
-    # We pick 100 to ensure there is plenty of room for extension. I
-    # believe the actual maximum is 128.
-    maxLayers ? 100
-  }:
-    assert
-      (lib.assertMsg (maxLayers > 1)
-      "the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})");
+  buildLayeredImage = {name, ...}@args:
     let
-      baseName = baseNameOf name;
-      contentsEnv = symlinkJoin {
-        name = "bulk-layers";
-        paths = if builtins.isList contents
-          then contents
-          else [ contents ];
-      };
-
-      configJson = let
-          pure = writeText "${baseName}-config.json" (builtins.toJSON {
-            inherit created config;
-            architecture = buildPackages.go.GOARCH;
-            os = "linux";
-          });
-          impure = runCommand "${baseName}-standard-dynamic-date.json"
-            { nativeBuildInputs = [ jq ]; }
-            ''
-               jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out
-            '';
-        in if created == "now" then impure else pure;
-
-      bulkLayers = mkManyPureLayers {
-          name = baseName;
-          closures = [ contentsEnv configJson ];
-          # One layer will be taken up by the customisationLayer, so
-          # take up one less.
-          maxLayers = maxLayers - 1;
-          inherit configJson;
-        };
-      customisationLayer = mkCustomisationLayer {
-          name = baseName;
-          contents = contentsEnv;
-          baseJson = configJson;
-          inherit uid gid extraCommands;
-        };
-      result = runCommand "docker-image-${baseName}.tar.gz" {
-        nativeBuildInputs = [ jshon pigz coreutils findutils jq ];
-        # Image name and tag must be lowercase
-        imageName = lib.toLower name;
-        baseJson = configJson;
-        passthru.imageTag =
-          if tag == null
-          then lib.head (lib.splitString "-" (lib.last (lib.splitString "/" result)))
-          else lib.toLower tag;
-        # Docker can't be made to run darwin binaries
-        meta.badPlatforms = lib.platforms.darwin;
-      } ''
-        ${if (tag == null) then ''
-          outName="$(basename "$out")"
-          outHash=$(echo "$outName" | cut -d - -f 1)
-
-          imageTag=$outHash
-        '' else ''
-          imageTag="${tag}"
-        ''}
-
-        find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
-        echo ${customisationLayer} >> layer-list
-
-        mkdir image
-        imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}")
-        manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]")
-        for layer in $(cat layer-list); do
-          layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1)
-          layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1)
-          ln -s "$layer" "./image/$layerID"
-
-          manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= . + [\"$layerID/layer.tar\"]")
-          imageJson=$(echo "$imageJson" | jq ".history |= . + [{\"created\": \"$(jq -r .created ${configJson})\"}]")
-          imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= . + [\"sha256:$layerChecksum\"]")
-        done
-        imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1)
-        echo "$imageJson" > "image/$imageJsonChecksum.json"
-        manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"")
-        echo "$manifestJson" > image/manifest.json
-
-        jshon -n object \
-          -n object -s "$layerID" -i "$imageTag" \
-          -i "$imageName" > image/repositories
-
-        echo "Cooking the image..."
-        # tar exits with an exit code of 1 if files changed while it was
-        # reading them. It considers a change in the number of hard links
-        # to be a "change", which can cause this to fail if images are being
-        # built concurrently and the auto-optimise-store nix option is turned on.
-        # Since the contents of these files will not change, we can reasonably
-        # ignore this exit code.
-        set +e
-        tar -C image --dereference --hard-dereference --sort=name \
-          --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0  \
-          --mode=a-w --xform s:'^./':: --use-compress-program='pigz -nT' \
-          --warning=no-file-changed -cf $out .
-        RET=$?
-        if [ $RET -ne 0 ] && [ $RET -ne 1 ]; then
-          exit $RET
-        fi
-        set -e
-
-        echo "Finished."
-      '';
-
+      stream = streamLayeredImage args;
     in
-    result;
+      runCommand "${name}.tar.gz" {
+        inherit (stream) imageName;
+        nativeBuildInputs = [ pigz ];
+      } "${stream} | pigz -nT > $out";
 
   # 1. extract the base image
   # 2. create the layer
@@ -774,20 +552,22 @@ rec {
           configName="$(cat ./image/manifest.json | jq -r '.[0].Config')"
           baseEnvs="$(cat "./image/$configName" | jq '.config.Env // []')"
 
+          # Extract the parentID from the manifest
+          if [[ -n "$fromImageName" ]] && [[ -n "$fromImageTag" ]]; then
+            parentID="$(
+              cat "image/manifest.json" |
+                jq -r '.[] | select(.RepoTags | contains([$desiredTag])) | .Config | rtrimstr(".json")' \
+                  --arg desiredTag "$fromImageName:$fromImageTag"
+            )"
+          else
+            echo "From-image name or tag wasn't set. Reading the first ID."
+            parentID="$(cat "image/manifest.json" | jq -r '.[0].Config | rtrimstr(".json")')"
+          fi
+
           # Otherwise do not import the base image configuration and manifest
           chmod a+w image image/*.json
           rm -f image/*.json
 
-          if [[ -z "$fromImageName" ]]; then
-            fromImageName=$(jshon -k < image/repositories|head -n1)
-          fi
-          if [[ -z "$fromImageTag" ]]; then
-            fromImageTag=$(jshon -e $fromImageName -k \
-                           < image/repositories|head -n1)
-          fi
-          parentID=$(jshon -e $fromImageName -e $fromImageTag -u \
-                     < image/repositories)
-
           for l in image/*/layer.tar; do
             ls_tar $l >> baseFiles
           done
@@ -904,4 +684,117 @@ rec {
     })
   );
 
+  streamLayeredImage = {
+    # Image Name
+    name,
+    # Image tag, the Nix's output hash will be used if null
+    tag ? null,
+    # Files to put on the image (a nix store path or list of paths).
+    contents ? [],
+    # Docker config; e.g. what command to run on the container.
+    config ? {},
+    # Time of creation of the image. "now" is passed through to the
+    # stream script, which then uses the time at which it is run.
+    created ? "1970-01-01T00:00:01Z",
+    # Optional bash script to run on the files prior to fixturizing the layer.
+    extraCommands ? "",
+    # We pick 100 to ensure there is plenty of room for extension. I
+    # believe the actual maximum is 128.
+    maxLayers ? 100
+  }:
+    assert
+      (lib.assertMsg (maxLayers > 1)
+      "the maxLayers argument of dockerTools.buildLayeredImage function must be greater than 1 (current value: ${toString maxLayers})");
+    let
+      streamScript = writePython3 "stream" {} ./stream_layered_image.py;
+      baseJson = writeText "${name}-base.json" (builtins.toJSON {
+         inherit config;
+         architecture = buildPackages.go.GOARCH;
+         os = "linux";
+      });
+      customisationLayer = runCommand "${name}-customisation-layer" { inherit extraCommands; } ''
+        cp -r ${contentsEnv}/ $out
+
+        if [[ -n "$extraCommands" ]]; then
+          chmod u+w $out
+          (cd $out; eval "$extraCommands")
+        fi
+      '';
+      contentsEnv = symlinkJoin {
+        name = "${name}-bulk-layers";
+        paths = if builtins.isList contents
+          then contents
+          else [ contents ];
+      };
+
+      # NOTE: `closures` is the list of closures to include.
+      # The TOP LEVEL store paths themselves will never be present in the
+      # resulting image. At this time (2020-06-18) none of these layers
+      # are appropriate to include, as they are all created as
+      # implementation details of dockerTools.
+      closures = [ baseJson contentsEnv ];
+      overallClosure = writeText "closure" (lib.concatStringsSep " " closures);
+      conf = runCommand "${name}-conf.json" {
+        inherit maxLayers created;
+        imageName = lib.toLower name;
+        paths = referencesByPopularity overallClosure;
+        nativeBuildInputs = [ jq ];
+      } ''
+        paths() {
+          cat $paths ${lib.concatMapStringsSep " " (path: "| (grep -v ${path} || true)") (closures ++ [ overallClosure ])}
+        }
+        ${if (tag == null) then ''
+          outName="$(basename "$out")"
+          outHash=$(echo "$outName" | cut -d - -f 1)
+
+          imageTag=$outHash
+        '' else ''
+          imageTag="${tag}"
+        ''}
+
+        # convert "created" to iso format
+        if [[ "$created" != "now" ]]; then
+            created="$(date -Iseconds -d "$created")"
+        fi
+
+        # Create $maxLayers worth of Docker Layers, one layer per store path
+        # unless there are more paths than $maxLayers. In that case, create
+        # $maxLayers-1 for the most popular layers, and smush the remaining
+        # store paths in to one final layer.
+        #
+        # The following code is fiddly w.r.t. ensuring every layer is
+        # created, and that no paths are missed. If you change the
+        # following lines, double-check that your code behaves properly
+        # when the number of layers equals:
+        #      maxLayers-1, maxLayers, and maxLayers+1, 0
+        store_layers="$(
+          paths |
+            jq -sR '
+              rtrimstr("\n") | split("\n")
+                | (.[:$maxLayers-1] | map([.])) + [ .[$maxLayers-1:] ]
+                | map(select(length > 0))
+            ' \
+              --argjson maxLayers "$(( maxLayers - 1 ))" # one layer will be taken up by the customisation layer
+        )"
+
+        cat ${baseJson} | jq '
+          . + {
+            "store_layers": $store_layers,
+            "customisation_layer": $customisation_layer,
+            "repo_tag": $repo_tag,
+            "created": $created
+          }
+          ' --argjson store_layers "$store_layers" \
+            --arg customisation_layer ${customisationLayer} \
+            --arg repo_tag "$imageName:$imageTag" \
+            --arg created "$created" |
+          tee $out
+      '';
+      result = runCommand "stream-${name}" {
+        inherit (conf) imageName;
+        nativeBuildInputs = [ makeWrapper ];
+      } ''
+        makeWrapper ${streamScript} $out --add-flags ${conf}
+      '';
+    in result;
 }
diff --git a/pkgs/build-support/docker/store-path-to-layer.sh b/pkgs/build-support/docker/store-path-to-layer.sh
deleted file mode 100755
index 3a1fcd0c27a..00000000000
--- a/pkgs/build-support/docker/store-path-to-layer.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!@shell@
-
-set -eu
-
-layerNumber=$1
-shift
-
-layerPath="./layers/$layerNumber"
-echo "Creating layer #$layerNumber for $@"
-
-mkdir -p "$layerPath"
-
-# Make sure /nix and /nix/store appear first in the archive.
-#
-# We create the directories here and use them because
-# when there are other things being added to the
-# nix store, tar could fail, saying,
-# "tar: /nix/store: file changed as we read it"
-#
-# In addition, we use `__Nix__` instead of `nix` to avoid renaming
-# relative symlink destinations like
-# /nix/store/...-nix-2.3.4/bin/nix-daemon -> nix
-mkdir -p __Nix__/store
-
-# Then we change into the /nix/store in order to
-# avoid a similar "file changed as we read it" error
-# as above. Namely, if we use the absolute path of
-# /nix/store/123-pkg and something new is added to the nix
-# store while tar is running, it will detect a change to
-# /nix/store and fail. Instead, if we cd into the nix store
-# and copy the relative nix store path, tar will ignore
-# changes to /nix/store. In order to create the correct
-# structure in the tar file, we transform the relative nix
-# store path to the absolute store path.
-tarhash=$(
-  basename -a "$@" |
-    tar --create --preserve-permissions --absolute-names nix \
-      --directory /nix/store --verbatim-files-from --files-from - \
-      --hard-dereference --sort=name \
-      --mtime="@$SOURCE_DATE_EPOCH" \
-      --owner=0 --group=0 \
-      --transform 's,^__Nix__$,/nix,' \
-      --transform 's,^__Nix__/store$,/nix/store,' \
-      --transform 's,^[^/],/nix/store/\0,rS' |
-    tee "$layerPath/layer.tar" |
-    tarsum
-)
-
-# Add a 'checksum' field to the JSON, with the value set to the
-# checksum of the tarball.
-cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json
-
-# Indicate to docker that we're using schema version 1.0.
-echo -n "1.0" > $layerPath/VERSION
diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py
new file mode 100644
index 00000000000..4348513338d
--- /dev/null
+++ b/pkgs/build-support/docker/stream_layered_image.py
@@ -0,0 +1,308 @@
+"""
+This script generates a Docker image from a set of store paths. Uses
+Docker Image Specification v1.2 as reference [1].
+
+It expects a JSON file with the following properties and writes the
+image as an uncompressed tarball to stdout:
+
+* "architecture", "config", "os", "created", "repo_tag" correspond to
+  the fields with the same name on the image spec [2].
+* "created" can be "now".
+* "created" is also used as mtime for files added to the image.
+* "store_layers" is a list of layers in ascending order, where each
+  layer is the list of store paths to include in that layer.
+
+The main challenge for this script is to create the final image in
+a streaming fashion, without dumping any intermediate data to disk
+for performance.
+
+A docker image has each layer contents archived as separate tarballs,
+and they later all get enveloped into a single big tarball in a
+content addressed fashion. However, because of how the "tar" format
+works, we have to know the name (which includes the checksum in our
+case) and the size of the tarball before we can start adding it to
+the outer tarball.  We achieve that by creating the layer tarballs
+twice; on the first iteration we calculate the file size and the
+checksum, and on the second one we actually stream the contents.
+The 'add_layer_dir' function does all this.
+
+[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md
+[2]: https://github.com/moby/moby/blob/4fb59c20a4fb54f944fe170d0ff1d00eb4a24d6f/image/spec/v1.2.md#image-json-field-descriptions
+"""  # noqa: E501
+
+
+import io
+import os
+import re
+import sys
+import json
+import hashlib
+import pathlib
+import tarfile
+import threading
+from datetime import datetime
+from collections import namedtuple
+
+
+def archive_paths_to(obj, paths, mtime, add_nix, filter=None):
+    """
+    Writes the given store paths as a tar file to the given stream.
+
+    obj: Stream to write to. Should have a 'write' method.
+    paths: List of store paths.
+    mtime: 'mtime' of every archive member, as an integer POSIX time.
+    add_nix: Whether /nix and /nix/store directories should be
+             prepended to the archive.
+    filter: Optional function applied to every TarInfo before it is
+            added; takes a TarInfo and returns one. Defaults to identity.
+    """
+
+    filter = filter if filter else lambda i: i
+
+    # gettarinfo strips the leading "/", this puts it back
+    def append_root(ti):
+        ti.name = "/" + ti.name
+        return ti
+
+    def apply_filters(ti):
+        ti.mtime = mtime
+        ti.uid = 0
+        ti.gid = 0
+        ti.uname = "root"
+        ti.gname = "root"
+        return filter(ti)
+
+    def directory(path):
+        ti = tarfile.TarInfo(path)
+        ti.type = tarfile.DIRTYPE
+        return ti
+
+    with tarfile.open(fileobj=obj, mode="w|") as tar:
+        # Docker utilities put these directories first in layer
+        # tarballs; the customisation layer does not need them.
+        if add_nix:
+            tar.addfile(apply_filters(directory("/nix")))
+            tar.addfile(apply_filters(directory("/nix/store")))
+
+        for path in paths:
+            root = pathlib.Path(os.path.join("/", path))
+            # Sorted so that both passes over a layer (checksum, then
+            # streaming) and every rebuild emit the same byte stream.
+            for filename in [root] + sorted(root.rglob("*")):
+                ti = append_root(tar.gettarinfo(filename))
+
+                # copy hardlinks as regular files; gettarinfo reports
+                # size 0 for them, so restore the real size
+                if ti.islnk():
+                    ti.type = tarfile.REGTYPE
+                    ti.linkname = ""
+                    ti.size = os.lstat(filename).st_size
+
+                ti = apply_filters(ti)
+                if ti.isfile():
+                    with open(filename, "rb") as f:
+                        tar.addfile(ti, f)
+                else:
+                    tar.addfile(ti)
+
+
+class ExtractChecksum:
+    """
+    Write-only sink: hashes and counts the bytes written to it and
+    then throws them away, keeping only the sha256sum and the size.
+    """
+
+    def __init__(self):
+        self._written = 0
+        self._sha256 = hashlib.sha256()
+
+    def write(self, data):
+        self._sha256.update(data)
+        self._written = self._written + len(data)
+
+    def extract(self):
+        """Returns: (hex-encoded sha256sum, size in bytes) as a tuple."""
+        checksum = self._sha256.hexdigest()
+        size = self._written
+        return (checksum, size)
+
+
+# Metadata record returned for each layer added to the image
+LayerInfo = namedtuple("LayerInfo", "size checksum path paths")
+
+
+def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None):
+    """
+    Appends given store paths to a TarFile object as a new layer.
+
+    tar: 'tarfile.TarFile' object for the new layer to be added to.
+    paths: List of absolute store paths (asserted: /nix/store/...).
+    mtime: 'mtime' of the added files and the layer tarball.
+           Should be an integer representing a POSIX time.
+    add_nix: Whether /nix and /nix/store directories should be
+             added to a layer.
+    filter: An optional transformation to be applied to TarInfo
+            objects inside the layer. Should take a single TarInfo
+            object and return another one. Defaults to identity.
+
+    Returns: A 'LayerInfo' object containing some metadata of
+             the layer added.
+    """
+
+    invalid_paths = [i for i in paths if not i.startswith("/nix/store/")]
+    assert len(invalid_paths) == 0, \
+        f"Expecting absolute store paths, but got: {invalid_paths}"
+
+    # First, calculate the tarball checksum and the size.
+    extract_checksum = ExtractChecksum()
+    archive_paths_to(
+        extract_checksum,
+        paths,
+        mtime=mtime,
+        add_nix=add_nix,
+        filter=filter
+    )
+    (checksum, size) = extract_checksum.extract()
+
+    path = f"{checksum}/layer.tar"
+    layer_tarinfo = tarfile.TarInfo(path)
+    layer_tarinfo.size = size
+    layer_tarinfo.mtime = mtime
+
+    # Then actually stream the contents to the outer tarball.
+    read_fd, write_fd = os.pipe()
+    with open(read_fd, "rb") as read, open(write_fd, "wb") as write:
+        def producer():
+            try:
+                archive_paths_to(
+                    write,
+                    paths,
+                    mtime=mtime,
+                    add_nix=add_nix,
+                    filter=filter
+                )
+            finally:
+                write.close()
+
+        # The write end is closed even when the producer fails: the
+        # reader then sees EOF and 'addfile' fails on the short read
+        # instead of blocking forever. The thread's own exception is
+        # printed by the default handler, so the thread is not joined.
+        threading.Thread(target=producer).start()
+        tar.addfile(layer_tarinfo, read)
+
+    return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)
+
+
+def add_customisation_layer(tar, path, mtime):
+    """
+    Adds the contents of a single store path as a new layer, placed at
+    the root of the layer: unlike the 'add_layer_dir' defaults, the
+    /nix/store/<name> prefix is stripped from every entry name and
+    'add_nix' is turned off.
+
+    tar: 'tarfile.TarFile' object for the new layer to be added to.
+    path: A store path.
+    mtime: 'mtime' of the added files and the layer tarball. Should be an
+           integer representing a POSIX time.
+    Returns: The 'LayerInfo' object produced by 'add_layer_dir'.
+    """
+
+    store_prefix = re.compile("^/nix/store/[^/]*")
+
+    def strip_prefix(ti):
+        ti.name = store_prefix.sub("", ti.name)
+        return ti
+
+    return add_layer_dir(
+        tar, [path], mtime=mtime, add_nix=False, filter=strip_prefix
+    )
+
+
+def add_bytes(tar, path, content, mtime):
+    """
+    Writes 'content' into the tarball as a file named 'path'.
+
+    tar: 'tarfile.TarFile' object to add the file to.
+    path: Name of the file inside the tarball, as a string.
+    content: Contents of the file; must be exactly of type 'bytes'.
+    mtime: 'mtime' of the file, as an integer POSIX time.
+    """
+    assert type(content) is bytes
+
+    member = tarfile.TarInfo(name=path)
+    member.mtime = mtime
+    member.size = len(content)
+    tar.addfile(member, fileobj=io.BytesIO(content))
+
+
+def main():
+    """Streams the image described by the JSON file argv[1] to stdout."""
+    from datetime import timezone  # file-level import is the class only
+    with open(sys.argv[1], "r") as f:
+        conf = json.load(f)
+
+    created = (
+      datetime.now(tz=timezone.utc)
+      if conf["created"] == "now"
+      else datetime.fromisoformat(conf["created"])
+    )
+    mtime = int(created.timestamp())
+
+    with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
+        layers = []
+        for num, store_layer in enumerate(conf["store_layers"]):
+            print("Creating layer", num, "from paths:", store_layer,
+                  file=sys.stderr)
+            info = add_layer_dir(tar, store_layer, mtime=mtime)
+            layers.append(info)
+
+        print("Creating the customisation layer...", file=sys.stderr)
+        layers.append(
+            add_customisation_layer(
+                tar, conf["customisation_layer"], mtime=mtime
+            )
+        )
+
+        print("Adding manifests...", file=sys.stderr)
+
+        image_json = {
+            "created": created.isoformat(),
+            "architecture": conf["architecture"],
+            "os": "linux",
+            "config": conf["config"],
+            "rootfs": {
+                "diff_ids": [f"sha256:{layer.checksum}" for layer in layers],
+                "type": "layers",
+            },
+            "history": [
+                {
+                    # resolved time, so "now" is never written to the image
+                    "created": created.isoformat(),
+                    "comment": f"store paths: {layer.paths}"
+                }
+                for layer in layers
+            ],
+        }
+
+        image_json = json.dumps(image_json, indent=4).encode("utf-8")
+        image_json_checksum = hashlib.sha256(image_json).hexdigest()
+        image_json_path = f"{image_json_checksum}.json"
+        add_bytes(tar, image_json_path, image_json, mtime=mtime)
+
+        manifest_json = [
+            {
+                "Config": image_json_path,
+                "RepoTags": [conf["repo_tag"]],
+                "Layers": [layer.path for layer in layers],
+            }
+        ]
+        manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8")
+        add_bytes(tar, "manifest.json", manifest_json, mtime=mtime)
+
+        print("Done.", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()