summary refs log tree commit diff
path: root/pkgs/build-support/docker
diff options
context:
space:
mode:
authorUtku Demir <me@utdemir.com>2020-06-18 14:29:59 +1200
committerUtku Demir <me@utdemir.com>2020-06-21 12:35:38 +1200
commit4ab7baf6f61f2c89bbd4f3825546b48adcd031c4 (patch)
tree626f9b31871010fdf263bdd6c650888a1c15b283 /pkgs/build-support/docker
parenta5a611cacbc11d267d394f2a7bde06c098ef65af (diff)
downloadnixpkgs-4ab7baf6f61f2c89bbd4f3825546b48adcd031c4.tar
nixpkgs-4ab7baf6f61f2c89bbd4f3825546b48adcd031c4.tar.gz
nixpkgs-4ab7baf6f61f2c89bbd4f3825546b48adcd031c4.tar.bz2
nixpkgs-4ab7baf6f61f2c89bbd4f3825546b48adcd031c4.tar.lz
nixpkgs-4ab7baf6f61f2c89bbd4f3825546b48adcd031c4.tar.xz
nixpkgs-4ab7baf6f61f2c89bbd4f3825546b48adcd031c4.tar.zst
nixpkgs-4ab7baf6f61f2c89bbd4f3825546b48adcd031c4.zip
stream_layered_image.py: comments
Diffstat (limited to 'pkgs/build-support/docker')
-rw-r--r--pkgs/build-support/docker/stream_layered_image.py46
1 files changed, 46 insertions, 0 deletions
diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py
index f502e855e95..6687f49e1bc 100644
--- a/pkgs/build-support/docker/stream_layered_image.py
+++ b/pkgs/build-support/docker/stream_layered_image.py
@@ -1,3 +1,36 @@
+"""
+This script generates a Docker image from a set of store paths. Uses
+Docker Image Specification v1.2 as reference [1].
+
+It expects a JSON file with the following properties and writes the
+image as an uncompressed tarball to stdout:
+
+* "architecture", "config", "os", "created", "repo_tag" correspond to
+  the fields with the same name on the image spec [2].
+* "created" can be "now".
+* "created" is also used as mtime for files added to the image.
+* "store_layers" is a list of layers in ascending order, where each
+  layer is the list of store paths to include in that layer.
+
+The main challenge for this script to create the final image in a
+streaming fashion, without dumping any intermediate data to disk
+for performance.
+
+A docker image has each layer contents archived as separate tarballs,
+and they later all get enveloped into a single big tarball in a
+content addressed fashion. However, because how "tar" format works,
+we have to know about the name (which includes the checksum in our
+case) and the size of the tarball before we can start adding it to the
+outer tarball.  We achieve that by creating the layer tarballs twice;
+on the first iteration we calculate the file size and the checksum,
+and on the second one we actually stream the contents. 'add_layer_dir'
+function does all this.
+
+[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md
+[2]: https://github.com/moby/moby/blob/4fb59c20a4fb54f944fe170d0ff1d00eb4a24d6f/image/spec/v1.2.md#image-json-field-descriptions
+"""  # noqa: E501
+
+
 import io
 import os
 import re
@@ -31,6 +64,9 @@ def archive_paths_to(obj, paths, mtime, add_nix, filter=None):
         return ti
 
     with tarfile.open(fileobj=obj, mode="w|") as tar:
+        # To be consistent with the docker utilities, we need to have
+        # these directories first when building layer tarballs. But
+        # we don't need them on the customisation layer.
         if add_nix:
             tar.addfile(apply_filters(dir("/nix")))
             tar.addfile(apply_filters(dir("/nix/store")))
@@ -80,6 +116,7 @@ LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"])
 def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None):
     assert all(i.startswith("/nix/store/") for i in paths)
 
+    # First, calculate the tarball checksum and the size.
     extract_checksum = ExtractChecksum()
     archive_paths_to(
         extract_checksum,
@@ -95,6 +132,7 @@ def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None):
     ti.size = size
     ti.mtime = mtime
 
+    # Then actually stream the contents to the outer tarball.
     read_fd, write_fd = os.pipe()
     with open(read_fd, "rb") as read, open(write_fd, "wb") as write:
         def producer():
@@ -106,12 +144,20 @@ def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None):
                 filter=filter
             )
             write.close()
+
+        # Closing the write end of the fifo also closes the read end,
+        # so we don't need to wait until this thread is finished.
+        #
+        # Any exception from the thread will get printed by the default
+        # exception handler, and the 'addfile' call will fail since it
+        # won't be able to read required amount of bytes.
         threading.Thread(target=producer).start()
         tar.addfile(ti, read)
 
     return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)
 
 
+# Adds the contents of the store path to the root as a new layer.
 def add_customisation_layer(tar, path, mtime):
     def filter(ti):
         ti.name = re.sub("^/nix/store/[^/]*", "", ti.name)