summary refs log tree commit diff
path: root/lib/fileset
diff options
context:
space:
mode:
authorSilvan Mosberger <silvan.mosberger@tweag.io>2023-09-14 21:41:03 +0200
committerSilvan Mosberger <silvan.mosberger@tweag.io>2023-09-21 00:21:01 +0200
commit7ab764e575135586cb8ac496702663a40a2bfa56 (patch)
tree9cd36f6abb8f19314b4a7d32d6697cbd952986e2 /lib/fileset
parente04e40d05e5d959e30188d62c76e3dfb5cb26b16 (diff)
downloadnixpkgs-7ab764e575135586cb8ac496702663a40a2bfa56.tar
nixpkgs-7ab764e575135586cb8ac496702663a40a2bfa56.tar.gz
nixpkgs-7ab764e575135586cb8ac496702663a40a2bfa56.tar.bz2
nixpkgs-7ab764e575135586cb8ac496702663a40a2bfa56.tar.lz
nixpkgs-7ab764e575135586cb8ac496702663a40a2bfa56.tar.xz
nixpkgs-7ab764e575135586cb8ac496702663a40a2bfa56.tar.zst
nixpkgs-7ab764e575135586cb8ac496702663a40a2bfa56.zip
lib.fileset.unions: Don't stack overflow for many files
Diffstat (limited to 'lib/fileset')
-rw-r--r--lib/fileset/internal.nix65
-rwxr-xr-xlib/fileset/tests.sh13
2 files changed, 39 insertions, 39 deletions
diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix
index 19db7adcff4..43af0acc73e 100644
--- a/lib/fileset/internal.nix
+++ b/lib/fileset/internal.nix
@@ -14,6 +14,7 @@ let
   inherit (lib.attrsets)
     attrValues
     mapAttrs
+    zipAttrsWith
     ;
 
   inherit (lib.filesystem)
@@ -25,6 +26,7 @@ let
     commonPrefix
     drop
     elemAt
+    filter
     findFirstIndex
     foldl'
     head
@@ -342,7 +344,9 @@ rec {
       # Base paths can be unioned by taking their common prefix,
       # e.g. such that `union /foo/bar /foo/baz` has the base path `/foo`
 
-      # A list of path components common to all base paths
+      # A list of path components common to all base paths.
+      # Note that commonPrefix can only be fully evaluated,
+      # so this cannot cause a stack overflow due to a build-up of unevaluated thunks.
       commonBaseComponents = foldl'
         (components: el: commonPrefix components el._internalBaseComponents)
         first._internalBaseComponents
@@ -371,46 +375,29 @@ rec {
         ) filesets;
 
       # Folds all trees together into a single one using _unionTree
-      resultTree = foldl'
-        _unionTree
-        (head trees)
-        # We could also not do the `tail` here to avoid a list allocation,
-        # but then we'd have to pay for a potentially expensive
-        # but unnecessary `_unionTree (head trees) (head trees)` call.
-        (tail trees);
+      # We do not use a fold here because it would cause a thunk build-up
+      # which could cause a stack overflow for a large number of trees
+      resultTree = _unionTrees trees;
     in
     _create commonBase resultTree;
 
-
-  # Legend for branch tables in the below tree combinator functions
-  # - lhs\rhs  : The values for the left hand side (columns) and right hand side (rows) arguments
-  # - null     : Value `null`, a file/directory that's not included
-  # - attrs    : Satisfies `isAttrs value`, an explicitly listed directory containing nested trees
-  # - str      : Satisfies `isString value`, either "directory" or a file type, a fully included file/directory
-  # - rec      : A result computed by recursing
-  # - <number> : Indicates that the result is computed by the branch with that number
-  # - *        : Only the lhs/rhs needs to be evaluated, the result is always the same no matter the other side
-
-  # The union of two filesetTree's with the same base path.
-  # The second argument is only evaluated if necessary.
-  # Type: filesetTree -> filesetTree -> filesetTree
-  _unionTree = lhs: rhs:
-    # This branch table shows the correctness of the branch conditions,
-    # see the legend above for more details
-    #
-    # lhs\rhs |   null  |   attrs |   str |
-    # ------- | ------- | ------- | ----- |
-    #   null  | 1 null  | 3 attrs | 3 str |
-    #   attrs | 1 attrs | 2 rec   | 3 str |
-    # * str   | 1 str   | 1 str   | 1 str |
-
-    if isString lhs || rhs == null then
-      # Branch 1
-      lhs
-    else if isAttrs lhs && isAttrs rhs then
-      # Branch 2
-      mapAttrs (name: _unionTree lhs.${name}) rhs
+  # The union of multiple filesetTree's with the same base path.
+  # Later elements are only evaluated if necessary.
+  # Type: [ filesetTree ] -> filesetTree
+  _unionTrees = trees:
+    let
+      stringIndex = findFirstIndex isString null trees;
+      withoutNull = filter (tree: tree != null) trees;
+    in
+    if stringIndex != null then
+      # If there's a string, it's always a fully included tree (dir or file),
+      # no need to look at other elements
+      elemAt trees stringIndex
+    else if withoutNull == [ ] then
+      # If all trees are null, then the resulting tree is also null
+      null
     else
-      # Branch 3
-      rhs;
+      # The non-null elements have to be attribute sets representing partial trees
+      # We need to recurse into those
+      zipAttrsWith (name: _unionTrees) withoutNull;
 }
diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh
index cee93615d56..1a8f1372ebf 100755
--- a/lib/fileset/tests.sh
+++ b/lib/fileset/tests.sh
@@ -457,6 +457,19 @@ checkFileset 'unions [ ./x/a ./x/b ./y/a ./z/b ]'
 checkFileset 'union (union ./x/a ./x/b) (union ./y/a ./z/b)'
 checkFileset 'union (union (union ./x/a ./x/b) ./y/a) ./z/b'
 
+# unions should not stack overflow, even if many elements are passed
+tree=()
+for i in $(seq 1000); do
+    tree[$i/a]=1
+    tree[$i/b]=0
+done
+(
+    # Locally limit the maximum stack size to 100 * 1024 bytes
+    # If unions was implemented recursively, this would stack overflow
+    ulimit -s 100
+    checkFileset 'unions (mapAttrsToList (name: _: ./. + "/${name}/a") (builtins.readDir ./.))'
+)
+
 # TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets
 
 echo >&2 tests ok