summary refs log tree commit diff
path: root/lib/fileset
diff options
context:
space:
mode:
authorSilvan Mosberger <contact@infinisil.com>2023-11-01 19:40:45 +0100
committerGitHub <noreply@github.com>2023-11-01 19:40:45 +0100
commitfc28c5e5b79ada17ce17b5c83e07a2949b959fcf (patch)
tree8955af520cba3d6f3539d8b182960b7bd376f31b /lib/fileset
parent1c7f17f39502a27885d9b9c9505c612adb41823e (diff)
parent50df7f977548c296e475ad37c1d44afcdc9f8e26 (diff)
downloadnixpkgs-fc28c5e5b79ada17ce17b5c83e07a2949b959fcf.tar
nixpkgs-fc28c5e5b79ada17ce17b5c83e07a2949b959fcf.tar.gz
nixpkgs-fc28c5e5b79ada17ce17b5c83e07a2949b959fcf.tar.bz2
nixpkgs-fc28c5e5b79ada17ce17b5c83e07a2949b959fcf.tar.lz
nixpkgs-fc28c5e5b79ada17ce17b5c83e07a2949b959fcf.tar.xz
nixpkgs-fc28c5e5b79ada17ce17b5c83e07a2949b959fcf.tar.zst
nixpkgs-fc28c5e5b79ada17ce17b5c83e07a2949b959fcf.zip
Merge pull request #259065 from tweag/fileset.difference
`lib.fileset.difference`: init
Diffstat (limited to 'lib/fileset')
-rw-r--r--lib/fileset/default.nix53
-rw-r--r--lib/fileset/internal.nix80
-rwxr-xr-xlib/fileset/tests.sh98
3 files changed, 231 insertions, 0 deletions
diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix
index 0342be3e037..4a97633b4a8 100644
--- a/lib/fileset/default.nix
+++ b/lib/fileset/default.nix
@@ -9,6 +9,7 @@ let
     _fileFilter
     _printFileset
     _intersection
+    _difference
     ;
 
   inherit (builtins)
@@ -369,6 +370,58 @@ If a directory does not recursively contain any file, it is omitted from the sto
       (elemAt filesets 1);
 
   /*
+    The file set containing all files from the first file set that are not in the second file set.
+    See also [Difference (set theory)](https://en.wikipedia.org/wiki/Complement_(set_theory)#Relative_complement).
+
+    The given file sets are evaluated as lazily as possible,
+    with the first argument being evaluated first if needed.
+
+    Type:
+      union :: FileSet -> FileSet -> FileSet
+
+    Example:
+      # Create a file set containing all files from the current directory,
+      # except ones under ./tests
+      difference ./. ./tests
+
+      let
+        # A set of Nix-related files
+        nixFiles = unions [ ./default.nix ./nix ./tests/default.nix ];
+      in
+      # Create a file set containing all files under ./tests, except ones in `nixFiles`,
+      # meaning only without ./tests/default.nix
+      difference ./tests nixFiles
+  */
+  difference =
+    # The positive file set.
+    # The result can only contain files that are also in this file set.
+    #
+    # This argument can also be a path,
+    # which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
+    positive:
+    # The negative file set.
+    # The result will never contain files that are also in this file set.
+    #
+    # This argument can also be a path,
+    # which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
+    negative:
+    let
+      filesets = _coerceMany "lib.fileset.difference" [
+        {
+          context = "first argument (positive set)";
+          value = positive;
+        }
+        {
+          context = "second argument (negative set)";
+          value = negative;
+        }
+      ];
+    in
+    _difference
+      (elemAt filesets 0)
+      (elemAt filesets 1);
+
+  /*
     Incrementally evaluate and trace a file set in a pretty way.
     This function is only intended for debugging purposes.
     The exact tracing format is unspecified and may change.
diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix
index 76b95c6ae47..b919a5de3ee 100644
--- a/lib/fileset/internal.nix
+++ b/lib/fileset/internal.nix
@@ -651,6 +651,86 @@ rec {
       # In all other cases it's the rhs
       rhs;
 
+  # Compute the set difference between two file sets.
+  # The filesets must already be coerced and validated to be in the same filesystem root.
+  # Type: Fileset -> Fileset -> Fileset
+  _difference = positive: negative:
+    let
+      # The common base components prefix, e.g.
+      # (/foo/bar, /foo/bar/baz) -> /foo/bar
+      # (/foo/bar, /foo/baz) -> /foo
+      commonBaseComponentsLength =
+        # TODO: Have a `lib.lists.commonPrefixLength` function such that we don't need the list allocation from commonPrefix here
+        length (
+          commonPrefix
+            positive._internalBaseComponents
+            negative._internalBaseComponents
+        );
+
+      # We need filesetTree's with the same base to be able to compute the difference between them
+      # This here is the filesetTree from the negative file set, but for a base path that matches the positive file set.
+      # Examples:
+      # For `difference /foo /foo/bar`, `negativeTreeWithPositiveBase = { bar = "directory"; }`
+      #   because under the base path of `/foo`, only `bar` from the negative file set is included
+      # For `difference /foo/bar /foo`, `negativeTreeWithPositiveBase = "directory"`
+      #   because under the base path of `/foo/bar`, everything from the negative file set is included
+      # For `difference /foo /bar`, `negativeTreeWithPositiveBase = null`
+      #   because under the base path of `/foo`, nothing from the negative file set is included
+      negativeTreeWithPositiveBase =
+        if commonBaseComponentsLength == length positive._internalBaseComponents then
+          # The common prefix is the same as the positive base path, so the second path is equal or longer.
+          # We need to _shorten_ the negative filesetTree to the same base path as the positive one
+          # E.g. for `difference /foo /foo/bar` the common prefix is /foo, equal to the positive file set's base
+          # So we need to shorten the base of the tree for the negative argument from /foo/bar to just /foo
+          _shortenTreeBase positive._internalBaseComponents negative
+        else if commonBaseComponentsLength == length negative._internalBaseComponents then
+          # The common prefix is the same as the negative base path, so the first path is longer.
+          # We need to lengthen the negative filesetTree to the same base path as the positive one.
+          # E.g. for `difference /foo/bar /foo` the common prefix is /foo, equal to the negative file set's base
+          # So we need to lengthen the base of the tree for the negative argument from /foo to /foo/bar
+          _lengthenTreeBase positive._internalBaseComponents negative
+        else
+          # The common prefix is neither the first nor the second path.
+          # This means there's no overlap between the two file sets,
+          # and nothing from the negative argument should get removed from the positive one
+          # E.g for `difference /foo /bar`, we remove nothing to get the same as `/foo`
+          null;
+
+      resultingTree =
+        _differenceTree
+        positive._internalBase
+        positive._internalTree
+        negativeTreeWithPositiveBase;
+    in
+    # If the first file set is empty, we can never have any files in the result
+    if positive._internalIsEmptyWithoutBase then
+      _emptyWithoutBase
+    # If the second file set is empty, nothing gets removed, so the result is just the first file set
+    else if negative._internalIsEmptyWithoutBase then
+      positive
+    else
+      # We use the positive file set base for the result,
+      # because only files from the positive side may be included,
+      # which is what base path is for
+      _create positive._internalBase resultingTree;
+
+  # Computes the set difference of two filesetTree's
+  # Type: Path -> filesetTree -> filesetTree
+  _differenceTree = path: lhs: rhs:
+    # If the lhs doesn't have any files, or the right hand side includes all files
+    if lhs == null || isString rhs then
+      # The result will always be empty
+      null
+    # If the right hand side has no files
+    else if rhs == null then
+      # The result is always the left hand side, because nothing gets removed
+      lhs
+    else
+      # Otherwise we always have two attribute sets to recurse into
+      mapAttrs (name: lhsValue:
+        _differenceTree (path + "/${name}") lhsValue (rhs.${name} or null)
+      ) (_directoryEntries path lhs);
+
   _fileFilter = predicate: fileset:
     let
       recurse = path: tree:
diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh
index 5b756b8fc59..2df0727bde3 100755
--- a/lib/fileset/tests.sh
+++ b/lib/fileset/tests.sh
@@ -684,6 +684,104 @@ tree=(
 )
 checkFileset 'intersection (unions [ ./a/b ./c/d ./c/e ]) (unions [ ./a ./c/d/f ./c/e ])'
 
+## Difference
+
+# Subtracting something from itself results in nothing
+tree=(
+    [a]=0
+)
+checkFileset 'difference ./. ./.'
+
+# The tree of the second argument should not be evaluated if not needed
+checkFileset 'difference _emptyWithoutBase (_create ./. (abort "This should not be used!"))'
+checkFileset 'difference (_create ./. null) (_create ./. (abort "This should not be used!"))'
+
+# Subtracting nothing gives the same thing back
+tree=(
+    [a]=1
+)
+checkFileset 'difference ./. _emptyWithoutBase'
+checkFileset 'difference ./. (_create ./. null)'
+
+# Subtracting doesn't influence the base path
+mkdir a b
+touch {a,b}/x
+expectEqual 'toSource { root = ./a; fileset = difference ./a ./b; }' 'toSource { root = ./a; fileset = ./a; }'
+rm -rf -- *
+
+# Also not the other way around
+mkdir a
+expectFailure 'toSource { root = ./a; fileset = difference ./. ./a; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` \('"$work"'/a\). Potential solutions:
+\s*- Set `root` to '"$work"' or any directory higher up. This changes the layout of the resulting store path.
+\s*- Set `fileset` to a file set that cannot contain files outside the `root` \('"$work"'/a\). This could change the files included in the result.'
+rm -rf -- *
+
+# Difference actually works
+# We test all combinations of ./., ./a, ./a/x and ./b
+tree=(
+    [a/x]=0
+    [a/y]=0
+    [b]=0
+    [c]=0
+)
+checkFileset 'difference ./. ./.'
+checkFileset 'difference ./a ./.'
+checkFileset 'difference ./a/x ./.'
+checkFileset 'difference ./b ./.'
+checkFileset 'difference ./a ./a'
+checkFileset 'difference ./a/x ./a'
+checkFileset 'difference ./a/x ./a/x'
+checkFileset 'difference ./b ./b'
+tree=(
+    [a/x]=0
+    [a/y]=0
+    [b]=1
+    [c]=1
+)
+checkFileset 'difference ./. ./a'
+tree=(
+    [a/x]=1
+    [a/y]=1
+    [b]=0
+    [c]=0
+)
+checkFileset 'difference ./a ./b'
+tree=(
+    [a/x]=1
+    [a/y]=0
+    [b]=0
+    [c]=0
+)
+checkFileset 'difference ./a/x ./b'
+tree=(
+    [a/x]=0
+    [a/y]=1
+    [b]=0
+    [c]=0
+)
+checkFileset 'difference ./a ./a/x'
+tree=(
+    [a/x]=0
+    [a/y]=0
+    [b]=1
+    [c]=0
+)
+checkFileset 'difference ./b ./a'
+checkFileset 'difference ./b ./a/x'
+tree=(
+    [a/x]=0
+    [a/y]=1
+    [b]=1
+    [c]=1
+)
+checkFileset 'difference ./. ./a/x'
+tree=(
+    [a/x]=1
+    [a/y]=1
+    [b]=0
+    [c]=1
+)
+checkFileset 'difference ./. ./b'
 
 ## File filter