summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
authorgithub-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>2023-11-11 00:02:47 +0000
committerGitHub <noreply@github.com>2023-11-11 00:02:47 +0000
commit5f5513224f3075c3bc76905121c9a3e06ec30dc4 (patch)
treeab1abc65028ce024e018c5f741b72e435d648330 /lib
parente8096ee1e1b78e17b362bf9daf67a46c8c02f28b (diff)
parent912311b7fd205f8542aaac6e26251bcb0b92fa7c (diff)
downloadnixpkgs-5f5513224f3075c3bc76905121c9a3e06ec30dc4.tar
nixpkgs-5f5513224f3075c3bc76905121c9a3e06ec30dc4.tar.gz
nixpkgs-5f5513224f3075c3bc76905121c9a3e06ec30dc4.tar.bz2
nixpkgs-5f5513224f3075c3bc76905121c9a3e06ec30dc4.tar.lz
nixpkgs-5f5513224f3075c3bc76905121c9a3e06ec30dc4.tar.xz
nixpkgs-5f5513224f3075c3bc76905121c9a3e06ec30dc4.tar.zst
nixpkgs-5f5513224f3075c3bc76905121c9a3e06ec30dc4.zip
Merge staging-next into staging
Diffstat (limited to 'lib')
-rw-r--r--lib/fileset/README.md1
-rw-r--r--lib/fileset/default.nix78
-rw-r--r--lib/fileset/internal.nix60
-rwxr-xr-xlib/fileset/tests.sh288
4 files changed, 398 insertions, 29 deletions
diff --git a/lib/fileset/README.md b/lib/fileset/README.md
index 16ab58e2f26..8518d88a7d6 100644
--- a/lib/fileset/README.md
+++ b/lib/fileset/README.md
@@ -244,5 +244,4 @@ Here's a list of places in the library that need to be updated in the future:
 - > The file set library is currently somewhat limited but is being expanded to include more functions over time.
 
   in [the manual](../../doc/functions/fileset.section.md)
-- If/Once a function to convert `lib.sources` values into file sets exists, the `_coerce` and `toSource` functions should be updated to mention that function in the error when such a value is passed
 - If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function
diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix
index fe7b304ba69..372d445269f 100644
--- a/lib/fileset/default.nix
+++ b/lib/fileset/default.nix
@@ -3,8 +3,10 @@ let
 
   inherit (import ./internal.nix { inherit lib; })
     _coerce
+    _singleton
     _coerceMany
     _toSourceFilter
+    _fromSourceFilter
     _unionMany
     _fileFilter
     _printFileset
@@ -152,7 +154,12 @@ in {
       sourceFilter = _toSourceFilter fileset;
     in
     if ! isPath root then
-      if isStringLike root then
+      if root ? _isLibCleanSourceWith then
+        throw ''
+          lib.fileset.toSource: `root` is a `lib.sources`-based value, but it should be a path instead.
+              To use a `lib.sources`-based value, convert it to a file set using `lib.fileset.fromSource` and pass it as `fileset`.
+              Note that this only works for sources created from paths.''
+      else if isStringLike root then
         throw ''
           lib.fileset.toSource: `root` (${toString root}) is a string-like value, but it should be a path instead.
               Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
@@ -189,6 +196,75 @@ in {
       };
 
   /*
+  Create a file set with the same files as a `lib.sources`-based value.
+  This does not import any of the files into the store.
+
+  This can be used to gradually migrate from `lib.sources`-based filtering to `lib.fileset`.
+
+  A file set can be turned back into a source using [`toSource`](#function-library-lib.fileset.toSource).
+
+  :::{.note}
+  File sets cannot represent empty directories.
+  Turning the result of this function back into a source using `toSource` will therefore not preserve empty directories.
+  :::
+
+  Type:
+    fromSource :: SourceLike -> FileSet
+
+  Example:
+    # There's no cleanSource-like function for file sets yet,
+    # but we can just convert cleanSource to a file set and use it that way
+    toSource {
+      root = ./.;
+      fileset = fromSource (lib.sources.cleanSource ./.);
+    }
+
+    # Keeping a previous sourceByRegex (which could be migrated to `lib.fileset.unions`),
+    # but removing a subdirectory using file set functions
+    difference
+      (fromSource (lib.sources.sourceByRegex ./. [
+        "^README\.md$"
+        # This regex includes everything in ./doc
+        "^doc(/.*)?$"
+      ])
+      ./doc/generated
+
+    # Use cleanSource, but limit it to only include ./Makefile and files under ./src
+    intersection
+      (fromSource (lib.sources.cleanSource ./.))
+      (unions [
+        ./Makefile
+        ./src
+      ]);
+  */
+  fromSource = source:
+    let
+      # This function uses `._isLibCleanSourceWith`, `.origSrc` and `.filter`,
+      # which are technically internal to lib.sources,
+      # but we'll allow this since both libraries are in the same code base
+      # and this function is a bridge between them.
+      isFiltered = source ? _isLibCleanSourceWith;
+      path = if isFiltered then source.origSrc else source;
+    in
+    # We can only support sources created from paths
+    if ! isPath path then
+      if isStringLike path then
+        throw ''
+          lib.fileset.fromSource: The source origin of the argument is a string-like value ("${toString path}"), but it should be a path instead.
+              Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.''
+      else
+        throw ''
+          lib.fileset.fromSource: The source origin of the argument is of type ${typeOf path}, but it should be a path instead.''
+    else if ! pathExists path then
+      throw ''
+        lib.fileset.fromSource: The source origin (${toString path}) of the argument does not exist.''
+    else if isFiltered then
+      _fromSourceFilter path source.filter
+    else
+      # If there's no filter, no need to run the expensive conversion, all subpaths will be included
+      _singleton path;
+
+  /*
     The file set containing all files that are in either of two given file sets.
     This is the same as [`unions`](#function-library-lib.fileset.unions),
     but takes just two file sets instead of a list.
diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix
index d55c84a395e..717253f4571 100644
--- a/lib/fileset/internal.nix
+++ b/lib/fileset/internal.nix
@@ -167,7 +167,12 @@ rec {
       else
         value
     else if ! isPath value then
-      if isStringLike value then
+      if value ? _isLibCleanSourceWith then
+        throw ''
+          ${context} is a `lib.sources`-based value, but it should be a file set or a path instead.
+              To convert a `lib.sources`-based value to a file set you can use `lib.fileset.fromSource`.
+              Note that this only works for sources created from paths.''
+      else if isStringLike value then
         throw ''
           ${context} ("${toString value}") is a string-like value, but it should be a file set or a path instead.
               Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
@@ -470,6 +475,59 @@ rec {
     else
       nonEmpty;
 
+  # Turn a builtins.filterSource-based source filter on a root path into a file set
+  # containing only files included by the filter.
+  # The filter is lazily called as necessary to determine whether paths are included
+  # Type: Path -> (String -> String -> Bool) -> fileset
+  _fromSourceFilter = root: sourceFilter:
+    let
+      # During the recursion we need to track both:
+      # - The path value such that we can safely call `readDir` on it
+      # - The path string value such that we can correctly call the `filter` with it
+      #
+      # While we could just recurse with the path value,
+      # this would then require converting it to a path string for every path,
+      # which is a fairly expensive operation
+
+      # Create a file set from a directory entry
+      fromDirEntry = path: pathString: type:
+        # The filter needs to run on the path as a string
+        if ! sourceFilter pathString type then
+          null
+        else if type == "directory" then
+          fromDir path pathString
+        else
+          type;
+
+      # Create a file set from a directory
+      fromDir = path: pathString:
+        mapAttrs
+          # This looks a bit funny, but we need both the path-based and the path string-based values
+          (name: fromDirEntry (path + "/${name}") (pathString + "/${name}"))
+          # We need to readDir on the path value, because reading on a path string
+          # would be unspecified if there are multiple filesystem roots
+          (readDir path);
+
+      rootPathType = pathType root;
+
+      # We need to convert the path to a string to imitate what builtins.path calls the filter function with.
+      # We don't want to rely on `toString` for this though because it's not very well defined, see ../path/README.md
+      # So instead we use `lib.path.splitRoot` to safely deconstruct the path into its filesystem root and subpath
+      # We don't need the filesystem root though, builtins.path doesn't expose that in any way to the filter.
+      # So we only need the components, which we then turn into a string as one would expect.
+      rootString = "/" + concatStringsSep "/" (components (splitRoot root).subpath);
+    in
+    if rootPathType == "directory" then
+      # We imitate builtins.path not calling the filter on the root path
+      _create root (fromDir root rootString)
+    else
+      # Direct files are always included by builtins.path without calling the filter
+      # But we need to lift up the base path to its parent to satisfy the base path invariant
+      _create (dirOf root)
+        {
+          ${baseNameOf root} = rootPathType;
+        };
+
   # Transforms the filesetTree of a file set to a shorter base path, e.g.
   # _shortenTreeBase [ "foo" ] (_create /foo/bar null)
   # => { bar = null; }
diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh
index c1c67800f5e..796a03b52f0 100755
--- a/lib/fileset/tests.sh
+++ b/lib/fileset/tests.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 # shellcheck disable=SC2016
+# shellcheck disable=SC2317
+# shellcheck disable=SC2192
 
 # Tests lib.fileset
 # Run:
@@ -224,6 +226,43 @@ withFileMonitor() {
     fi
 }
 
+
+# Create the tree structure declared in the tree variable, usage:
+#
+# tree=(
+#   [a/b] =   # Declare that file       a/b should exist
+#   [c/a] =   # Declare that file       c/a should exist
+#   [c/d/]=   # Declare that directory c/d/ should exist
+# )
+# createTree
+declare -A tree
+createTree() {
+    # Track which paths need to be created
+    local -a dirsToCreate=()
+    local -a filesToCreate=()
+    for p in "${!tree[@]}"; do
+        # If keys end with a `/` we treat them as directories, otherwise files
+        if [[ "$p" =~ /$ ]]; then
+            dirsToCreate+=("$p")
+        else
+            filesToCreate+=("$p")
+        fi
+    done
+
+    # Create all the necessary paths.
+    # This is done with only a fixed number of processes,
+    # in order to not be too slow
+    # Though this does mean we're a bit limited with how many files can be created
+    if (( ${#dirsToCreate[@]} != 0 )); then
+        mkdir -p "${dirsToCreate[@]}"
+    fi
+    if (( ${#filesToCreate[@]} != 0 )); then
+        readarray -d '' -t parentsToCreate < <(dirname -z "${filesToCreate[@]}")
+        mkdir -p "${parentsToCreate[@]}"
+        touch "${filesToCreate[@]}"
+    fi
+}
+
 # Check whether a file set includes/excludes declared paths as expected, usage:
 #
 # tree=(
@@ -232,34 +271,26 @@ withFileMonitor() {
 #   [c/d/]=   # Declare that directory c/d/ should exist and expect it to be excluded in the store path
 # )
 # checkFileset './a' # Pass the fileset as the argument
-declare -A tree
 checkFileset() {
     # New subshell so that we can have a separate trap handler, see `trap` below
     local fileset=$1
 
+    # Create the tree
+    createTree
+
     # Process the tree into separate arrays for included paths, excluded paths and excluded files.
     local -a included=()
     local -a excluded=()
     local -a excludedFiles=()
-    # Track which paths need to be created
-    local -a dirsToCreate=()
-    local -a filesToCreate=()
     for p in "${!tree[@]}"; do
-        # If keys end with a `/` we treat them as directories, otherwise files
-        if [[ "$p" =~ /$ ]]; then
-            dirsToCreate+=("$p")
-            isFile=
-        else
-            filesToCreate+=("$p")
-            isFile=1
-        fi
         case "${tree[$p]}" in
             1)
                 included+=("$p")
                 ;;
             0)
                 excluded+=("$p")
-                if [[ -n "$isFile" ]]; then
+                # If keys end with a `/` we treat them as directories, otherwise files
+                if [[ ! "$p" =~ /$ ]]; then
                     excludedFiles+=("$p")
                 fi
                 ;;
@@ -268,19 +299,6 @@ checkFileset() {
         esac
     done
 
-    # Create all the necessary paths.
-    # This is done with only a fixed number of processes,
-    # in order to not be too slow
-    # Though this does mean we're a bit limited with how many files can be created
-    if (( ${#dirsToCreate[@]} != 0 )); then
-        mkdir -p "${dirsToCreate[@]}"
-    fi
-    if (( ${#filesToCreate[@]} != 0 )); then
-        readarray -d '' -t parentsToCreate < <(dirname -z "${filesToCreate[@]}")
-        mkdir -p "${parentsToCreate[@]}"
-        touch "${filesToCreate[@]}"
-    fi
-
     expression="toSource { root = ./.; fileset = $fileset; }"
 
     # We don't have lambda's in bash unfortunately,
@@ -321,6 +339,10 @@ checkFileset() {
 expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` \(/nix/store/foobar\) is a string-like value, but it should be a path instead.
 \s*Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
 
+expectFailure 'toSource { root = cleanSourceWith { src = ./.; }; fileset = ./.; }' 'lib.fileset.toSource: `root` is a `lib.sources`-based value, but it should be a path instead.
+\s*To use a `lib.sources`-based value, convert it to a file set using `lib.fileset.fromSource` and pass it as `fileset`.
+\s*Note that this only works for sources created from paths.'
+
 # Only paths are accepted as `root`
 expectFailure 'toSource { root = 10; fileset = ./.; }' 'lib.fileset.toSource: `root` is of type int, but it should be a path instead.'
 
@@ -365,6 +387,9 @@ rm -rf -- *
 expectFailure 'toSource { root = ./.; fileset = 10; }' 'lib.fileset.toSource: `fileset` is of type int, but it should be a file set or a path instead.'
 expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` \("/some/path"\) is a string-like value, but it should be a file set or a path instead.
 \s*Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
+expectFailure 'toSource { root = ./.; fileset = cleanSourceWith { src = ./.; }; }' 'lib.fileset.toSource: `fileset` is a `lib.sources`-based value, but it should be a file set or a path instead.
+\s*To convert a `lib.sources`-based value to a file set you can use `lib.fileset.fromSource`.
+\s*Note that this only works for sources created from paths.'
 
 # Path coercion errors for non-existent paths
 expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` \('"$work"'/a\) is a path that does not exist.'
@@ -995,6 +1020,217 @@ touch 0 "${filesToCreate[@]}"
 expectTrace 'unions (mapAttrsToList (n: _: ./. + "/${n}") (removeAttrs (builtins.readDir ./.) [ "0" ]))' "$expectedTrace"
 rm -rf -- *
 
+## lib.fileset.fromSource
+
+# Check error messages
+expectFailure 'fromSource null' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'
+
+expectFailure 'fromSource (lib.cleanSource "")' 'lib.fileset.fromSource: The source origin of the argument is a string-like value \(""\), but it should be a path instead.
+\s*Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.'
+
+expectFailure 'fromSource (lib.cleanSource null)' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'
+
+# fromSource on a path works and is the same as coercing that path
+mkdir a
+touch a/b c
+expectEqual 'trace (fromSource ./.) null' 'trace ./. null'
+rm -rf -- *
+
+# Check that converting to a file set doesn't read the included files
+mkdir a
+touch a/b
+run() {
+    expectEqual "trace (fromSource (lib.cleanSourceWith { src = ./a; })) null" "builtins.trace \"$work/a (all files in directory)\" null"
+    rm a/b
+}
+withFileMonitor run a/b
+rm -rf -- *
+
+# Check that converting to a file set doesn't read entries for directories that are filtered out
+mkdir -p a/b
+touch a/b/c
+run() {
+    expectEqual "trace (fromSource (lib.cleanSourceWith {
+      src = ./a;
+      filter = pathString: type: false;
+    })) null" "builtins.trace \"(empty)\" null"
+    rm a/b/c
+    rmdir a/b
+}
+withFileMonitor run a/b
+rm -rf -- *
+
+# The filter is not needed on empty directories
+expectEqual 'trace (fromSource (lib.cleanSourceWith {
+  src = ./.;
+  filter = abort "filter should not be needed";
+})) null' 'trace _emptyWithoutBase null'
+
+# Single files also work
+touch a b
+expectEqual 'trace (fromSource (cleanSourceWith { src = ./a; })) null' 'trace ./a null'
+rm -rf -- *
+
+# For a tree assigning each subpath true/false,
+# check whether a source filter with those results includes the same files
+# as a file set created using fromSource. Usage:
+#
+# tree=(
+#   [a]=1  # ./a is a file and the filter should return true for it
+#   [b/]=0 # ./b is a directory and the filter should return false for it
+# )
+# checkSource
+checkSource() {
+    createTree
+
+    # Serialise the tree as JSON (there's only minimal savings with jq,
+    # and we don't need to handle escapes)
+    {
+        echo "{"
+        first=1
+        for p in "${!tree[@]}"; do
+            if [[ -z "$first" ]]; then
+                echo ","
+            else
+                first=
+            fi
+            echo "\"$p\":"
+            case "${tree[$p]}" in
+                1)
+                    echo "true"
+                    ;;
+                0)
+                    echo "false"
+                    ;;
+                *)
+                    die "Unsupported tree value: ${tree[$p]}"
+            esac
+        done
+        echo "}"
+    } > "$tmp/tree.json"
+
+    # An expression to create a source value with a filter matching the tree
+    sourceExpr='
+      let
+        tree = importJSON '"$tmp"'/tree.json;
+      in
+      cleanSourceWith {
+        src = ./.;
+        filter =
+          pathString: type:
+          let
+            stripped = removePrefix (toString ./. + "/") pathString;
+            key = stripped + optionalString (type == "directory") "/";
+          in
+          tree.${key} or
+            (throw "tree key ${key} missing");
+      }
+    '
+
+    filesetExpr='
+      toSource {
+        root = ./.;
+        fileset = fromSource ('"$sourceExpr"');
+      }
+    '
+
+    # Turn both into store paths
+    sourceStorePath=$(expectStorePath "$sourceExpr")
+    filesetStorePath=$(expectStorePath "$filesetExpr")
+
+    # Loop through each path in the tree
+    while IFS= read -r -d $'\0' subpath; do
+        if [[ ! -e "$sourceStorePath"/"$subpath" ]]; then
+            # If it's not in the source store path, it's also not in the file set store path
+            if [[ -e "$filesetStorePath"/"$subpath" ]]; then
+                die "The store path $sourceStorePath created by $expr doesn't contain $subpath, but the corresponding store path $filesetStorePath created via fromSource does contain $subpath"
+            fi
+        elif [[ -z "$(find "$sourceStorePath"/"$subpath" -type f)" ]]; then
+            # If it's an empty directory in the source store path, it shouldn't be in the file set store path
+            if [[ -e "$filesetStorePath"/"$subpath" ]]; then
+                die "The store path $sourceStorePath created by $expr contains the path $subpath without any files, but the corresponding store path $filesetStorePath created via fromSource didn't omit it"
+            fi
+        else
+            # If it's non-empty directory or a file, it should be in the file set store path
+            if [[ ! -e "$filesetStorePath"/"$subpath" ]]; then
+                die "The store path $sourceStorePath created by $expr contains the non-empty path $subpath, but the corresponding store path $filesetStorePath created via fromSource doesn't include it"
+            fi
+        fi
+    done < <(find . -mindepth 1 -print0)
+
+    rm -rf -- *
+}
+
+# Check whether the filter is evaluated correctly
+tree=(
+    [a]=
+    [b/]=
+    [b/c]=
+    [b/d]=
+    [e/]=
+    [e/e/]=
+)
+# We fill out the above tree values with all possible combinations of 0 and 1
+# Then check whether a filter based on those return values gets turned into the corresponding file set
+for i in $(seq 0 $((2 ** ${#tree[@]} - 1 ))); do
+    for p in "${!tree[@]}"; do
+        tree[$p]=$(( i % 2 ))
+        (( i /= 2 )) || true
+    done
+    checkSource
+done
+
+# The filter is called with the same arguments in the same order
+mkdir a e
+touch a/b a/c d e
+expectEqual '
+  trace (fromSource (cleanSourceWith {
+    src = ./.;
+    filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
+  })) null
+' '
+  builtins.seq (cleanSourceWith {
+    src = ./.;
+    filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
+  }).outPath
+  builtins.trace "'"$work"' (all files in directory)"
+  null
+'
+rm -rf -- *
+
+# Test that if a directory is not included, the filter isn't called on its contents
+mkdir a b
+touch a/c b/d
+expectEqual 'trace (fromSource (cleanSourceWith {
+  src = ./.;
+  filter = pathString: type:
+    if pathString == toString ./a then
+      false
+    else if pathString == toString ./b then
+      true
+    else if pathString == toString ./b/d then
+      true
+    else
+      abort "This filter should not be called with path ${pathString}";
+})) null' 'trace (_create ./. { b = "directory"; }) null'
+rm -rf -- *
+
+# The filter is called lazily:
+# If a later say intersection removes a part of the tree, the filter won't run on it
+mkdir a d
+touch a/{b,c} d/e
+expectEqual 'trace (intersection ./a (fromSource (lib.cleanSourceWith {
+  src = ./.;
+  filter = pathString: type:
+    if pathString == toString ./a || pathString == toString ./a/b then
+      true
+    else if pathString == toString ./a/c then
+      false
+    else
+      abort "filter should not be called on ${pathString}";
+}))) null' 'trace ./a/b null'
+rm -rf -- *
+
 # TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets
 
 echo >&2 tests ok