summary refs log tree commit diff
path: root/lib/fileset/internal.nix
blob: 2c329edb390d692e0881489160d3505a586e8e64 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
{ lib ? import ../. }:
let

  inherit (builtins)
    isAttrs
    isPath
    isString
    pathExists
    readDir
    typeOf
    split
    ;

  inherit (lib.attrsets)
    attrValues
    mapAttrs
    setAttrByPath
    zipAttrsWith
    ;

  inherit (lib.filesystem)
    pathType
    ;

  inherit (lib.lists)
    all
    commonPrefix
    drop
    elemAt
    filter
    findFirstIndex
    foldl'
    head
    length
    sublist
    tail
    ;

  inherit (lib.path)
    append
    splitRoot
    ;

  inherit (lib.path.subpath)
    components
    join
    ;

  inherit (lib.strings)
    isStringLike
    concatStringsSep
    substring
    stringLength
    ;

in
# Rare case of justified usage of rec:
# - This file is internal, so the return value doesn't matter, no need to make things overridable
# - The functions depend on each other
# - We want to expose all of these functions for easy testing
rec {

  # If you change the internal representation, make sure to:
  # - Increment this version
  # - Add an additional migration function below
  # - Update the description of the internal representation in ./README.md
  _currentVersion = 2;

  # Migrations between versions. The 0th element converts from v0 to v1, and so on
  migrations = [
    # Convert v0 into v1: Add the _internalBase{Root,Components} attributes
    (
      filesetV0:
      let
        parts = splitRoot filesetV0._internalBase;
      in
      filesetV0 // {
        _internalVersion = 1;
        _internalBaseRoot = parts.root;
        _internalBaseComponents = components parts.subpath;
      }
    )

    # Convert v1 into v2: filesetTree's can now also omit attributes to signal paths not being included
    (
      filesetV1:
      # This change is backwards compatible (but not forwards compatible, so we still need a new version)
      filesetV1 // {
        _internalVersion = 2;
      }
    )
  ];

  # Create a fileset, see ./README.md#fileset
  # Type: path -> filesetTree -> fileset
  _create = base: tree:
    let
      # Decompose the base into its components
      # See ../path/README.md for why we're not just using `toString`
      parts = splitRoot base;
    in
    {
      _type = "fileset";

      _internalVersion = _currentVersion;
      _internalBase = base;
      _internalBaseRoot = parts.root;
      _internalBaseComponents = components parts.subpath;
      _internalTree = tree;

      # Double __ to make it be evaluated and ordered first
      __noEval = throw ''
        lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.'';
    };

  # Coerce a value to a fileset, erroring when the value cannot be coerced.
  # The string gives the context for error messages.
  # Type: String -> (fileset | Path) -> fileset
  _coerce = context: value:
    if value._type or "" == "fileset" then
      if value._internalVersion > _currentVersion then
        throw ''
          ${context} is a file set created from a future version of the file set library with a different internal representation:
              - Internal version of the file set: ${toString value._internalVersion}
              - Internal version of the library: ${toString _currentVersion}
              Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.''
      else if value._internalVersion < _currentVersion then
        let
          # Get all the migration functions necessary to convert from the old to the current version
          migrationsToApply = sublist value._internalVersion (_currentVersion - value._internalVersion) migrations;
        in
        foldl' (value: migration: migration value) value migrationsToApply
      else
        value
    else if ! isPath value then
      if isStringLike value then
        throw ''
          ${context} ("${toString value}") is a string-like value, but it should be a path instead.
              Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
      else
        throw ''
          ${context} is of type ${typeOf value}, but it should be a path instead.''
    else if ! pathExists value then
      throw ''
        ${context} (${toString value}) does not exist.''
    else
      _singleton value;

  # Coerce many values to filesets, erroring when any value cannot be coerced,
  # or if the filesystem root of the values doesn't match.
  # Type: String -> [ { context :: String, value :: fileset | Path } ] -> [ fileset ]
  _coerceMany = functionContext: list:
    let
      filesets = map ({ context, value }:
        _coerce "${functionContext}: ${context}" value
      ) list;

      firstBaseRoot = (head filesets)._internalBaseRoot;

      # Finds the first element with a filesystem root different than the first element, if any
      differentIndex = findFirstIndex (fileset:
        firstBaseRoot != fileset._internalBaseRoot
      ) null filesets;
    in
    if differentIndex != null then
      throw ''
        ${functionContext}: Filesystem roots are not the same:
            ${(head list).context}: root "${toString firstBaseRoot}"
            ${(elemAt list differentIndex).context}: root "${toString (elemAt filesets differentIndex)._internalBaseRoot}"
            Different roots are not supported.''
    else
      filesets;

  # Create a file set from a path.
  # Type: Path -> fileset
  _singleton = path:
    let
      type = pathType path;
    in
    if type == "directory" then
      _create path type
    else
      # This turns a file path ./default.nix into a fileset with
      # - _internalBase: ./.
      # - _internalTree: {
      #     "default.nix" = <type>;
      #   }
      # See ./README.md#single-files
      _create (dirOf path)
        {
          ${baseNameOf path} = type;
        };

  # Expand a directory representation to an equivalent one in attribute set form.
  # All directory entries are included in the result.
  # Type: Path -> filesetTree -> { <name> = filesetTree; }
  _directoryEntries = path: value:
    if value == "directory" then
      readDir path
    else
      # Set all entries not present to null
      mapAttrs (name: value: null) (readDir path)
      // value;

  /*
    Simplify a filesetTree recursively:
    - Replace all directories that have no files with `null`
      This removes directories that would be empty
    - Replace all directories with all files with `"directory"`
      This speeds up the source filter function

    Note that this function is strict, it evaluates the entire tree

    Type: Path -> filesetTree -> filesetTree
  */
  _simplifyTree = path: tree:
    if tree == "directory" || isAttrs tree then
      let
        entries = _directoryEntries path tree;
        simpleSubtrees = mapAttrs (name: _simplifyTree (path + "/${name}")) entries;
        subtreeValues = attrValues simpleSubtrees;
      in
      # This triggers either when all files in a directory are filtered out
      # Or when the directory doesn't contain any files at all
      if all isNull subtreeValues then
        null
      # Triggers when we have the same as a `readDir path`, so we can turn it back into an equivalent "directory".
      else if all isString subtreeValues then
        "directory"
      else
        simpleSubtrees
    else
      tree;

  # Turn a fileset into a source filter function suitable for `builtins.path`
  # Only directories recursively containing at least one files are recursed into
  # Type: Path -> fileset -> (String -> String -> Bool)
  _toSourceFilter = fileset:
    let
      # Simplify the tree, necessary to make sure all empty directories are null
      # which has the effect that they aren't included in the result
      tree = _simplifyTree fileset._internalBase fileset._internalTree;

      # The base path as a string with a single trailing slash
      baseString =
        if fileset._internalBaseComponents == [] then
          # Need to handle the filesystem root specially
          "/"
        else
          "/" + concatStringsSep "/" fileset._internalBaseComponents + "/";

      baseLength = stringLength baseString;

      # Check whether a list of path components under the base path exists in the tree.
      # This function is called often, so it should be fast.
      # Type: [ String ] -> Bool
      inTree = components:
        let
          recurse = index: localTree:
            if isAttrs localTree then
              # We have an attribute set, meaning this is a directory with at least one file
              if index >= length components then
                # The path may have no more components though, meaning the filter is running on the directory itself,
                # so we always include it, again because there's at least one file in it.
                true
              else
                # If we do have more components, the filter runs on some entry inside this directory, so we need to recurse
                # We do +2 because builtins.split is an interleaved list of the inbetweens and the matches
                recurse (index + 2) localTree.${elemAt components index}
            else
              # If it's not an attribute set it can only be either null (in which case it's not included)
              # or a string ("directory" or "regular", etc.) in which case it's included
              localTree != null;
        in recurse 0 tree;

      # Filter suited when there's no files
      empty = _: _: false;

      # Filter suited when there's some files
      # This can't be used for when there's no files, because the base directory is always included
      nonEmpty =
        path: _:
        let
          # Add a slash to the path string, turning "/foo" to "/foo/",
          # making sure to not have any false prefix matches below.
          # Note that this would produce "//" for "/",
          # but builtins.path doesn't call the filter function on the `path` argument itself,
          # meaning this function can never receive "/" as an argument
          pathSlash = path + "/";
        in
        # Same as `hasPrefix pathSlash baseString`, but more efficient.
        # With base /foo/bar we need to include /foo:
        # hasPrefix "/foo/" "/foo/bar/"
        if substring 0 (stringLength pathSlash) baseString == pathSlash then
          true
        # Same as `! hasPrefix baseString pathSlash`, but more efficient.
        # With base /foo/bar we need to exclude /baz
        # ! hasPrefix "/baz/" "/foo/bar/"
        else if substring 0 baseLength pathSlash != baseString then
          false
        else
          # Same as `removePrefix baseString path`, but more efficient.
          # From the above code we know that hasPrefix baseString pathSlash holds, so this is safe.
          # We don't use pathSlash here because we only needed the trailing slash for the prefix matching.
          # With base /foo and path /foo/bar/baz this gives
          # inTree (split "/" (removePrefix "/foo/" "/foo/bar/baz"))
          # == inTree (split "/" "bar/baz")
          # == inTree [ "bar" "baz" ]
          inTree (split "/" (substring baseLength (-1) path));
    in
    # Special case because the code below assumes that the _internalBase is always included in the result
    # which shouldn't be done when we have no files at all in the base
    # This also forces the tree before returning the filter, leads to earlier error messages
    if tree == null then
      empty
    else
      nonEmpty;

  # Computes the union of a list of filesets.
  # The filesets must already be coerced and validated to be in the same filesystem root
  # Type: [ Fileset ] -> Fileset
  _unionMany = filesets:
    let
      first = head filesets;

      # To be able to union filesetTree's together, they need to have the same base path.
      # Base paths can be unioned by taking their common prefix,
      # e.g. such that `union /foo/bar /foo/baz` has the base path `/foo`

      # A list of path components common to all base paths.
      # Note that commonPrefix can only be fully evaluated,
      # so this cannot cause a stack overflow due to a build-up of unevaluated thunks.
      commonBaseComponents = foldl'
        (components: el: commonPrefix components el._internalBaseComponents)
        first._internalBaseComponents
        # We could also not do the `tail` here to avoid a list allocation,
        # but then we'd have to pay for a potentially expensive
        # but unnecessary `commonPrefix` call
        (tail filesets);

      # The common base path assembled from a filesystem root and the common components
      commonBase = append first._internalBaseRoot (join commonBaseComponents);

      # A list of filesetTree's that all have the same base path
      # This is achieved by nesting the trees into the components they have over the common base path
      # E.g. `union /foo/bar /foo/baz` has the base path /foo
      # So the tree under `/foo/bar` gets nested under `{ bar = ...; ... }`,
      # while the tree under `/foo/baz` gets nested under `{ baz = ...; ... }`
      # Therefore allowing combined operations over them.
      trees = map (fileset:
        setAttrByPath
          (drop (length commonBaseComponents) fileset._internalBaseComponents)
          fileset._internalTree
        ) filesets;

      # Folds all trees together into a single one using _unionTree
      # We do not use a fold here because it would cause a thunk build-up
      # which could cause a stack overflow for a large number of trees
      resultTree = _unionTrees trees;
    in
    _create commonBase resultTree;

  # The union of multiple filesetTree's with the same base path.
  # Later elements are only evaluated if necessary.
  # Type: [ filesetTree ] -> filesetTree
  _unionTrees = trees:
    let
      stringIndex = findFirstIndex isString null trees;
      withoutNull = filter (tree: tree != null) trees;
    in
    if stringIndex != null then
      # If there's a string, it's always a fully included tree (dir or file),
      # no need to look at other elements
      elemAt trees stringIndex
    else if withoutNull == [ ] then
      # If all trees are null, then the resulting tree is also null
      null
    else
      # The non-null elements have to be attribute sets representing partial trees
      # We need to recurse into those
      zipAttrsWith (name: _unionTrees) withoutNull;
}