Files
nixpkgs/lib/tests/sources.sh
adisbladis 59d55cbaa3 lib.sources.sourceByGlobs: init function
Adds a source filtering function inspired by [doublestar](https://github.com/bmatcuk/doublestar).

This has been in use in a few private repositories for the last ~6 months with success.

- Testing

This was originally tested with the nix-unit testsuite:
```
let
  inherit (import ./internal.nix) mkSourceFilter mkMatcher;
in
{
  mkMatcher = {
    empty = {
      testMatch = {
        expr = mkMatcher "" "" "regular";
        expected = true;
      };

      testNoMatch = {
        expr = mkMatcher "" "foo" "regular";
        expected = false;
      };
    };

    simple = {
      testMatch = {
        expr = mkMatcher "foo" "foo" "regular";
        expected = true;
      };

      testNoMatch = {
        expr = mkMatcher "foo" "bar" "regular";
        expected = false;
      };
    };

    singleStar = {
      testMatch = {
        expr = mkMatcher "*.js" "foo.js" "regular";
        expected = true;
      };

      testNoMatch = {
        expr = mkMatcher "*.js" "foo.py" "regular";
        expected = false;
      };
    };

    doubleStar = {
      testMatch = {
        expr = mkMatcher "foo/**/bar" "foo/baz/bar" "regular";
        expected = true;
      };

      testNoMatch = {
        expr = mkMatcher "foo/**/bar" "foo/bar/baz" "regular";
        expected = false;
      };

      testMultiMatch = {
        expr = mkMatcher "foo/**/bar" "foo/baz/xyz/bar" "regular";
        expected = true;
      };

      testMultiMatchDoubleGlob = {
        expr = mkMatcher "foo/**/**/bar" "foo/baz/xyz/bar" "regular";
        expected = true;
      };

      testInfixMatch = {
        expr = mkMatcher "foo/**/qux/**/bar" "foo/baz/qux/baz/bar" "regular";
        expected = true;
      };

      testInfixNoMatch = {
        expr = mkMatcher "foo/**/xyz/**/bar" "foo/baz/qux/baz/bar" "regular";
        expected = false;
      };

      # Technically a partial match
      testInfixDirMatch = {
        expr = mkMatcher "foo/**/xyz/**/bar" "foo/baz/qux/baz/bar" "directory";
        expected = true;
      };
    };
  };

  mkSourceFilter = {
    testSourceFilter = {
      expr = mkSourceFilter ./fixtures [
        "bar/*.js"
      ] "bar/bar.js" "regular";
      expected = true;
    };
  };
}
```
but it was dropped in this nixpkgs contribution as the structure of nixpkgs lib testing is too primitive to incorporate this without more extensive refactoring than I'd like at the moment.

- Performance

It's hard to benchmark this against anything else meaningful except [globset](https://github.com/pdtpartners/globset), which has a very similar API.

`sourceByGlobs` avoids performance pitfalls by:

  - Using `builtins.filterSource`

      This is more performant than the fileset API.
      The downside compared to the fileset API is that any directory which matches the filter will be added to the build, even if it's empty.

  - Match paths component by component

      By splitting each pattern into a token per / separator.
      This is much faster in Nix than the doublestar algorithm.

- Globset source

```json
{
    "cpuTime": 0.8585879802703857,
    "envs": {
        "bytes": 148252864,
        "elements": 11899843,
        "number": 6631765
    },
    "gc": {
        "heapSize": 402915328,
        "totalBytes": 671288560
    },
    "list": {
        "bytes": 3358664,
        "concats": 28658,
        "elements": 419833
    },
    "nrAvoided": 11562713,
    "nrFunctionCalls": 4816963,
    "nrLookups": 4316209,
    "nrOpUpdateValuesCopied": 5686407,
    "nrOpUpdates": 464060,
    "nrPrimOpCalls": 2966970,
    "nrThunks": 7796186,
    "sets": {
        "bytes": 196404672,
        "elements": 10837802,
        "number": 1437490
    },
    "sizes": {
        "Attr": 16,
        "Bindings": 16,
        "Env": 8,
        "Value": 24
    },
    "symbols": {
        "bytes": 340652,
        "number": 32026
    },
    "values": {
        "bytes": 207367440,
        "number": 8640310
    }
}
```

- Glob-filter source

```json
{
    "cpuTime": 0.3904629945755005,
    "envs": {
        "bytes": 13263440,
        "elements": 1005877,
        "number": 652053
    },
    "gc": {
        "heapSize": 402915328,
        "totalBytes": 146914896
    },
    "list": {
        "bytes": 3032168,
        "concats": 5899,
        "elements": 379021
    },
    "nrAvoided": 1666598,
    "nrFunctionCalls": 484399,
    "nrLookups": 112698,
    "nrOpUpdateValuesCopied": 3432135,
    "nrOpUpdates": 13426,
    "nrPrimOpCalls": 1041954,
    "nrThunks": 1205792,
    "sets": {
        "bytes": 64304800,
        "elements": 3978167,
        "number": 40883
    },
    "sizes": {
        "Attr": 16,
        "Bindings": 16,
        "Env": 8,
        "Value": 24
    },
    "symbols": {
        "bytes": 285306,
        "number": 28864
    },
    "values": {
        "bytes": 42963240,
        "number": 1790135
    }
}
```
2026-05-26 21:48:59 +12:00

86 lines
2.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Tests lib/sources.nix
# Run:
# [nixpkgs]$ lib/tests/sources.sh
# or:
# [nixpkgs]$ nix-build lib/tests/release.nix

# Strict mode: exit on any failing command (-e), treat unset variables as
# errors (-u), and make a pipeline fail if any stage of it fails (pipefail).
set -euo pipefail
# Let command substitutions $(...) inherit errexit as well, so a failure
# inside them is not silently ignored.
shopt -s inherit_errexit
# Abort the test run: print the label(s) of the failing case on stderr and
# exit with status 1.
# Usage:
#   <some check> || die "case name"
die() {
  echo "test case failed: " "$@" >&2
  exit 1
}
# Make <nixpkgs> resolve to the tree under test.
# If TEST_LIB is set and non-empty, its parent directory is used; otherwise
# the directory two levels above this script is used.
if [[ -n "${TEST_LIB:-}" ]]; then
  NIX_PATH=nixpkgs="$(dirname "$TEST_LIB")"
else
  NIX_PATH=nixpkgs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.."; pwd)"
fi
# Exported separately from the assignment so a failing command substitution
# above is not masked by the exit status of `export`.
export NIX_PATH
# Removes the scratch directory; installed below as the EXIT handler so it
# runs however the script terminates.
clean_up() {
  rm -rf "$work"
}

# Fresh scratch directory in which the fixture files are created.
work="$(mktemp -d)"
trap clean_up EXIT

# All remaining commands (including the relative ./. paths passed to Nix)
# operate from inside the scratch directory.
cd "$work"
# Crude JSON string "unquoting": reads stdin and emits whatever sits between
# the first and the second double quote of each line.
# It is only applied to resulting /nix/store paths, which can't contain a "
# and don't contain any character that JSON would have to escape, so this is
# sufficient and spares us a dependency on e.g. jq.
crudeUnquoteJSON() {
  cut -d '"' -f 2
}
# Fixture files in the scratch directory; every case below filters this same set.
touch {README.md,module.o,foo.bar}

# Each case evaluates a Nix expression that interpolates the filtered source
# into a string (yielding a store path), then compares a listing of that path
# against the expected tree.
#
# Notes that apply to every listing below:
#  - "find ." names its starting point explicitly: a bare "find" only works
#    with GNU find and fails with BSD/macOS find when this script is run
#    directly. The output is identical with GNU find.
#  - "cd ... &&" makes sure nothing is listed if the store path is unusable,
#    instead of listing the current directory by accident.
#  - "sort -f" ignores case, so the order does not depend on README.md being
#    upper-case.

# Case: cleanSource on the current directory.
# Expected: foo.bar and README.md remain, module.o is absent.
dir="$(nix-instantiate --eval --strict --read-write-mode --json --expr '(with import <nixpkgs/lib>; "${
  cleanSource ./.
}")' | crudeUnquoteJSON)"
(cd "$dir" && find .) | sort -f | diff -U10 - <(cat <<EOF
.
./foo.bar
./README.md
EOF
) || die "cleanSource 1"

# Case: cleanSourceWith with a filter rejecting paths that end in ".bar".
# Expected: module.o and README.md remain, foo.bar is absent.
dir="$(nix-instantiate --eval --strict --read-write-mode --json --expr '(with import <nixpkgs/lib>; "${
  cleanSourceWith { src = '"$work"'; filter = path: type: ! hasSuffix ".bar" path; }
}")' | crudeUnquoteJSON)"
(cd "$dir" && find .) | sort -f | diff -U10 - <(cat <<EOF
.
./module.o
./README.md
EOF
) || die "cleanSourceWith 1"

# Case: the same ".bar" filter applied on top of cleanSource.
# Expected: only README.md remains, i.e. both filters take effect.
dir="$(nix-instantiate --eval --strict --read-write-mode --json --expr '(with import <nixpkgs/lib>; "${
  cleanSourceWith { src = cleanSource '"$work"'; filter = path: type: ! hasSuffix ".bar" path; }
}")' | crudeUnquoteJSON)"
(cd "$dir" && find .) | sort -f | diff -U10 - <(cat <<EOF
.
./README.md
EOF
) || die "cleanSourceWith + cleanSource"

# Case: sources.sourceByGlobs with the globs "*.md" and "**/*.o".
# Expected: module.o and README.md remain, foo.bar is absent.
dir="$(nix-instantiate --eval --strict --read-write-mode --json --expr '(with import <nixpkgs/lib>; "${
  sources.sourceByGlobs '"$work"' [ "*.md" "**/*.o" ]
}")' | crudeUnquoteJSON)"
(cd "$dir" && find .) | sort -f | diff -U10 - <(cat <<EOF
.
./module.o
./README.md
EOF
) || die "sourceByGlobs 1"

echo >&2 tests ok