From 36968c28ca4302aa18ee98ca886ed918f3db0c77 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 28 Feb 2026 15:46:52 -0600 Subject: [PATCH 01/12] fetchToStore: cache fingerprint lookups for filtered paths and store paths Previously, fetchToStore2 skipped the fingerprint cache entirely when a PathFilter was present, and had no caching for on-disk store paths that lacked an accessor-level fingerprint. Two changes: 1. Always call getFingerprint() regardless of filter. When a filter is present, prefix the cache key with "filtered:" to separate filtered and unfiltered results. This allows filtered paths with stable fingerprints (e.g., git-backed zones) to cache across evaluations. 2. For paths without an accessor fingerprint, check if the physical path is an immutable store path. If so, use "storePath:" as a stable fingerprint for the SQLite cache. This avoids re-hashing on-disk nixpkgs store subpaths on every evaluation. Co-Authored-By: Claude Opus 4.6 --- src/libfetchers/fetch-to-store.cc | 34 +++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/libfetchers/fetch-to-store.cc b/src/libfetchers/fetch-to-store.cc index 4b58ea16d05e..10a42c339ac5 100644 --- a/src/libfetchers/fetch-to-store.cc +++ b/src/libfetchers/fetch-to-store.cc @@ -37,11 +37,13 @@ std::pair fetchToStore2( { std::optional cacheKey; - auto [subpath, fingerprint] = filter ? std::pair>{path.path, std::nullopt} - : path.accessor->getFingerprint(path.path); + // Always try getFingerprint, even when a filter is present. + // For filtered paths, we use a "filtered:" prefix to separate cache entries. + auto [subpath, fingerprint] = path.accessor->getFingerprint(path.path); if (fingerprint) { - cacheKey = makeSourcePathToHashCacheKey(*fingerprint, method, subpath.abs()); + auto fp = filter ? 
"filtered:" + *fingerprint : *fingerprint; + cacheKey = makeSourcePathToHashCacheKey(fp, method, subpath.abs()); if (auto res = settings.getCache()->lookup(*cacheKey)) { auto hash = Hash::parseSRI(fetchers::getStrAttr(*res, "hash")); auto storePath = @@ -60,7 +62,31 @@ std::pair fetchToStore2( static auto barf = getEnv("_NIX_TEST_BARF_ON_UNCACHEABLE").value_or("") == "1"; if (barf && !filter) throw Error("source path '%s' is uncacheable (filter=%d)", path, (bool) filter); - // FIXME: could still provide in-memory caching keyed on `SourcePath`. + + // For immutable store paths, use the physical path as a stable fingerprint. + // This works even with filters: store paths are immutable, so applying the + // same filter always produces the same result. We use a distinct cache key + // prefix for filtered vs unfiltered to avoid collisions. + auto physPath = path.accessor->getPhysicalPath(path.path); + if (physPath && store.isInStore(physPath->string())) { + auto prefix = filter ? "storePathFiltered:" : "storePath:"; + fingerprint = prefix + physPath->string(); + cacheKey = makeSourcePathToHashCacheKey(*fingerprint, method, subpath.abs()); + if (auto res = settings.getCache()->lookup(*cacheKey)) { + auto hash = Hash::parseSRI(fetchers::getStrAttr(*res, "hash")); + auto storePath = + store.makeFixedOutputPathFromCA(name, ContentAddressWithReferences::fromParts(method, hash, {})); + if (mode == FetchMode::DryRun || store.maybeQueryPathInfo(storePath)) { + debug( + "source path '%s' store path cache hit in '%s' (hash '%s')", + path, + store.printStorePath(storePath), + hash.to_string(HashFormat::SRI, true)); + return {storePath, hash}; + } + } + } + debug("source path '%s' is uncacheable", path); } From 3edf54fb8f5daf820e73c696b7cf70ee4b3019d5 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 28 Feb 2026 15:48:45 -0600 Subject: [PATCH 02/12] fetchToStore: add getGitTreeHash and treeHashToNarHash cache Add a virtual getGitTreeHash() method to SourceAccessor that 
returns the git tree/blob SHA1 for a path, if available. Implement it in: - GitSourceAccessor: returns the OID from the git tree entry - GitExportIgnoreSourceAccessor: returns a synthetic hash incorporating "exportIgnore:" prefix to distinguish from raw trees - FilteringSourceAccessor: returns nullopt (arbitrary filters invalidate the tree hash) - MountedSourceAccessor: propagates through mounts - UnionSourceAccessor: returns first non-null result Use this in fetchToStore2 as a third cache tier: when the fingerprint cache misses (e.g., first run after cache clear), look up the git tree hash in the treeHashToNarHash SQLite cache. This maps git SHA1 tree OIDs to NAR SHA256 hashes, avoiding expensive NAR serialization when the mapping is already known from a previous evaluation. Co-Authored-By: Claude Opus 4.6 --- src/libfetchers/fetch-to-store.cc | 30 +++++++++++++++++++ src/libfetchers/filtering-source-accessor.cc | 7 +++++ src/libfetchers/git-utils.cc | 21 +++++++++++++ .../nix/fetchers/filtering-source-accessor.hh | 2 ++ .../include/nix/util/source-accessor.hh | 9 ++++++ src/libutil/mounted-source-accessor.cc | 6 ++++ src/libutil/union-source-accessor.cc | 9 ++++++ 7 files changed, 84 insertions(+) diff --git a/src/libfetchers/fetch-to-store.cc b/src/libfetchers/fetch-to-store.cc index 10a42c339ac5..acb16818f6c2 100644 --- a/src/libfetchers/fetch-to-store.cc +++ b/src/libfetchers/fetch-to-store.cc @@ -90,6 +90,29 @@ std::pair fetchToStore2( debug("source path '%s' is uncacheable", path); } + // Fast path: if we have a git tree hash, check the treeHashToNarHash cache. + // This works even when the fingerprint cache misses (e.g., first run). 
+ std::optional treeHash; + if (!filter && method == ContentAddressMethod::Raw::NixArchive) { + treeHash = path.accessor->getGitTreeHash(path.path); + if (treeHash) { + fetchers::Cache::Key treeKey{"treeHashToNarHash", {{"treeHash", treeHash->gitRev()}}}; + if (auto res = settings.getCache()->lookup(treeKey)) { + auto narHash = Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), HashAlgorithm::SHA256); + auto storePath = store.makeFixedOutputPathFromCA( + name, ContentAddressWithReferences::fromParts(method, narHash, {})); + if (mode == FetchMode::DryRun || store.maybeQueryPathInfo(storePath)) { + debug( + "source path '%s' tree hash cache hit in '%s' (hash '%s')", + path, + store.printStorePath(storePath), + narHash.to_string(HashFormat::SRI, true)); + return {storePath, narHash}; + } + } + } + } + Activity act( *logger, lvlChatty, @@ -132,6 +155,13 @@ std::pair fetchToStore2( if (cacheKey) settings.getCache()->upsert(*cacheKey, {{"hash", hash.to_string(HashFormat::SRI, true)}}); + // Populate treeHashToNarHash cache for future lookups. + if (treeHash) { + settings.getCache()->upsert( + {"treeHashToNarHash", {{"treeHash", treeHash->gitRev()}}}, + {{"narHash", hash.to_string(HashFormat::SRI, true)}}); + } + return {storePath, hash}; } diff --git a/src/libfetchers/filtering-source-accessor.cc b/src/libfetchers/filtering-source-accessor.cc index f883c0921903..1c583d636d08 100644 --- a/src/libfetchers/filtering-source-accessor.cc +++ b/src/libfetchers/filtering-source-accessor.cc @@ -68,6 +68,13 @@ std::pair> FilteringSourceAccessor::getFin return next->getFingerprint(prefix / path); } +std::optional FilteringSourceAccessor::getGitTreeHash(const CanonPath & path) +{ + // Filtering changes the tree content, so the raw tree hash + // doesn't correspond to the filtered NAR hash. 
+ return std::nullopt; +} + void FilteringSourceAccessor::checkAccess(const CanonPath & path) { if (!isAllowed(path)) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 9e79cdbff8d3..5e467304f5cb 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -954,6 +954,17 @@ struct GitSourceAccessor : SourceAccessor return toHash(*git_tree_entry_id(entry)); } + std::optional getGitTreeHash(const CanonPath & path) override + { + auto state(state_.lock()); + if (path.isRoot()) + return toHash(*git_object_id(state->root.get())); + auto entry = lookup(*state, path); + if (!entry) + return std::nullopt; + return toHash(*git_tree_entry_id(entry)); + } + boost::unordered_flat_map lookupCache; /* Recursively look up 'path' relative to the root. */ @@ -1105,6 +1116,16 @@ struct GitExportIgnoreSourceAccessor : CachingFilteringSourceAccessor { } + std::optional getGitTreeHash(const CanonPath & path) override + { + auto h = next->getGitTreeHash(prefix / path); + if (!h) + return std::nullopt; + // Use a synthetic hash to distinguish export-ignored trees from raw ones, + // since .gitattributes export-ignore filtering is deterministic from the tree. 
+ return hashString(HashAlgorithm::SHA1, "exportIgnore:" + h->to_string(HashFormat::Base16, false)); + } + bool gitAttrGet(const CanonPath & path, const char * attrName, const char *& valueOut) { const char * pathCStr = path.rel_c_str(); diff --git a/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh b/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh index 5e98caa58165..871b6c01dac5 100644 --- a/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh +++ b/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh @@ -52,6 +52,8 @@ struct FilteringSourceAccessor : SourceAccessor std::pair> getFingerprint(const CanonPath & path) override; + std::optional getGitTreeHash(const CanonPath & path) override; + /** * Call `makeNotAllowedError` to throw a `RestrictedPathError` * exception if `isAllowed()` returns `false` for `path`. diff --git a/src/libutil/include/nix/util/source-accessor.hh b/src/libutil/include/nix/util/source-accessor.hh index 1006895b33c0..49bbb0ccb9dc 100644 --- a/src/libutil/include/nix/util/source-accessor.hh +++ b/src/libutil/include/nix/util/source-accessor.hh @@ -201,6 +201,15 @@ struct SourceAccessor : std::enable_shared_from_this return {path, fingerprint}; } + /** + * Return the git tree/blob SHA1 hash for `path`, if available. + * Used to look up cached NAR hashes via treeHashToNarHash. + */ + virtual std::optional getGitTreeHash(const CanonPath & path) + { + return std::nullopt; + } + /** * Return the maximum last-modified time of the files in this * tree, if available. 
diff --git a/src/libutil/mounted-source-accessor.cc b/src/libutil/mounted-source-accessor.cc index d9398045cc56..264e38d266c0 100644 --- a/src/libutil/mounted-source-accessor.cc +++ b/src/libutil/mounted-source-accessor.cc @@ -92,6 +92,12 @@ struct MountedSourceAccessorImpl : MountedSourceAccessor return nullptr; } + std::optional getGitTreeHash(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->getGitTreeHash(subpath); + } + std::pair> getFingerprint(const CanonPath & path) override { if (fingerprint) diff --git a/src/libutil/union-source-accessor.cc b/src/libutil/union-source-accessor.cc index e3b39f14ed27..10f813a2425b 100644 --- a/src/libutil/union-source-accessor.cc +++ b/src/libutil/union-source-accessor.cc @@ -73,6 +73,15 @@ struct UnionSourceAccessor : SourceAccessor return std::nullopt; } + std::optional getGitTreeHash(const CanonPath & path) override + { + for (auto & accessor : accessors) { + if (auto h = accessor->getGitTreeHash(path)) + return h; + } + return std::nullopt; + } + std::pair> getFingerprint(const CanonPath & path) override { if (fingerprint) From a10b9c639675a9f4e03a22d2301301d67d2aa563 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 28 Feb 2026 15:48:56 -0600 Subject: [PATCH 03/12] libexpr: add fingerprint to DirtyOverlaySourceAccessor DirtyOverlaySourceAccessor (used for tectonix zones with uncommitted changes) had no getFingerprint() override, so it always returned nullopt. Combined with StorePath::random() generating a different virtual path each evaluation, the fetchToStore cache could never identify the same dirty zone across runs, causing expensive NAR serialization every time. Add a getFingerprint() that combines the base git accessor's fingerprint with the actual content of dirty files. Since dirty files are few and small, reading them is much cheaper than NAR-serializing the entire zone. The fingerprint changes when any dirty file is modified, ensuring cache correctness. 
Co-Authored-By: Claude Opus 4.6 --- src/libexpr/eval.cc | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index f8bb6e378a50..5d769c5bf80c 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -909,6 +909,35 @@ struct DirtyOverlaySourceAccessor : SourceAccessor std::string readLink(const CanonPath & path) override { return (isDirty(path) ? disk : base)->readLink(path); } std::optional getPhysicalPath(const CanonPath & path) override { return (isDirty(path) ? disk : base)->getPhysicalPath(path); } + std::pair> getFingerprint(const CanonPath & path) override + { + auto [subpath, baseFp] = base->getFingerprint(path); + if (!baseFp) + return {path, std::nullopt}; + + // Incorporate dirty file content into the fingerprint. + // Dirty files are few and small, so reading them is much + // cheaper than NAR-serializing the entire zone. + std::string toHash = *baseFp; + std::vector sorted(dirtyFiles.begin(), dirtyFiles.end()); + std::sort(sorted.begin(), sorted.end()); + for (auto & f : sorted) { + toHash += '\0' + f + '\0'; + auto st = disk->maybeLstat(CanonPath(f)); + if (!st) { + toHash += 'D'; + } else if (st->type == Type::tRegular) { + toHash += disk->readFile(CanonPath(f)); + } else if (st->type == Type::tSymlink) { + toHash += 'L' + disk->readLink(CanonPath(f)); + } else { + toHash += '?'; + } + } + auto hash = hashString(HashAlgorithm::SHA256, toHash); + return {path, "dirty:" + hash.to_string(HashFormat::Nix32, false)}; + } + DirEntries readDirectory(const CanonPath & path) override { auto rel = path.isRoot() ? "" : std::string(path.rel()); From 89c24e8ff8a1623ffc74190a8140f34c97deb3b6 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 28 Feb 2026 16:31:04 -0600 Subject: [PATCH 04/12] Revert "fetchToStore: add getGitTreeHash and treeHashToNarHash cache" This reverts commit 3edf54fb8f5daf820e73c696b7cf70ee4b3019d5. 
--- src/libfetchers/fetch-to-store.cc | 30 ------------------- src/libfetchers/filtering-source-accessor.cc | 7 ----- src/libfetchers/git-utils.cc | 21 ------------- .../nix/fetchers/filtering-source-accessor.hh | 2 -- .../include/nix/util/source-accessor.hh | 9 ------ src/libutil/mounted-source-accessor.cc | 6 ---- src/libutil/union-source-accessor.cc | 9 ------ 7 files changed, 84 deletions(-) diff --git a/src/libfetchers/fetch-to-store.cc b/src/libfetchers/fetch-to-store.cc index acb16818f6c2..10a42c339ac5 100644 --- a/src/libfetchers/fetch-to-store.cc +++ b/src/libfetchers/fetch-to-store.cc @@ -90,29 +90,6 @@ std::pair fetchToStore2( debug("source path '%s' is uncacheable", path); } - // Fast path: if we have a git tree hash, check the treeHashToNarHash cache. - // This works even when the fingerprint cache misses (e.g., first run). - std::optional treeHash; - if (!filter && method == ContentAddressMethod::Raw::NixArchive) { - treeHash = path.accessor->getGitTreeHash(path.path); - if (treeHash) { - fetchers::Cache::Key treeKey{"treeHashToNarHash", {{"treeHash", treeHash->gitRev()}}}; - if (auto res = settings.getCache()->lookup(treeKey)) { - auto narHash = Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), HashAlgorithm::SHA256); - auto storePath = store.makeFixedOutputPathFromCA( - name, ContentAddressWithReferences::fromParts(method, narHash, {})); - if (mode == FetchMode::DryRun || store.maybeQueryPathInfo(storePath)) { - debug( - "source path '%s' tree hash cache hit in '%s' (hash '%s')", - path, - store.printStorePath(storePath), - narHash.to_string(HashFormat::SRI, true)); - return {storePath, narHash}; - } - } - } - } - Activity act( *logger, lvlChatty, @@ -155,13 +132,6 @@ std::pair fetchToStore2( if (cacheKey) settings.getCache()->upsert(*cacheKey, {{"hash", hash.to_string(HashFormat::SRI, true)}}); - // Populate treeHashToNarHash cache for future lookups. 
- if (treeHash) { - settings.getCache()->upsert( - {"treeHashToNarHash", {{"treeHash", treeHash->gitRev()}}}, - {{"narHash", hash.to_string(HashFormat::SRI, true)}}); - } - return {storePath, hash}; } diff --git a/src/libfetchers/filtering-source-accessor.cc b/src/libfetchers/filtering-source-accessor.cc index 1c583d636d08..f883c0921903 100644 --- a/src/libfetchers/filtering-source-accessor.cc +++ b/src/libfetchers/filtering-source-accessor.cc @@ -68,13 +68,6 @@ std::pair> FilteringSourceAccessor::getFin return next->getFingerprint(prefix / path); } -std::optional FilteringSourceAccessor::getGitTreeHash(const CanonPath & path) -{ - // Filtering changes the tree content, so the raw tree hash - // doesn't correspond to the filtered NAR hash. - return std::nullopt; -} - void FilteringSourceAccessor::checkAccess(const CanonPath & path) { if (!isAllowed(path)) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 5e467304f5cb..9e79cdbff8d3 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -954,17 +954,6 @@ struct GitSourceAccessor : SourceAccessor return toHash(*git_tree_entry_id(entry)); } - std::optional getGitTreeHash(const CanonPath & path) override - { - auto state(state_.lock()); - if (path.isRoot()) - return toHash(*git_object_id(state->root.get())); - auto entry = lookup(*state, path); - if (!entry) - return std::nullopt; - return toHash(*git_tree_entry_id(entry)); - } - boost::unordered_flat_map lookupCache; /* Recursively look up 'path' relative to the root. */ @@ -1116,16 +1105,6 @@ struct GitExportIgnoreSourceAccessor : CachingFilteringSourceAccessor { } - std::optional getGitTreeHash(const CanonPath & path) override - { - auto h = next->getGitTreeHash(prefix / path); - if (!h) - return std::nullopt; - // Use a synthetic hash to distinguish export-ignored trees from raw ones, - // since .gitattributes export-ignore filtering is deterministic from the tree. 
- return hashString(HashAlgorithm::SHA1, "exportIgnore:" + h->to_string(HashFormat::Base16, false)); - } - bool gitAttrGet(const CanonPath & path, const char * attrName, const char *& valueOut) { const char * pathCStr = path.rel_c_str(); diff --git a/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh b/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh index 871b6c01dac5..5e98caa58165 100644 --- a/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh +++ b/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh @@ -52,8 +52,6 @@ struct FilteringSourceAccessor : SourceAccessor std::pair> getFingerprint(const CanonPath & path) override; - std::optional getGitTreeHash(const CanonPath & path) override; - /** * Call `makeNotAllowedError` to throw a `RestrictedPathError` * exception if `isAllowed()` returns `false` for `path`. diff --git a/src/libutil/include/nix/util/source-accessor.hh b/src/libutil/include/nix/util/source-accessor.hh index 49bbb0ccb9dc..1006895b33c0 100644 --- a/src/libutil/include/nix/util/source-accessor.hh +++ b/src/libutil/include/nix/util/source-accessor.hh @@ -201,15 +201,6 @@ struct SourceAccessor : std::enable_shared_from_this return {path, fingerprint}; } - /** - * Return the git tree/blob SHA1 hash for `path`, if available. - * Used to look up cached NAR hashes via treeHashToNarHash. - */ - virtual std::optional getGitTreeHash(const CanonPath & path) - { - return std::nullopt; - } - /** * Return the maximum last-modified time of the files in this * tree, if available. 
diff --git a/src/libutil/mounted-source-accessor.cc b/src/libutil/mounted-source-accessor.cc index 264e38d266c0..d9398045cc56 100644 --- a/src/libutil/mounted-source-accessor.cc +++ b/src/libutil/mounted-source-accessor.cc @@ -92,12 +92,6 @@ struct MountedSourceAccessorImpl : MountedSourceAccessor return nullptr; } - std::optional getGitTreeHash(const CanonPath & path) override - { - auto [accessor, subpath] = resolve(path); - return accessor->getGitTreeHash(subpath); - } - std::pair> getFingerprint(const CanonPath & path) override { if (fingerprint) diff --git a/src/libutil/union-source-accessor.cc b/src/libutil/union-source-accessor.cc index 10f813a2425b..e3b39f14ed27 100644 --- a/src/libutil/union-source-accessor.cc +++ b/src/libutil/union-source-accessor.cc @@ -73,15 +73,6 @@ struct UnionSourceAccessor : SourceAccessor return std::nullopt; } - std::optional getGitTreeHash(const CanonPath & path) override - { - for (auto & accessor : accessors) { - if (auto h = accessor->getGitTreeHash(path)) - return h; - } - return std::nullopt; - } - std::pair> getFingerprint(const CanonPath & path) override { if (fingerprint) From dc497c57fde0b9a1a58f45240d2a185912f93e13 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 28 Feb 2026 21:26:08 -0600 Subject: [PATCH 05/12] Speedups --- src/libexpr/eval.cc | 1 + src/libfetchers/fetch-to-store.cc | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 5d769c5bf80c..bbaa3f7a2b16 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -927,6 +927,7 @@ struct DirtyOverlaySourceAccessor : SourceAccessor if (!st) { toHash += 'D'; } else if (st->type == Type::tRegular) { + if (st->isExecutable) toHash += 'X'; toHash += disk->readFile(CanonPath(f)); } else if (st->type == Type::tSymlink) { toHash += 'L' + disk->readLink(CanonPath(f)); diff --git a/src/libfetchers/fetch-to-store.cc b/src/libfetchers/fetch-to-store.cc index 10a42c339ac5..30138f76c43a 100644 --- 
a/src/libfetchers/fetch-to-store.cc +++ b/src/libfetchers/fetch-to-store.cc @@ -90,6 +90,9 @@ std::pair fetchToStore2( debug("source path '%s' is uncacheable", path); } + warn("fetchToStore2 SLOW PATH: '%s' (filter=%d, fingerprint=%s)", + path, (bool) filter, fingerprint ? *fingerprint : "none"); + Activity act( *logger, lvlChatty, From 9bed8b9009073e2d86da6160e3ffdfe963903f38 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 28 Feb 2026 22:25:09 -0600 Subject: [PATCH 06/12] Cache pure evals --- src/nix/build.cc | 131 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/src/nix/build.cc b/src/nix/build.cc index 2d4f426a4954..1dd42c50f8eb 100644 --- a/src/nix/build.cc +++ b/src/nix/build.cc @@ -1,10 +1,19 @@ #include "nix/cmd/command.hh" +#include "nix/cmd/common-eval-args.hh" #include "nix/main/common-args.hh" #include "nix/main/shared.hh" #include "nix/store/store-api.hh" #include "nix/store/local-fs-store.hh" +#include "nix/fetchers/cache.hh" +#include "nix/fetchers/attrs.hh" +#include "nix/fetchers/fetch-settings.hh" +#include "nix/util/hash.hh" +#include "nix/util/processes.hh" +#include "nix/util/file-system.hh" +#include "nix/util/strings.hh" #include +#include using namespace nix; @@ -110,6 +119,7 @@ struct CmdBuild : InstallablesCommand, MixOutLinkByDefault, MixDryRun, MixJSON, { bool printOutputPaths = false; BuildMode buildMode = bmNormal; + std::vector buildOutputPaths_; CmdBuild() { @@ -138,6 +148,114 @@ struct CmdBuild : InstallablesCommand, MixOutLinkByDefault, MixDryRun, MixJSON, ; } + std::optional computeEvalCacheKey( + const std::vector & rawInstallables) + { + if (!expr) return std::nullopt; + if (!evalSettings.pureEval) return std::nullopt; + std::string gitSha = evalSettings.tectonixGitSha; + if (gitSha.empty()) return std::nullopt; + + HashSink hashSink(HashAlgorithm::SHA256); + hashSink << *expr; + for (auto & arg : rawInstallables) + hashSink << arg; + hashSink << gitSha; + + // Include dirty 
file state + std::string checkoutPath = evalSettings.tectonixCheckoutPath; + if (!checkoutPath.empty()) { + try { + auto statusOutput = runProgram( + "git", true, {"-C", checkoutPath, "status", "--porcelain", "-z"}); + // Parse NUL-separated entries, sort for determinism + std::vector dirtyFiles; + size_t pos = 0; + while (pos < statusOutput.size()) { + auto nulPos = statusOutput.find('\0', pos); + if (nulPos == std::string::npos) break; + auto entry = statusOutput.substr(pos, nulPos - pos); + pos = nulPos + 1; + if (entry.size() < 4) continue; + char xy0 = entry[0]; + auto filePath = entry.substr(3); + dirtyFiles.push_back(filePath); + // Skip rename/copy source path + if (xy0 == 'R' || xy0 == 'C') { + auto nextNul = statusOutput.find('\0', pos); + if (nextNul != std::string::npos) { + dirtyFiles.push_back(statusOutput.substr(pos, nextNul - pos)); + pos = nextNul + 1; + } + } + } + std::sort(dirtyFiles.begin(), dirtyFiles.end()); + for (auto & f : dirtyFiles) { + hashSink << f; + auto fullPath = std::filesystem::path(checkoutPath) / f; + if (std::filesystem::exists(fullPath) && std::filesystem::is_regular_file(fullPath)) + hashSink << readFile(fullPath.string()); + } + } catch (...) 
{ + // If git status fails, skip caching + return std::nullopt; + } + } + + auto [hash, len] = hashSink.finish(); + return fetchers::Cache::Key{ + "evalResult", + {{"fingerprint", hash.to_string(HashFormat::SRI, true)}}}; + } + + void run(ref store, std::vector && rawInstallables) override + { + // Try eval cache (only when --expr + --tectonix-git-sha are set, + // and not in dry-run or rebuild mode) + if (!dryRun && buildMode == bmNormal) { + auto cacheKey = computeEvalCacheKey(rawInstallables); + if (cacheKey) { + if (auto cached = fetchSettings.getCache()->lookup(*cacheKey)) { + auto outPathsStr = fetchers::getStrAttr(*cached, "outPaths"); + auto paths = tokenizeString>(outPathsStr, "\n"); + // Verify all store paths still exist + bool allExist = true; + for (auto & p : paths) { + if (!store->maybeQueryPathInfo(store->parseStorePath(p))) { + allExist = false; + break; + } + } + if (allExist) { + if (printOutputPaths) { + logger->stop(); + for (auto & p : paths) + logger->cout("%s", p); + } + return; + } + } + + // Normal flow: evaluate + build + InstallablesCommand::run(store, std::move(rawInstallables)); + + // Cache output paths + if (!buildOutputPaths_.empty()) { + std::string outPaths; + for (auto & p : buildOutputPaths_) { + if (!outPaths.empty()) outPaths += '\n'; + outPaths += store->printStorePath(p); + } + fetchSettings.getCache()->upsert(*cacheKey, {{"outPaths", outPaths}}); + } + return; + } + } + + // Fallback: normal flow without caching + InstallablesCommand::run(store, std::move(rawInstallables)); + } + void run(ref store, Installables && installables) override { if (dryRun) { @@ -179,6 +297,19 @@ struct CmdBuild : InstallablesCommand, MixOutLinkByDefault, MixDryRun, MixJSON, } } + // Collect output paths for eval cache + for (auto & buildable : buildables) { + std::visit( + overloaded{ + [&](const BuiltPath::Opaque & bo) { buildOutputPaths_.push_back(bo.path); }, + [&](const BuiltPath::Built & bfd) { + for (auto & [_, path] : bfd.outputs) + 
buildOutputPaths_.push_back(path); + }, + }, + buildable.path.raw()); + } + BuiltPaths buildables2; for (auto & b : buildables) buildables2.push_back(b.path); From d3b6ae2908689246bd1953d5ebbf253bf23b4529 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sun, 1 Mar 2026 16:39:07 -0600 Subject: [PATCH 07/12] DirtyOverlaySourceAccessor: read clean files from git ODB, not disk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes to the dirty overlay accessor: 1. readFile/readLink now route clean files through the git ODB (base accessor) instead of always reading from disk. Only dirty files are read from disk. This is important because we can't always trust on-disk content for clean files (e.g. sparse checkouts). 2. Removed getPhysicalPath override — clean files should not expose disk paths since they're served from the git ODB. 3. Fingerprint computation now uses a HashSink and caches the result. The fingerprint is the base accessor's fingerprint (git tree SHA) plus a hash of dirty file paths and content, avoiding redundant re-computation across multiple fetchToStore calls in the same eval. Co-Authored-By: Claude Opus 4.6 --- src/libexpr/eval.cc | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index bbaa3f7a2b16..6b043385ac47 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -879,6 +879,7 @@ struct DirtyOverlaySourceAccessor : SourceAccessor { ref base, disk; boost::unordered_flat_set dirtyFiles, dirtyDirs; + mutable std::optional cachedFingerprint; DirtyOverlaySourceAccessor( ref base, ref disk, @@ -905,38 +906,47 @@ struct DirtyOverlaySourceAccessor : SourceAccessor return disk->maybeLstat(path); } - std::string readFile(const CanonPath & path) override { return (isDirty(path) ? disk : base)->readFile(path); } - std::string readLink(const CanonPath & path) override { return (isDirty(path) ? 
disk : base)->readLink(path); } - std::optional getPhysicalPath(const CanonPath & path) override { return (isDirty(path) ? disk : base)->getPhysicalPath(path); } + std::string readFile(const CanonPath & path) override + { + if (isDirty(path)) return disk->readFile(path); + return base->readFile(path); + } + + std::string readLink(const CanonPath & path) override + { + if (isDirty(path)) return disk->readLink(path); + return base->readLink(path); + } std::pair> getFingerprint(const CanonPath & path) override { + if (cachedFingerprint) + return {path, *cachedFingerprint}; + auto [subpath, baseFp] = base->getFingerprint(path); if (!baseFp) return {path, std::nullopt}; - // Incorporate dirty file content into the fingerprint. - // Dirty files are few and small, so reading them is much - // cheaper than NAR-serializing the entire zone. - std::string toHash = *baseFp; + // Extend the base (git) fingerprint with a hash of dirty file + // paths and content. + HashSink hashSink{HashAlgorithm::SHA256}; std::vector sorted(dirtyFiles.begin(), dirtyFiles.end()); std::sort(sorted.begin(), sorted.end()); for (auto & f : sorted) { - toHash += '\0' + f + '\0'; + hashSink << f; auto st = disk->maybeLstat(CanonPath(f)); if (!st) { - toHash += 'D'; + hashSink << "D"; } else if (st->type == Type::tRegular) { - if (st->isExecutable) toHash += 'X'; - toHash += disk->readFile(CanonPath(f)); + hashSink << (st->isExecutable ? 
"X" : "F"); + hashSink << disk->readFile(CanonPath(f)); } else if (st->type == Type::tSymlink) { - toHash += 'L' + disk->readLink(CanonPath(f)); - } else { - toHash += '?'; + hashSink << "L"; + hashSink << disk->readLink(CanonPath(f)); } } - auto hash = hashString(HashAlgorithm::SHA256, toHash); - return {path, "dirty:" + hash.to_string(HashFormat::Nix32, false)}; + cachedFingerprint = *baseFp + ";d=" + hashSink.finish().hash.to_string(HashFormat::Base16, false); + return {path, *cachedFingerprint}; } DirEntries readDirectory(const CanonPath & path) override From 9c06cc77387c5301482c402e1ff2bad960018819 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sun, 1 Mar 2026 22:42:19 -0600 Subject: [PATCH 08/12] Track access This wires in tracking into our source accessor and allows us to see exactly what files affect a given target. --- src/libexpr/eval.cc | 416 +++----------- src/libexpr/include/nix/expr/eval.hh | 105 ++-- src/libexpr/parallel-eval.cc | 14 +- src/libexpr/primops/tectonix.cc | 516 ++++++++++++++---- src/libutil/archive.cc | 5 + .../include/nix/util/source-accessor.hh | 7 + 6 files changed, 592 insertions(+), 471 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 6b043385ac47..282e307b1ce6 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -61,6 +61,8 @@ using json = nlohmann::json; namespace nix { +thread_local TrackingContext * currentTrackingContext = nullptr; + /** * Just for doc strings. Not for regular string values. */ @@ -471,8 +473,8 @@ ref EvalState::getWorldGitAccessor() const } // exportIgnore=false: The world accessor is used for path validation and tree SHA - // computation, where we need to see all files. Zone accessors (mountZoneByTreeSha, - // getZoneStorePath) use exportIgnore=true to honor .gitattributes for actual content. + // computation, where we need to see all files. The repo accessor (getRepoAccessor) + // uses exportIgnore=true to honor .gitattributes for actual content. 
GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; worldGitAccessor = repo->getAccessor(hash, opts, "world"); debug("created world accessor at commit %s", sha); @@ -485,42 +487,20 @@ bool EvalState::isTectonixSourceAvailable() const return !settings.tectonixCheckoutPath.get().empty(); } -// Helper to normalize zone paths: strip leading // prefix -// Zone paths in manifest have // prefix (e.g., //areas/tools/dev) +// Helper to normalize paths: strip leading // prefix +// Paths in manifest have // prefix (e.g., //areas/tools/dev) // Filesystem operations need paths without // (e.g., areas/tools/dev) -static std::string normalizeZonePath(std::string_view zonePath) +static std::string normalizePath(std::string_view path) { - std::string path(zonePath); - if (hasPrefix(path, "//")) - path = path.substr(2); - return path; -} - -// Helper to sanitize zone path for use in store path names. -// Store paths only allow: a-zA-Z0-9 and +-._?= -// Replaces / with - and any other invalid chars with _ -static std::string sanitizeZoneNameForStore(std::string_view zonePath) -{ - auto zone = normalizeZonePath(zonePath); - std::string result; - result.reserve(zone.size()); - for (char c : zone) { - if (c == '/') { - result += '-'; - } else if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || c == '+' || c == '-' || - c == '.' || c == '_' || c == '?' 
|| c == '=') { - result += c; - } else { - result += '_'; - } - } + std::string result(path); + if (hasPrefix(result, "//")) + result = result.substr(2); return result; } Hash EvalState::getWorldTreeSha(std::string_view worldPath) const { - auto path = normalizeZonePath(worldPath); + auto path = normalizePath(worldPath); // Check cache first if (auto cached = getConcurrent(*worldTreeShaCache, path)) { @@ -579,155 +559,7 @@ Hash EvalState::getWorldTreeSha(std::string_view worldPath) const return currentSha; } -const std::set & EvalState::getTectonixSparseCheckoutRoots() const -{ - std::call_once(tectonixSparseCheckoutRootsFlag, [this]() { - if (isTectonixSourceAvailable()) { - auto checkoutPath = settings.tectonixCheckoutPath.get(); - // Read .git to find the actual git directory - // It can be either a directory or a file containing "gitdir: " - auto dotGitPath = std::filesystem::path(checkoutPath) / ".git"; - std::filesystem::path gitDir; - - if (std::filesystem::is_directory(dotGitPath)) { - gitDir = dotGitPath; - } else if (std::filesystem::is_regular_file(dotGitPath)) { - auto gitdirContent = readFile(dotGitPath.string()); - // Parse "gitdir: \n" - if (hasPrefix(gitdirContent, "gitdir: ")) { - auto path = trim(gitdirContent.substr(8)); - gitDir = std::filesystem::path(path); - // Handle relative paths - if (gitDir.is_relative()) - gitDir = std::filesystem::path(checkoutPath) / gitDir; - } - } - - if (!gitDir.empty()) { - // Read sparse-checkout-roots - auto sparseRootsPath = gitDir / "info" / "sparse-checkout-roots"; - if (std::filesystem::exists(sparseRootsPath)) { - auto content = readFile(sparseRootsPath.string()); - for (auto & line : tokenizeString>(content, "\n")) { - auto trimmed = trim(line); - if (!trimmed.empty()) - tectonixSparseCheckoutRoots.insert(std::string(trimmed)); - } - } - } - } - }); - return tectonixSparseCheckoutRoots; -} - -const std::map & EvalState::getTectonixDirtyZones() const -{ - std::call_once(tectonixDirtyZonesFlag, [this]() { - 
if (!isTectonixSourceAvailable()) - return; - - // Get sparse checkout roots (zone IDs) - auto & sparseRoots = getTectonixSparseCheckoutRoots(); - if (sparseRoots.empty()) - return; - - // Get manifest (uses cached parsed JSON) - const nlohmann::json * manifest; - try { - manifest = &getManifestJson(); - } catch (nlohmann::json::parse_error & e) { - warn("failed to parse manifest for dirty zone detection: %s", e.what()); - return; - } catch (Error &) { - // Manifest file not available (e.g., not in world repo) - return; - } - - // Build map of zone ID -> zone path for sparse roots only - std::map zoneIdToPath; - for (auto & [path, value] : manifest->items()) { - if (!value.contains("id") || !value.at("id").is_string()) { - warn("zone '%s' in manifest has missing or non-string 'id' field", path); - continue; - } - auto & id = value.at("id").get_ref(); - if (sparseRoots.count(id)) - zoneIdToPath[id] = path; - } - - // Initialize all sparse-checked-out zones as not dirty - for (auto & [zoneId, zonePath] : zoneIdToPath) { - tectonixDirtyZones[zonePath] = {}; - } - - // Get dirty files via git status with -z for NUL-separated output - // This handles filenames with special characters correctly - auto checkoutPath = settings.tectonixCheckoutPath.get(); - std::string gitStatusOutput; - try { - gitStatusOutput = runProgram("git", true, {"-C", checkoutPath, "status", "--porcelain", "-z"}); - } catch (ExecError & e) { - // If git status fails, treat all zones as clean (fallback) - // This ensures call_once completes and we don't retry with partial state - warn("failed to get git status for dirty zone detection in '%s': %s; treating all zones as clean", checkoutPath, e.what()); - return; - } - - // Parse NUL-separated output - // Format with -z: XY SP path NUL [orig-path NUL for renames/copies] - size_t pos = 0; - while (pos < gitStatusOutput.size()) { - // Find the next NUL - auto nulPos = gitStatusOutput.find('\0', pos); - if (nulPos == std::string::npos) - break; - - auto 
entry = gitStatusOutput.substr(pos, nulPos - pos); - pos = nulPos + 1; - - // Git porcelain format: "XY PATH" where XY is 2-char status, then space, then path - // Minimum valid entry is "X P" (4 chars): status + space + 1-char path - if (entry.size() < 4) continue; - - // XY is first 2 chars, then space, then path - char xy0 = entry[0]; - std::string rawPath = entry.substr(3); - - // Collect paths to check - destination path is always included - std::vector pathsToCheck; - pathsToCheck.push_back("/" + rawPath); - - // For renames (R) and copies (C), also process the original path - // Both source and destination zones should be marked dirty - if (xy0 == 'R' || xy0 == 'C') { - auto nextNul = gitStatusOutput.find('\0', pos); - if (nextNul != std::string::npos) { - auto origPath = gitStatusOutput.substr(pos, nextNul - pos); - pathsToCheck.push_back("/" + origPath); - pos = nextNul + 1; - } - } - - for (const auto & filePath : pathsToCheck) { - for (auto & [zonePath, info] : tectonixDirtyZones) { - auto normalized = "/" + normalizeZonePath(zonePath); - if (hasPrefix(filePath, normalized + "/") || filePath == normalized) { - info.dirty = true; - info.dirtyFiles.insert(filePath.substr(1)); - break; - } - } - } - } - - size_t dirtyCount = 0; - for (const auto & [_, info] : tectonixDirtyZones) - if (info.dirty) dirtyCount++; - debug("computed dirty zones: %d of %d zones are dirty", dirtyCount, tectonixDirtyZones.size()); - }); - return tectonixDirtyZones; -} // Path to the tectonix manifest file within the world repository static constexpr std::string_view TECTONIX_MANIFEST_PATH = "/.meta/manifest.json"; @@ -770,108 +602,6 @@ const nlohmann::json & EvalState::getManifestJson() const return *tectonixManifestJson; } -StorePath EvalState::getZoneStorePath(std::string_view zonePath) -{ - // Check dirty status using original zonePath (with // prefix) since - // tectonixDirtyZones keys come directly from manifest with // prefix - const ZoneDirtyInfo * dirtyInfo = nullptr; - if 
(isTectonixSourceAvailable()) { - auto & dirtyZones = getTectonixDirtyZones(); - auto it = dirtyZones.find(std::string(zonePath)); - if (it != dirtyZones.end() && it->second.dirty) - dirtyInfo = &it->second; - } - - if (dirtyInfo) { - debug("getZoneStorePath: %s is dirty, using checkout", zonePath); - return getZoneFromCheckout(zonePath, &dirtyInfo->dirtyFiles); - } - - // Clean zone: get tree SHA - auto treeSha = getWorldTreeSha(zonePath); - - if (!settings.lazyTrees) { - debug("getZoneStorePath: %s clean, eager copy from git (tree %s)", zonePath, treeSha.gitRev()); - // Eager mode: immediate copy from git ODB - auto repo = getWorldRepo(); - // exportIgnore=true: honor .gitattributes for zone content (unlike world accessor) - GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; - auto accessor = repo->getAccessor(treeSha, opts, "zone"); - - std::string name = "zone-" + sanitizeZoneNameForStore(zonePath); - auto storePath = fetchToStore( - fetchSettings, *store, - SourcePath(accessor, CanonPath::root), - FetchMode::Copy, name); - - allowPath(storePath); - return storePath; - } - - debug("getZoneStorePath: %s clean, lazy mount (tree %s)", zonePath, treeSha.gitRev()); - return mountZoneByTreeSha(treeSha, zonePath); -} - -StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath) -{ - // Double-checked locking pattern for concurrent zone mounting: - // 1. Read lock check (fast path - allows concurrent readers) - { - auto cache = tectonixZoneCache_.readLock(); - auto it = cache->find(treeSha); - if (it != cache->end()) { - debug("zone cache hit for tree %s", treeSha.gitRev()); - return it->second; - } - } // Read lock released - - // 2. 
Write lock check (catch races between read unlock and write lock) - { - auto cache = tectonixZoneCache_.lock(); - auto it = cache->find(treeSha); - if (it != cache->end()) { - debug("zone cache hit for tree %s (after lock upgrade)", treeSha.gitRev()); - return it->second; - } - } // Write lock released - expensive work happens without holding lock - - // 3. Perform expensive git operations without holding lock. - // This allows concurrent mounts of different zones. Multiple threads may - // race to mount the same zone, but we check again before inserting. - auto repo = getWorldRepo(); - // exportIgnore=true: honor .gitattributes for zone content (unlike world accessor) - GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; - auto accessor = repo->getAccessor(treeSha, opts, "zone"); - - // Generate name from zone path (sanitized for store path requirements) - std::string name = "zone-" + sanitizeZoneNameForStore(zonePath); - - // Create virtual store path - auto storePath = StorePath::random(name); - - // 4. Re-acquire write lock and check again before mounting - auto cache = tectonixZoneCache_.lock(); - auto it = cache->find(treeSha); - if (it != cache->end()) { - // Another thread mounted while we were working - use their result - debug("zone cache hit for tree %s (after work)", treeSha.gitRev()); - return it->second; - } - - // Mount accessor at this path first, then allow the path. - // This order ensures we don't leave allowed paths without mounts on exception. - storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); - allowPath(storePath); - - // Insert into cache (we hold the lock, so this will succeed) - cache->emplace(treeSha, storePath); - - debug("mounted zone %s (tree %s) at %s", - zonePath, treeSha.gitRev(), store->printStorePath(storePath)); - - return storePath; -} - /** * Overlays dirty files from disk on top of a clean git tree accessor. 
*/ @@ -880,7 +610,6 @@ struct DirtyOverlaySourceAccessor : SourceAccessor ref base, disk; boost::unordered_flat_set dirtyFiles, dirtyDirs; mutable std::optional cachedFingerprint; - DirtyOverlaySourceAccessor( ref base, ref disk, boost::unordered_flat_set && dirtyFiles) @@ -897,8 +626,19 @@ struct DirtyOverlaySourceAccessor : SourceAccessor bool isDirty(const CanonPath & path) { return dirtyFiles.contains(std::string(path.rel())); } + void trackAccess(const CanonPath & path) { + if (auto ctx = currentTrackingContext; ctx && !path.isRoot()) { + debug("trackAccess: %s", path); + ctx->recordAccess(std::string(path.rel())); + } + } + std::optional maybeLstat(const CanonPath & path) override { + // Don't track maybeLstat: it's called during path traversal and + // symlink resolution on intermediate directories (areas/, system/, + // etc.) which aren't real dependencies. Actual content dependencies + // are captured via readFile and getFingerprint. if (path.isRoot()) return base->maybeLstat(path); if (isDirty(path)) return disk->maybeLstat(path); auto s = base->maybeLstat(path); @@ -908,18 +648,22 @@ struct DirtyOverlaySourceAccessor : SourceAccessor std::string readFile(const CanonPath & path) override { + trackAccess(path); if (isDirty(path)) return disk->readFile(path); return base->readFile(path); } std::string readLink(const CanonPath & path) override { + // Don't track readLink: symlink resolution is path traversal, + // not a content dependency. if (isDirty(path)) return disk->readLink(path); return base->readLink(path); } std::pair> getFingerprint(const CanonPath & path) override { + trackAccess(path); if (cachedFingerprint) return {path, *cachedFingerprint}; @@ -951,6 +695,9 @@ struct DirtyOverlaySourceAccessor : SourceAccessor DirEntries readDirectory(const CanonPath & path) override { + // Don't track readDirectory: it's called for directory navigation + // and listing, not for content dependencies. Directory source + // imports are tracked via getFingerprint. 
auto rel = path.isRoot() ? "" : std::string(path.rel()); if (!path.isRoot() && !dirtyDirs.contains(rel)) return base->readDirectory(path); @@ -978,56 +725,75 @@ struct DirtyOverlaySourceAccessor : SourceAccessor } }; -StorePath EvalState::getZoneFromCheckout(std::string_view zonePath, const boost::unordered_flat_set * dirtyFiles) +ref EvalState::getRepoAccessor() { - auto zone = normalizeZonePath(zonePath); - std::string name = "zone-" + sanitizeZoneNameForStore(zonePath); - auto checkoutPath = settings.tectonixCheckoutPath.get(); - auto fullPath = std::filesystem::path(checkoutPath) / zone; - - auto makeDirtyAccessor = [&]() -> ref { + std::call_once(repoAccessorFlag, [this]() { + auto & sha = requireTectonixGitSha(); auto repo = getWorldRepo(); - auto baseAccessor = repo->getAccessor( - getWorldTreeSha(zone), {.exportIgnore = true, .smudgeLfs = false}, "zone"); - boost::unordered_flat_set zoneDirtyFiles; - if (dirtyFiles) { - auto zonePrefix = zone + "/"; - for (auto & f : *dirtyFiles) - if (f.starts_with(zonePrefix)) - zoneDirtyFiles.insert(f.substr(zonePrefix.size())); - } - return make_ref( - baseAccessor, makeFSSourceAccessor(fullPath), std::move(zoneDirtyFiles)); - }; + auto commitHash = Hash::parseNonSRIUnprefixed(sha, HashAlgorithm::SHA1); + auto rootTreeSha = repo->getCommitTree(commitHash); - if (!settings.lazyTrees) { - auto accessor = makeDirtyAccessor(); - auto storePath = fetchToStore( - fetchSettings, *store, - SourcePath(accessor, CanonPath::root), - FetchMode::Copy, name); - allowPath(storePath); - return storePath; - } + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto baseAccessor = repo->getAccessor(rootTreeSha, opts, "repo"); - { - auto cache = tectonixCheckoutZoneCache_.readLock(); - auto it = cache->find(std::string(zonePath)); - if (it != cache->end()) return it->second; - } + if (isTectonixSourceAvailable()) { + auto checkoutPath = settings.tectonixCheckoutPath.get(); + + // Get all dirty files in the repo + 
boost::unordered_flat_set dirtyFiles; + try { + auto gitStatusOutput = runProgram("git", true, + {"-C", checkoutPath, "status", "--porcelain", "-z"}); + + size_t pos = 0; + while (pos < gitStatusOutput.size()) { + auto nulPos = gitStatusOutput.find('\0', pos); + if (nulPos == std::string::npos) break; + auto entry = gitStatusOutput.substr(pos, nulPos - pos); + pos = nulPos + 1; + if (entry.size() < 4) continue; + + char xy0 = entry[0]; + dirtyFiles.insert(entry.substr(3)); + + if (xy0 == 'R' || xy0 == 'C') { + auto nextNul = gitStatusOutput.find('\0', pos); + if (nextNul != std::string::npos) pos = nextNul + 1; + } + } + } catch (...) {} - auto cache = tectonixCheckoutZoneCache_.lock(); - auto it = cache->find(std::string(zonePath)); - if (it != cache->end()) return it->second; + repoAccessor = make_ref( + baseAccessor, makeFSSourceAccessor(checkoutPath), std::move(dirtyFiles)); + } else { + repoAccessor = baseAccessor; + } - if (!std::filesystem::exists(fullPath)) - throw Error("zone '%s' not found in checkout at '%s'", zonePath, fullPath.string()); + debug("created repo-wide accessor"); + }); + return *repoAccessor; +} - auto storePath = StorePath::random(name); - storeFS->mount(CanonPath(store->printStorePath(storePath)), makeDirtyAccessor()); - allowPath(storePath); - cache->emplace(std::string(zonePath), storePath); - return storePath; +StorePath EvalState::mountRepoAccessor() +{ + std::call_once(repoMountFlag, [this]() { + auto accessor = getRepoAccessor(); + auto storePath = StorePath::random("world-repo"); + storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); + allowPath(storePath); + repoMountStorePath = storePath; + debug("mounted repo accessor at %s", store->printStorePath(storePath)); + }); + return *repoMountStorePath; +} + +std::string EvalState::getRepoSubtreePath(std::string_view repoRelPath) +{ + auto rootStorePath = mountRepoAccessor(); + auto path = normalizePath(repoRelPath); + auto result = 
store->printStorePath(rootStorePath) + "/" + path; + debug("getRepoSubtreePath: '%s' -> '%s'", repoRelPath, result); + return result; } inline static bool isJustSchemePrefix(std::string_view prefix) diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index 4b9b6885f6d5..ac51952a57e7 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -311,6 +312,24 @@ struct StaticEvalSymbols } }; +/** + * Tracks file/directory accesses during tecnix target resolution for cache invalidation. + * One per target. Paths are repo-relative (e.g. "areas/core/shopify/default.nix"). + */ +struct TrackingContext { + boost::concurrent_flat_set accessedPaths; + + void recordAccess(const std::string & path) { + accessedPaths.insert(path); + } +}; + +/** + * Thread-local pointer to the active tracking context. + * Set during tecnix target resolution, nullptr otherwise. + */ +extern thread_local TrackingContext * currentTrackingContext; + class EvalMemory { #if NIX_USE_BOEHMGC @@ -527,23 +546,23 @@ private: mutable std::once_flag worldGitAccessorFlag; mutable std::optional> worldGitAccessor; - /** Cache: world path → tree SHA (lazy computed, cached at each path level) */ - const ref> worldTreeShaCache; - - /** Lazy-initialized set of zone IDs in sparse checkout (thread-safe via once_flag) */ - mutable std::once_flag tectonixSparseCheckoutRootsFlag; - mutable std::set tectonixSparseCheckoutRoots; + /** + * Repo-wide source accessor with dirty overlay. Lazily created. + * All file reads during tecnix evaluation go through this single accessor, + * so tracked paths are naturally repo-relative. + */ + mutable std::once_flag repoAccessorFlag; + mutable std::optional> repoAccessor; - /** Per-zone dirty status: whether the zone is dirty, and if so, which - * repo-relative file paths are dirty (from git status). 
*/ - struct ZoneDirtyInfo { - bool dirty = false; - boost::unordered_flat_set dirtyFiles; // repo-relative paths - }; + /** + * Virtual store path where the repo-wide accessor is lazily mounted. + * All repo subtree store paths are subpaths of this mount. + */ + mutable std::once_flag repoMountFlag; + mutable std::optional repoMountStorePath; - /** Lazy-initialized map of zone path → dirty info (thread-safe via once_flag) */ - mutable std::once_flag tectonixDirtyZonesFlag; - mutable std::map tectonixDirtyZones; + /** Cache: world path → tree SHA (lazy computed, cached at each path level) */ + const ref> worldTreeShaCache; /** Cached manifest content (thread-safe via once_flag) */ mutable std::once_flag tectonixManifestFlag; @@ -553,30 +572,6 @@ private: mutable std::once_flag tectonixManifestJsonFlag; mutable std::unique_ptr tectonixManifestJson; - /** - * Cache tree SHA → virtual store path for lazy zone mounts. - * Thread-safe for eval-cores > 1. - */ - mutable SharedSync> tectonixZoneCache_; - - /** - * Cache zone path → virtual store path for lazy checkout zone mounts. - * Thread-safe for eval-cores > 1. - */ - mutable SharedSync> tectonixCheckoutZoneCache_; - - /** - * Mount a zone by tree SHA, returning a (potentially virtual) store path. - * Caches by tree SHA for deduplication across world revisions. - */ - StorePath mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath); - - /** - * Get zone store path from checkout (for dirty zones). - * With lazy-trees enabled, mounts lazily and caches by zone path. - */ - StorePath getZoneFromCheckout(std::string_view zonePath, const boost::unordered_flat_set * dirtyFiles = nullptr); - /** * Return the configured tectonix git SHA, or throw if unset. 
*/ @@ -622,8 +617,8 @@ public: * exportIgnore policy for tectonix accessors: * - World accessor (getWorldGitAccessor): exportIgnore=false * Used for path validation and tree SHA computation; needs to see all files - * - Zone accessors (mountZoneByTreeSha, getZoneStorePath): exportIgnore=true - * Used for actual zone content; honors .gitattributes for filtered output + * - Repo accessor (getRepoAccessor): exportIgnore=true + * Used for repo content; honors .gitattributes for filtered output * - Raw tree accessor (__unsafeTectonixInternalTree): exportIgnore=false * Low-level access by SHA; provides unfiltered content */ @@ -635,12 +630,6 @@ public: /** Check if we're in source-available mode */ bool isTectonixSourceAvailable() const; - /** Get set of zone IDs in sparse checkout (source-available mode only) */ - const std::set & getTectonixSparseCheckoutRoots() const; - - /** Get map of zone path → dirty status (only for sparse-checked-out zones) */ - const std::map & getTectonixDirtyZones() const; - /** Get cached manifest content (thread-safe, lazy-loaded) */ const std::string & getManifestContent() const; @@ -648,13 +637,23 @@ public: const nlohmann::json & getManifestJson() const; /** - * Get a zone's store path, handling dirty detection and lazy mounting. - * - * For clean zones with lazy-trees enabled: mounts accessor lazily - * For dirty zones: currently eager-copies from checkout (extension point) - * For lazy-trees disabled: eager-copies from git + * Get the repo-wide source accessor with dirty overlay. + * All file reads go through this single accessor, producing + * repo-relative paths for tracking. + */ + ref getRepoAccessor(); + + /** + * Lazily mount the repo-wide accessor and return the virtual store path. + * All repo reads go through this mount so file accesses are tracked. + */ + StorePath mountRepoAccessor(); + + /** + * Get a filesystem path for a repo-relative subtree. + * Returns a subpath within the mounted repo accessor. 
*/ - StorePath getZoneStorePath(std::string_view zonePath); + std::string getRepoSubtreePath(std::string_view repoRelPath); /** * Return a `SourcePath` that refers to `path` in the root diff --git a/src/libexpr/parallel-eval.cc b/src/libexpr/parallel-eval.cc index d63e931845e5..2c8984d6e928 100644 --- a/src/libexpr/parallel-eval.cc +++ b/src/libexpr/parallel-eval.cc @@ -115,6 +115,9 @@ std::vector> Executor::spawn(std::vector> futures; { @@ -125,7 +128,16 @@ std::vector> Executor::spawn(std::vector dist(0, 1ULL << 48); auto key = (uint64_t(item.second) << 48) | dist(rd); - state->queue.emplace(key, Item{.promise = std::move(promise), .work = std::move(item.first)}); + + // Wrap work to propagate tracking context into worker thread. + auto wrappedWork = [parentTrackingCtx, work = std::move(item.first)]() { + auto prev = currentTrackingContext; + currentTrackingContext = parentTrackingCtx; + work(); + currentTrackingContext = prev; + }; + + state->queue.emplace(key, Item{.promise = std::move(promise), .work = std::move(wrappedWork)}); } } diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index 78e286e9eb1e..868ec355f7cd 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -4,6 +4,7 @@ #include "nix/fetchers/git-utils.hh" #include "nix/store/store-api.hh" #include "nix/fetchers/fetch-to-store.hh" +#include "nix/util/processes.hh" #include #include @@ -16,18 +17,9 @@ static const nlohmann::json & getManifest(EvalState & state) return state.getManifestJson(); } -// Helper to validate that a zone path exists in the manifest -static void validateZonePath(EvalState & state, const PosIdx pos, std::string_view zonePath) -{ - auto & manifest = getManifest(state); - if (!manifest.contains(std::string(zonePath))) - state.error("'%s' is not a zone root (must be an exact path from the manifest)", zonePath) - .atPos(pos).debugThrow(); -} - // ============================================================================ 
-// builtins.worldManifest -// Returns path -> zone metadata mapping from //.meta/manifest.json +// builtins.unsafeTectonixInternalManifest +// Returns path -> metadata mapping from //.meta/manifest.json // ============================================================================ static void prim_worldManifest(EvalState & state, const PosIdx pos, Value ** args, Value & v) { @@ -36,12 +28,12 @@ static void prim_worldManifest(EvalState & state, const PosIdx pos, Value ** arg auto attrs = state.buildBindings(json.size()); for (auto & [path, value] : json.items()) { if (!value.contains("id") || !value.at("id").is_string()) - throw Error("zone '%s' in manifest has missing or non-string 'id' field", path); + throw Error("path '%s' in manifest has missing or non-string 'id' field", path); auto idStr = value.at("id").get(); - auto zoneAttrs = state.buildBindings(1); - zoneAttrs.alloc("id").mkString(idStr, state.mem); - attrs.alloc(state.symbols.create(path)).mkAttrs(zoneAttrs); + auto entryAttrs = state.buildBindings(1); + entryAttrs.alloc("id").mkString(idStr, state.mem); + attrs.alloc(state.symbols.create(path)).mkAttrs(entryAttrs); } v.mkAttrs(attrs); } @@ -50,7 +42,7 @@ static RegisterPrimOp primop_worldManifest({ .name = "__unsafeTectonixInternalManifest", .args = {}, .doc = R"( - Get the world manifest as a Nix attrset mapping zone paths to zone metadata. + Get the world manifest as a Nix attrset mapping paths to metadata. Example: `builtins.unsafeTectonixInternalManifest."//areas/tools/dev".id` returns `"W-123456"`. 
@@ -61,24 +53,23 @@ static RegisterPrimOp primop_worldManifest({ }); // ============================================================================ -// builtins.worldManifestInverted -// Returns zoneId -> path mapping (inverse of worldManifest) +// builtins.unsafeTectonixInternalManifestInverted +// Returns id -> path mapping (inverse of manifest) // ============================================================================ static void prim_worldManifestInverted(EvalState & state, const PosIdx pos, Value ** args, Value & v) { auto json = getManifest(state); - // Track seen IDs to detect duplicates std::set seenIds; auto attrs = state.buildBindings(json.size()); for (auto & [path, value] : json.items()) { if (!value.contains("id") || !value.at("id").is_string()) - throw Error("zone '%s' in manifest has missing or non-string 'id' field", path); + throw Error("path '%s' in manifest has missing or non-string 'id' field", path); auto idStr = value.at("id").get(); if (!seenIds.insert(idStr).second) - throw Error("duplicate zone ID '%s' in manifest (zone '%s')", idStr, path); + throw Error("duplicate ID '%s' in manifest (path '%s')", idStr, path); attrs.alloc(state.symbols.create(idStr)).mkString(path, state.mem); } @@ -89,7 +80,7 @@ static RegisterPrimOp primop_worldManifestInverted({ .name = "__unsafeTectonixInternalManifestInverted", .args = {}, .doc = R"( - Get the inverted world manifest as a Nix attrset mapping zone IDs to zone paths. + Get the inverted world manifest as a Nix attrset mapping IDs to paths. Example: `builtins.unsafeTectonixInternalManifestInverted."W-123456"` returns `"//areas/tools/dev"`. @@ -119,7 +110,7 @@ static RegisterPrimOp primop_unsafeTectonixInternalTreeSha({ Get the git tree SHA for a path in the world repository. Example: `builtins.unsafeTectonixInternalTreeSha "//areas/tools/tec"` returns the tree SHA - for that zone. + for that path. Uses `--tectonix-git-dir` (defaults to `~/world/git`) and requires `--tectonix-git-sha` to be set. 
@@ -143,9 +134,8 @@ static void prim_unsafeTectonixInternalTree(EvalState & state, const PosIdx pos, state.error("tree SHA '%s' not found in world repository", treeSha) .atPos(pos).debugThrow(); - // exportIgnore=false: This is raw tree access by SHA, used for low-level operations. - // Unlike zone accessors (which use exportIgnore=true to honor .gitattributes for - // filtered zone content), this provides unfiltered access to exact tree contents. + // exportIgnore=false: Raw tree access by SHA for low-level operations. + // Provides unfiltered access to exact tree contents. GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; auto accessor = repo->getAccessor(hash, opts, "world-tree"); @@ -173,33 +163,25 @@ static RegisterPrimOp primop_unsafeTectonixInternalTree({ }); // ============================================================================ -// builtins.unsafeTectonixInternalZoneSrc zonePath -// Returns a store path containing the zone source -// With lazy-trees enabled, returns a virtual store path that is only -// materialized when used as a derivation input. +// builtins.unsafeTectonixInternalZoneSrc repoPath +// Returns a path within the mounted repo accessor for a repo-relative path. 
// ============================================================================ static void prim_unsafeTectonixInternalZoneSrc(EvalState & state, const PosIdx pos, Value ** args, Value & v) { - auto zonePath = state.forceStringNoCtx(*args[0], pos, - "while evaluating the 'zonePath' argument to builtins.unsafeTectonixInternalZoneSrc"); - - validateZonePath(state, pos, zonePath); + auto repoPath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'repoPath' argument to builtins.unsafeTectonixInternalZoneSrc"); - auto storePath = state.getZoneStorePath(zonePath); - state.allowAndSetStorePathString(storePath, v); + auto path = state.getRepoSubtreePath(repoPath); + v.mkString(path, state.mem); } static RegisterPrimOp primop_unsafeTectonixInternalZoneSrc({ .name = "__unsafeTectonixInternalZoneSrc", - .args = {"zonePath"}, + .args = {"repoPath"}, .doc = R"( - Get the source of a zone as a store path. - - With `lazy-trees = true`, returns a virtual store path that is only - materialized when used as a derivation input (devirtualized). + Get a subpath within the mounted repo accessor for a repo-relative path. - In source-available mode with uncommitted changes, uses checkout content - (always eager for dirty zones). + Returns a virtual store path backed by the repo accessor (git + dirty overlay). Example: `builtins.unsafeTectonixInternalZoneSrc "//areas/tools/tec"` @@ -211,11 +193,42 @@ static RegisterPrimOp primop_unsafeTectonixInternalZoneSrc({ // ============================================================================ // builtins.unsafeTectonixInternalSparseCheckoutRoots -// Returns list of zone IDs in sparse checkout +// Returns list of IDs in sparse checkout. +// Self-contained: reads directly from the git info file. 
// ============================================================================ static void prim_unsafeTectonixInternalSparseCheckoutRoots(EvalState & state, const PosIdx pos, Value ** args, Value & v) { - auto & roots = state.getTectonixSparseCheckoutRoots(); + std::set roots; + + if (state.isTectonixSourceAvailable()) { + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + auto dotGitPath = std::filesystem::path(checkoutPath) / ".git"; + std::filesystem::path gitDir; + + if (std::filesystem::is_directory(dotGitPath)) { + gitDir = dotGitPath; + } else if (std::filesystem::is_regular_file(dotGitPath)) { + auto content = readFile(dotGitPath.string()); + if (hasPrefix(content, "gitdir: ")) { + auto path = trim(content.substr(8)); + gitDir = std::filesystem::path(path); + if (gitDir.is_relative()) + gitDir = std::filesystem::path(checkoutPath) / gitDir; + } + } + + if (!gitDir.empty()) { + auto sparseRootsPath = gitDir / "info" / "sparse-checkout-roots"; + if (std::filesystem::exists(sparseRootsPath)) { + auto content = readFile(sparseRootsPath.string()); + for (auto & line : tokenizeString>(content, "\n")) { + auto trimmed = trim(line); + if (!trimmed.empty()) + roots.insert(std::string(trimmed)); + } + } + } + } auto list = state.buildList(roots.size()); size_t i = 0; @@ -229,13 +242,11 @@ static RegisterPrimOp primop_unsafeTectonixInternalSparseCheckoutRoots({ .name = "__unsafeTectonixInternalSparseCheckoutRoots", .args = {}, .doc = R"( - Get the list of zone IDs that are in the sparse checkout. + Get the list of IDs in the sparse checkout. Returns an empty list if not in source-available mode or if no sparse-checkout-roots file exists. - Example: `builtins.unsafeTectonixInternalSparseCheckoutRoots` returns `["W-000000" "W-1337af" ...]`. - Requires `--tectonix-checkout-path` to be set. 
)", .fun = prim_unsafeTectonixInternalSparseCheckoutRoots, @@ -243,15 +254,76 @@ static RegisterPrimOp primop_unsafeTectonixInternalSparseCheckoutRoots({ // ============================================================================ // builtins.unsafeTectonixInternalDirtyZones -// Returns map of zone paths to dirty status +// Returns map of paths to dirty status. +// Self-contained: runs git status and maps to manifest paths. // ============================================================================ static void prim_unsafeTectonixInternalDirtyZones(EvalState & state, const PosIdx pos, Value ** args, Value & v) { - auto & dirtyZones = state.getTectonixDirtyZones(); + // Reuse SparseCheckoutRoots to find which IDs are checked out + Value sparseRootsVal; + prim_unsafeTectonixInternalSparseCheckoutRoots(state, pos, nullptr, sparseRootsVal); + + std::set sparseRoots; + for (auto elem : sparseRootsVal.listView()) + sparseRoots.insert(std::string(elem->string_view())); + + if (sparseRoots.empty()) { + auto emptyBindings = state.buildBindings(0); + v.mkAttrs(emptyBindings); + return; + } + + auto & manifest = getManifest(state); + + // Map IDs to manifest paths for checked-out entries + struct DirtyInfo { bool dirty = false; }; + std::map dirtyMap; // manifest path -> dirty status - auto attrs = state.buildBindings(dirtyZones.size()); - for (const auto & [zonePath, info] : dirtyZones) { - attrs.alloc(state.symbols.create(zonePath)).mkBool(info.dirty); + for (auto & [path, value] : manifest.items()) { + if (!value.contains("id") || !value.at("id").is_string()) continue; + auto & id = value.at("id").get_ref(); + if (sparseRoots.count(id)) + dirtyMap[path] = {}; + } + + // Run git status to find dirty files + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + try { + auto gitStatusOutput = runProgram("git", true, {"-C", checkoutPath, "status", "--porcelain", "-z"}); + + size_t gitPos = 0; + while (gitPos < gitStatusOutput.size()) { + auto nulPos = 
gitStatusOutput.find('\0', gitPos); + if (nulPos == std::string::npos) break; + auto entry = gitStatusOutput.substr(gitPos, nulPos - gitPos); + gitPos = nulPos + 1; + if (entry.size() < 4) continue; + + char xy0 = entry[0]; + std::string rawPath = "/" + entry.substr(3); + + // Skip rename/copy source paths + if (xy0 == 'R' || xy0 == 'C') { + auto nextNul = gitStatusOutput.find('\0', gitPos); + if (nextNul != std::string::npos) gitPos = nextNul + 1; + } + + // Check if this dirty file belongs to a checked-out manifest path + for (auto & [manifestPath, info] : dirtyMap) { + std::string normalized = manifestPath; + if (hasPrefix(normalized, "//")) + normalized = "/" + normalized.substr(2); + if (hasPrefix(rawPath, normalized + "/") || rawPath == normalized) { + info.dirty = true; + break; + } + } + } + } catch (...) {} + + auto attrs = state.buildBindings(dirtyMap.size()); + for (const auto & [path, info] : dirtyMap) { + attrs.alloc(state.symbols.create(path)).mkBool(info.dirty); } v.mkAttrs(attrs); } @@ -260,14 +332,10 @@ static RegisterPrimOp primop_unsafeTectonixInternalDirtyZones({ .name = "__unsafeTectonixInternalDirtyZones", .args = {}, .doc = R"( - Get the dirty status of zones in the sparse checkout. - - Returns an attrset mapping zone paths to booleans indicating whether - the zone has uncommitted changes. - - Only includes zones that are in the sparse checkout. + Get the dirty status of paths in the sparse checkout. - Example: `builtins.unsafeTectonixInternalDirtyZones."//areas/tools/dev"` returns `true` or `false`. + Returns an attrset mapping manifest paths to booleans indicating whether + the path has uncommitted changes. Requires `--tectonix-checkout-path` to be set. 
)", @@ -275,82 +343,346 @@ static RegisterPrimOp primop_unsafeTectonixInternalDirtyZones({ }); // ============================================================================ -// builtins.__unsafeTectonixInternalZoneIsDirty zonePath -// Returns whether a given zone is dirty in the checkout +// builtins.__unsafeTectonixInternalZoneIsDirty repoPath +// Returns whether a given path is dirty in the checkout // ============================================================================ static void prim_unsafeTectonixInternalZoneIsDirty(EvalState & state, const PosIdx pos, Value ** args, Value & v) { - auto zonePath = state.forceStringNoCtx(*args[0], pos, - "while evaluating the 'zonePath' argument to builtins.__unsafeTectonixInternalZoneIsDirty"); + auto repoPath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'repoPath' argument to builtins.__unsafeTectonixInternalZoneIsDirty"); - validateZonePath(state, pos, zonePath); + if (!state.isTectonixSourceAvailable()) { + v.mkBool(false); + return; + } + + // Check dirty status via git status + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + std::string normalizedPath(repoPath); + if (hasPrefix(normalizedPath, "//")) + normalizedPath = normalizedPath.substr(2); bool isDirty = false; - if (state.isTectonixSourceAvailable()) { - auto & dirtyZones = state.getTectonixDirtyZones(); - auto it = dirtyZones.find(std::string(zonePath)); - isDirty = it != dirtyZones.end() && it->second.dirty; - } + try { + auto gitStatusOutput = runProgram("git", true, {"-C", checkoutPath, "status", "--porcelain", "-z", "--", normalizedPath}); + isDirty = !gitStatusOutput.empty(); + } catch (...) {} v.mkBool(isDirty); } static RegisterPrimOp primop_unsafeTectonixInternalZoneIsDirty({ .name = "__unsafeTectonixInternalZoneIsDirty", - .args = {"zonePath"}, + .args = {"repoPath"}, .doc = R"( - Get whether a zone is in the sparse checkout and whether it is dirty. + Get whether a repo path has uncommitted changes. 
Example: `builtins.unsafeTectonixInternalZoneIsDirty "//areas/tools/tec"` - Uses `--tectonix-git-dir` (defaults to `~/world/git`). + Requires `--tectonix-checkout-path` to be set. )", .fun = prim_unsafeTectonixInternalZoneIsDirty, }); // ============================================================================ -// builtins.__unsafeTectonixInternalZoneRoot zonePath -// Returns an zone root path in sparse checkout +// builtins.__unsafeTectonixInternalZoneRoot repoPath +// Returns a checkout path for a repo-relative path // ============================================================================ static void prim_unsafeTectonixInternalZoneRoot(EvalState & state, const PosIdx pos, Value ** args, Value & v) { - auto zonePath = state.forceStringNoCtx(*args[0], pos, - "while evaluating the 'zonePath' argument to builtins.__unsafeTectonixInternalZoneRoot"); - - validateZonePath(state, pos, zonePath); + auto repoPath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'repoPath' argument to builtins.__unsafeTectonixInternalZoneRoot"); - std::string zone(zonePath); - if (hasPrefix(zone, "//")) - zone = zone.substr(2); + std::string normalized(repoPath); + if (hasPrefix(normalized, "//")) + normalized = normalized.substr(2); auto checkoutPath = state.settings.tectonixCheckoutPath.get(); - auto fullPath = std::filesystem::path(checkoutPath) / zone; + auto fullPath = std::filesystem::path(checkoutPath) / normalized; if (std::filesystem::exists(fullPath) && !state.settings.pureEval) { v.mkString(fullPath.string(), state.mem); } else { - // Zone not accessible in checkout v.mkNull(); } } static RegisterPrimOp primop_unsafeTectonixInternalZoneRoot({ .name = "__unsafeTectonixInternalZoneRoot", - .args = {"zonePath"}, + .args = {"repoPath"}, .doc = R"( - Get the root of a zone in sparse checkout, if available. + Get the checkout path for a repo-relative path, if available. 
- With `lazy-trees = true`, returns a virtual store path that is only - materialized when used as a derivation input (devirtualized). - - In source-available mode with uncommitted changes, uses checkout content - (always eager for dirty zones). + Returns null if the path doesn't exist in the checkout or if in pure eval mode. Example: `builtins.unsafeTectonixInternalZoneRoot "//areas/tools/tec"` - Uses `--tectonix-git-dir` (defaults to `~/world/git`). + Requires `--tectonix-checkout-path` to be set. )", .fun = prim_unsafeTectonixInternalZoneRoot, }); +// ============================================================================ +// Shared helpers for tecnixTargets / tecnixDependencies +// ============================================================================ + +/** + * Resolve the git SHA to use: explicit gitSha attr > checkout HEAD > error. + */ +static std::string resolveGitSha(EvalState & state, const PosIdx pos, + const std::string & gitDir, const Bindings & attrs, + const std::string & checkoutPath) +{ + // Check for explicit gitSha attr + auto gitShaAttr = attrs.get(state.symbols.create("gitSha")); + if (gitShaAttr) { + auto sha = state.forceStringNoCtx(*gitShaAttr->value, pos, + "while evaluating the 'gitSha' argument"); + if (!sha.empty()) + return std::string(sha); + } + + // Try to read HEAD from checkout + if (!checkoutPath.empty()) { + try { + auto headOutput = runProgram("git", true, {"-C", checkoutPath, "rev-parse", "HEAD"}); + auto trimmed = trim(headOutput); + if (!trimmed.empty()) + return trimmed; + } catch (...) {} + } + + state.error("could not determine git SHA: set 'gitSha' or provide a valid 'checkoutPath'") + .atPos(pos).debugThrow(); +} + +/** + * Parse the common attributes shared by tecnixTargets and tecnixDependencies. 
+ */ +struct TecnixArgs { + std::string gitDir; + std::string moduleSrc; + std::string gitSha; + std::string checkoutPath; + std::string system; + std::vector targets; +}; + +static TecnixArgs parseTecnixArgs(EvalState & state, const PosIdx pos, Value ** args) +{ + state.forceAttrs(*args[0], pos, "while evaluating the argument to tecnixTargets/tecnixDependencies"); + auto & attrs = *args[0]->attrs(); + + TecnixArgs result; + + // Required: gitDir + auto gitDirAttr = attrs.get(state.symbols.create("gitDir")); + if (!gitDirAttr) + state.error("'gitDir' attribute required").atPos(pos).debugThrow(); + result.gitDir = std::string(state.forceStringNoCtx(*gitDirAttr->value, pos, + "while evaluating the 'gitDir' argument")); + + // Required: moduleSrc + auto moduleSrcAttr = attrs.get(state.symbols.create("moduleSrc")); + if (!moduleSrcAttr) + state.error("'moduleSrc' attribute required").atPos(pos).debugThrow(); + result.moduleSrc = std::string(state.forceStringNoCtx(*moduleSrcAttr->value, pos, + "while evaluating the 'moduleSrc' argument")); + + // Required: targets + auto targetsAttr = attrs.get(state.symbols.create("targets")); + if (!targetsAttr) + state.error("'targets' attribute required").atPos(pos).debugThrow(); + state.forceList(*targetsAttr->value, pos, "while evaluating the 'targets' argument"); + for (auto elem : targetsAttr->value->listView()) { + auto target = state.forceStringNoCtx(*elem, pos, + "while evaluating a target string"); + result.targets.push_back(std::string(target)); + } + + // Optional: checkoutPath + auto checkoutPathAttr = attrs.get(state.symbols.create("checkoutPath")); + if (checkoutPathAttr) + result.checkoutPath = std::string(state.forceStringNoCtx(*checkoutPathAttr->value, pos, + "while evaluating the 'checkoutPath' argument")); + + // Required: system + auto systemAttr = attrs.get(state.symbols.create("system")); + if (!systemAttr) + state.error("'system' attribute required").atPos(pos).debugThrow(); + result.system = 
std::string(state.forceStringNoCtx(*systemAttr->value, pos, + "while evaluating the 'system' argument")); + + // Resolve gitSha + result.gitSha = resolveGitSha(state, pos, result.gitDir, attrs, result.checkoutPath); + + return result; +} + +/** + * Import moduleSrc/resolve.nix from the git repo and return the `resolve` + * function from its attrset. + * + * This sets the tectonix eval settings (gitDir, gitSha, checkoutPath) so that + * existing tectonix builtins work during module evaluation without requiring + * CLI flags. + */ +static Value & getResolveFunction(EvalState & state, const PosIdx pos, + const TecnixArgs & tArgs) +{ + // Set tectonix settings so existing builtins work during module evaluation. + // NOTE: EvalSettings are normally immutable; we const_cast here because + // the tecnix builtins need to configure the evaluator for the repo they're + // pointed at. This is safe because we're in single-threaded primop dispatch. + auto & mutableSettings = const_cast(state.settings); + mutableSettings.tectonixGitDir.assign(tArgs.gitDir); + mutableSettings.tectonixGitSha.assign(tArgs.gitSha); + if (!tArgs.checkoutPath.empty()) + mutableSettings.tectonixCheckoutPath.assign(tArgs.checkoutPath); + + // Get moduleSrc path from the lazily-mounted repo accessor. 
+ auto moduleSrcPath = state.getRepoSubtreePath(tArgs.moduleSrc); + auto modulePath = SourcePath(state.rootFS, CanonPath(moduleSrcPath + "/resolve.nix")); + + // Import resolve.nix (a function taking { system }) and call it + auto * moduleFn = state.allocValue(); + state.evalFile(modulePath, *moduleFn); + + auto * moduleArgs = state.allocValue(); + auto argBindings = state.buildBindings(1); + argBindings.alloc("system").mkString(tArgs.system, state.mem); + moduleArgs->mkAttrs(argBindings); + + auto * moduleVal = state.allocValue(); + state.callFunction(*moduleFn, *moduleArgs, *moduleVal, pos); + state.forceAttrs(*moduleVal, pos, "while evaluating tecnix module"); + + // Get resolve function + auto resolveAttr = moduleVal->attrs()->get(state.symbols.create("resolve")); + if (!resolveAttr) + state.error("tecnix module must have a 'resolve' attribute") + .atPos(pos).debugThrow(); + + state.forceFunction(*resolveAttr->value, pos, "while evaluating the 'resolve' attribute of tecnix module"); + return *resolveAttr->value; +} + +// ============================================================================ +// builtins.tecnixTargets { gitDir, moduleSrc, targets, system, ... } +// Resolves targets via module contract, returns list of derivations. +// Tracks file accesses per target. 
+// ============================================================================ +static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto tArgs = parseTecnixArgs(state, pos, args); + auto & resolveFn = getResolveFunction(state, pos, tArgs); + + auto list = state.buildList(tArgs.targets.size()); + for (size_t i = 0; i < tArgs.targets.size(); i++) { + TrackingContext trackingCtx; + auto prevCtx = currentTrackingContext; + currentTrackingContext = &trackingCtx; + + auto * targetArg = state.allocValue(); + targetArg->mkString(tArgs.targets[i], state.mem); + list[i] = state.allocValue(); + state.callFunction(const_cast(resolveFn), *targetArg, *list[i], pos); + + currentTrackingContext = prevCtx; + } + v.mkList(list); +} + +static RegisterPrimOp primop_tecnixTargets({ + .name = "__tecnixTargets", + .args = {"attrs"}, + .doc = R"( + Resolve tecnix targets via a module contract. Returns a list of values + (typically derivations), one per target. + + Takes an attrset with: + - `targets`: list of target strings + - `gitDir`: path to bare git directory + - `moduleSrc`: repo-relative path to directory with resolve.nix + - `system`: system string (e.g. "aarch64-darwin") + - `gitSha` (optional): explicit commit SHA + - `checkoutPath` (optional): checkout path for dirty file detection + )", + .fun = prim_tecnixTargets, +}); + +// ============================================================================ +// builtins.tecnixDependencies { gitDir, moduleSrc, targets, system, ... } +// Same inputs as tecnixTargets, returns JSON of accessed paths per target. 
+// ============================================================================ +static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto tArgs = parseTecnixArgs(state, pos, args); + auto & resolveFn = getResolveFunction(state, pos, tArgs); + + nlohmann::json result; + + for (auto & target : tArgs.targets) { + TrackingContext trackingCtx; + auto prevCtx = currentTrackingContext; + currentTrackingContext = &trackingCtx; + + auto * targetArg = state.allocValue(); + targetArg->mkString(target, state.mem); + auto * resolveResult = state.allocValue(); + state.callFunction(const_cast(resolveFn), *targetArg, *resolveResult, pos); + + // Force the value to WHNF first. + state.forceValue(*resolveResult, pos); + + // If the result is a derivation, force drvPath to trigger + // derivationStrict. This is critical: derivationStrict evaluates + // all derivation inputs (src, buildInputs, etc.), which triggers + // builtins.readFile calls (e.g. for Gemfile.lock, .ruby-version) + // that must be tracked. + if (resolveResult->type() == nAttrs) { + auto drvPathAttr = resolveResult->attrs()->get(state.symbols.create("drvPath")); + if (drvPathAttr) + state.forceValue(*drvPathAttr->value, pos); + } + + currentTrackingContext = prevCtx; + + // Collect accessed paths, collapsing children under directory roots. + // When a directory is imported as a unit (e.g. src = ./.), both the + // directory and its individual files get tracked. We only keep the + // directory since it subsumes all children. + std::vector paths; + trackingCtx.accessedPaths.visit_all([&](const std::string & p) { + paths.push_back(p); + }); + std::sort(paths.begin(), paths.end()); + + std::vector collapsed; + for (auto & p : paths) { + // After sorting, parent dirs come before their children. + // Skip any path that falls under the last kept directory. 
+ if (!collapsed.empty() && p.starts_with(collapsed.back() + "/")) + continue; + collapsed.push_back(p); + } + result[target] = collapsed; + } + + auto resultStr = result.dump(); + v.mkString(resultStr, state.mem); +} + +static RegisterPrimOp primop_tecnixDependencies({ + .name = "__tecnixDependencies", + .args = {"attrs"}, + .doc = R"( + Discover dependencies for tecnix targets. Returns a JSON string mapping each + target to a list of paths accessed during its resolution. + + Takes the same attrset as `tecnixTargets`. + )", + .fun = prim_tecnixDependencies, +}); + } // namespace nix diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index 0291d6827290..d3be1d3790f3 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -34,8 +34,11 @@ static GlobalConfig::Register rArchiveSettings(&archiveSettings); PathFilter defaultPathFilter = [](const Path &) { return true; }; +thread_local int SourceAccessor::dumpPathDepth = 0; + void SourceAccessor::dumpPath(const CanonPath & path, Sink & sink, PathFilter & filter) { + dumpPathDepth++; auto dumpContents = [&](const CanonPath & path) { sink << "contents"; std::optional size; @@ -99,6 +102,8 @@ void SourceAccessor::dumpPath(const CanonPath & path, Sink & sink, PathFilter & sink << ")"; }(path); + + dumpPathDepth--; } time_t dumpPathAndGetMtime(const Path & path, Sink & sink, PathFilter & filter) diff --git a/src/libutil/include/nix/util/source-accessor.hh b/src/libutil/include/nix/util/source-accessor.hh index 1006895b33c0..ca1330add23c 100644 --- a/src/libutil/include/nix/util/source-accessor.hh +++ b/src/libutil/include/nix/util/source-accessor.hh @@ -138,6 +138,13 @@ struct SourceAccessor : std::enable_shared_from_this virtual void dumpPath(const CanonPath & path, Sink & sink, PathFilter & filter = defaultPathFilter); + /** + * Depth counter for dumpPath calls. 
Used by tracking infrastructure + * to suppress individual file tracking during NAR serialization + * (store copy), since the directory-level fingerprint is sufficient. + */ + static thread_local int dumpPathDepth; + Hash hashPath(const CanonPath & path, PathFilter & filter = defaultPathFilter, HashAlgorithm ha = HashAlgorithm::SHA256); From fba47c6955320e833b87aeceb75595219bc7bd21 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 7 Mar 2026 12:52:25 -0600 Subject: [PATCH 09/12] Checkpoint --- misc/launchd/org.nixos.nix-daemon.plist.in | 2 + src/libexpr/eval.cc | 85 ++++++--- src/libexpr/include/nix/expr/primops.hh | 6 + src/libexpr/primops/tectonix.cc | 164 ++++++++++++++--- src/libfetchers/git-utils.cc | 15 ++ .../include/nix/fetchers/git-utils.hh | 3 + src/libstore/daemon.cc | 49 +++++ src/nix/build.cc | 174 +++++++++--------- 8 files changed, 360 insertions(+), 138 deletions(-) diff --git a/misc/launchd/org.nixos.nix-daemon.plist.in b/misc/launchd/org.nixos.nix-daemon.plist.in index 664608305e93..a04363fa6110 100644 --- a/misc/launchd/org.nixos.nix-daemon.plist.in +++ b/misc/launchd/org.nixos.nix-daemon.plist.in @@ -6,6 +6,8 @@ org.nixos.nix-daemon KeepAlive + ProcessType + Interactive RunAtLoad ProgramArguments diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 282e307b1ce6..a8a66298ad41 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -608,12 +608,15 @@ const nlohmann::json & EvalState::getManifestJson() const struct DirtyOverlaySourceAccessor : SourceAccessor { ref base, disk; + ref repo; + Hash rootTreeSha; boost::unordered_flat_set dirtyFiles, dirtyDirs; - mutable std::optional cachedFingerprint; + mutable std::unordered_map fingerprintCache; DirtyOverlaySourceAccessor( ref base, ref disk, + ref repo, Hash rootTreeSha, boost::unordered_flat_set && dirtyFiles) - : base(base), disk(disk), dirtyFiles(std::move(dirtyFiles)) + : base(base), disk(disk), repo(repo), rootTreeSha(rootTreeSha), dirtyFiles(std::move(dirtyFiles)) { for 
(auto & f : this->dirtyFiles) { for (auto p = CanonPath(f); !p.isRoot();) { @@ -648,7 +651,10 @@ struct DirtyOverlaySourceAccessor : SourceAccessor std::string readFile(const CanonPath & path) override { - trackAccess(path); + // Suppress tracking during NAR serialization (dumpPath) — + // the directory's fingerprint already covers its contents. + if (dumpPathDepth == 0) + trackAccess(path); if (isDirty(path)) return disk->readFile(path); return base->readFile(path); } @@ -664,33 +670,54 @@ struct DirtyOverlaySourceAccessor : SourceAccessor std::pair> getFingerprint(const CanonPath & path) override { trackAccess(path); - if (cachedFingerprint) - return {path, *cachedFingerprint}; - - auto [subpath, baseFp] = base->getFingerprint(path); - if (!baseFp) - return {path, std::nullopt}; - - // Extend the base (git) fingerprint with a hash of dirty file - // paths and content. - HashSink hashSink{HashAlgorithm::SHA256}; - std::vector sorted(dirtyFiles.begin(), dirtyFiles.end()); - std::sort(sorted.begin(), sorted.end()); - for (auto & f : sorted) { - hashSink << f; - auto st = disk->maybeLstat(CanonPath(f)); - if (!st) { - hashSink << "D"; - } else if (st->type == Type::tRegular) { - hashSink << (st->isExecutable ? "X" : "F"); - hashSink << disk->readFile(CanonPath(f)); - } else if (st->type == Type::tSymlink) { - hashSink << "L"; - hashSink << disk->readLink(CanonPath(f)); + + auto key = path.abs(); + if (auto it = fingerprintCache.find(key); it != fingerprintCache.end()) + return {path, it->second}; + + // Get the git object SHA for this specific path (tree SHA for dirs, blob SHA for files). + // This is content-addressed: same content = same SHA regardless of commit. + std::string baseSha; + if (path.isRoot()) { + baseSha = rootTreeSha.gitRev(); + } else { + auto sha = repo->getPathSha(rootTreeSha, std::string(path.rel())); + if (!sha) + return {path, std::nullopt}; + baseSha = sha->gitRev(); + } + + // Collect dirty files under this path. 
+ auto prefix = path.isRoot() ? "" : std::string(path.rel()) + "/"; + std::vector dirtyUnderPath; + for (auto & f : dirtyFiles) { + if (path.isRoot() || f.starts_with(prefix) || f == std::string(path.rel())) + dirtyUnderPath.push_back(f); + } + + std::string fp = "git:" + baseSha + ";e"; + + if (!dirtyUnderPath.empty()) { + std::sort(dirtyUnderPath.begin(), dirtyUnderPath.end()); + HashSink hashSink{HashAlgorithm::SHA256}; + for (auto & f : dirtyUnderPath) { + hashSink << f; + auto st = disk->maybeLstat(CanonPath(f)); + if (!st) { + hashSink << "D"; + } else if (st->type == Type::tRegular) { + hashSink << (st->isExecutable ? "X" : "F"); + hashSink << disk->readFile(CanonPath(f)); + } else if (st->type == Type::tSymlink) { + hashSink << "L"; + hashSink << disk->readLink(CanonPath(f)); + } } + fp += ";d=" + hashSink.finish().hash.to_string(HashFormat::Base16, false); } - cachedFingerprint = *baseFp + ";d=" + hashSink.finish().hash.to_string(HashFormat::Base16, false); - return {path, *cachedFingerprint}; + + fingerprintCache[key] = fp; + return {path, fp}; } DirEntries readDirectory(const CanonPath & path) override @@ -764,7 +791,7 @@ ref EvalState::getRepoAccessor() } catch (...) {} repoAccessor = make_ref( - baseAccessor, makeFSSourceAccessor(checkoutPath), std::move(dirtyFiles)); + baseAccessor, makeFSSourceAccessor(checkoutPath), repo, rootTreeSha, std::move(dirtyFiles)); } else { repoAccessor = baseAccessor; } diff --git a/src/libexpr/include/nix/expr/primops.hh b/src/libexpr/include/nix/expr/primops.hh index 8854f6b03847..3cb92a6409f0 100644 --- a/src/libexpr/include/nix/expr/primops.hh +++ b/src/libexpr/include/nix/expr/primops.hh @@ -38,4 +38,10 @@ void prim_exec(EvalState & state, const PosIdx pos, Value ** args, Value & v); void makePositionThunks(EvalState & state, const PosIdx pos, Value & line, Value & column); +/** + * Reconstruct a derivation Value from a .drv store path. 
+ */ +void derivationToValue( + EvalState & state, const PosIdx pos, const SourcePath & path, const StorePath & storePath, Value & v); + } // namespace nix diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index 868ec355f7cd..d37482e11e81 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -2,6 +2,9 @@ #include "nix/expr/eval-inline.hh" #include "nix/expr/eval-settings.hh" #include "nix/fetchers/git-utils.hh" +#include "nix/fetchers/cache.hh" +#include "nix/fetchers/attrs.hh" +#include "nix/fetchers/fetch-settings.hh" #include "nix/store/store-api.hh" #include "nix/fetchers/fetch-to-store.hh" #include "nix/util/processes.hh" @@ -567,28 +570,123 @@ static Value & getResolveFunction(EvalState & state, const PosIdx pos, return *resolveAttr->value; } +// Collect tracked paths from a TrackingContext, collapsing children under +// directory roots (when a directory is imported as a unit via src = ./., +// both the directory and its files get tracked — keep only the directory). +static std::vector collectTrackedPaths(const TrackingContext & ctx) +{ + std::vector paths; + ctx.accessedPaths.visit_all([&](const std::string & p) { + paths.push_back(p); + }); + std::sort(paths.begin(), paths.end()); + + std::vector collapsed; + for (auto & p : paths) { + if (!collapsed.empty() && p.starts_with(collapsed.back() + "/")) + continue; + collapsed.push_back(p); + } + return collapsed; +} + +// Compute a content-addressed fingerprint from per-path fingerprints of tracked paths. +// Returns nullopt if any path can't be fingerprinted (e.g. deleted from git tree). 
+static std::optional computeDepsFp( + ref accessor, const std::vector & trackedPaths) +{ + HashSink hashSink(HashAlgorithm::SHA256); + for (auto & path : trackedPaths) { + auto [subpath, fp] = accessor->getFingerprint(CanonPath(path)); + if (!fp) return std::nullopt; + hashSink << path << *fp; + } + auto [hash, len] = hashSink.finish(); + return hash.to_string(HashFormat::Base16, false); +} + // ============================================================================ // builtins.tecnixTargets { gitDir, moduleSrc, targets, system, ... } // Resolves targets via module contract, returns list of derivations. -// Tracks file accesses per target. +// Tracks file accesses per target. Caches per-target by dependency fingerprint. // ============================================================================ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** args, Value & v) { auto tArgs = parseTecnixArgs(state, pos, args); auto & resolveFn = getResolveFunction(state, pos, tArgs); + auto cache = state.fetchSettings.getCache(); + auto accessor = state.getRepoAccessor(); auto list = state.buildList(tArgs.targets.size()); for (size_t i = 0; i < tArgs.targets.size(); i++) { + auto & target = tArgs.targets[i]; + + // Check cached dependency graph + fetchers::Cache::Key graphKey = {"tecnixGraph", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}}}; + + if (auto cached = cache->lookup(graphKey)) { + auto trackedPaths = nlohmann::json::parse( + fetchers::getStrAttr(*cached, "paths")).get>(); + auto depsFp = computeDepsFp(accessor, trackedPaths); + if (depsFp) { + fetchers::Cache::Key resultKey = {"tecnixResult", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}; + if (auto cachedResult = cache->lookup(resultKey)) { + auto drvPathStr = fetchers::getStrAttr(*cachedResult, "drvPath"); + auto storePath = state.store->parseStorePath(drvPathStr); + if 
(state.store->isValidPath(storePath)) { + debug("tecnixTargets: cache hit for '%s' (depsFp=%s)", target, *depsFp); + list[i] = state.allocValue(); + derivationToValue(state, pos, + state.storePath(storePath), storePath, *list[i]); + continue; + } + } + } + } + + // Cache miss: full eval with tracking + debug("tecnixTargets: cache miss for '%s', evaluating", target); TrackingContext trackingCtx; auto prevCtx = currentTrackingContext; currentTrackingContext = &trackingCtx; auto * targetArg = state.allocValue(); - targetArg->mkString(tArgs.targets[i], state.mem); + targetArg->mkString(target, state.mem); list[i] = state.allocValue(); state.callFunction(const_cast(resolveFn), *targetArg, *list[i], pos); + // Force the value + drvPath to ensure all deps are tracked + state.forceValue(*list[i], pos); + std::string drvPathStr; + if (list[i]->type() == nAttrs) { + auto drvPathAttr = list[i]->attrs()->get(state.symbols.create("drvPath")); + if (drvPathAttr) { + state.forceValue(*drvPathAttr->value, pos); + NixStringContext ctx; + drvPathStr = state.coerceToString(pos, *drvPathAttr->value, ctx, + "while evaluating drvPath", true, false).toOwned(); + } + } + currentTrackingContext = prevCtx; + + // Store dependency graph + result + if (!drvPathStr.empty()) { + auto trackedPaths = collectTrackedPaths(trackingCtx); + cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); + + auto depsFp = computeDepsFp(accessor, trackedPaths); + if (depsFp) { + cache->upsert( + {"tecnixResult", {{"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}, + {{"drvPath", drvPathStr}}); + } + } } v.mkList(list); } @@ -619,10 +717,36 @@ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value * { auto tArgs = parseTecnixArgs(state, pos, args); auto & resolveFn = getResolveFunction(state, pos, tArgs); + auto cache = state.fetchSettings.getCache(); + auto accessor = state.getRepoAccessor(); nlohmann::json result; for 
(auto & target : tArgs.targets) { + // Check cached dependency graph + fetchers::Cache::Key graphKey = {"tecnixGraph", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}}}; + + if (auto cached = cache->lookup(graphKey)) { + auto trackedPaths = nlohmann::json::parse( + fetchers::getStrAttr(*cached, "paths")).get>(); + auto depsFp = computeDepsFp(accessor, trackedPaths); + if (depsFp) { + fetchers::Cache::Key resultKey = {"tecnixResult", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}; + if (auto cachedResult = cache->lookup(resultKey)) { + debug("tecnixDependencies: cache hit for '%s' (depsFp=%s)", target, *depsFp); + result[target] = nlohmann::json::parse( + fetchers::getStrAttr(*cachedResult, "result")); + continue; + } + } + } + + // Cache miss: full eval with tracking + debug("tecnixDependencies: cache miss for '%s', evaluating", target); TrackingContext trackingCtx; auto prevCtx = currentTrackingContext; currentTrackingContext = &trackingCtx; @@ -632,14 +756,8 @@ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value * auto * resolveResult = state.allocValue(); state.callFunction(const_cast(resolveFn), *targetArg, *resolveResult, pos); - // Force the value to WHNF first. state.forceValue(*resolveResult, pos); - // If the result is a derivation, force drvPath to trigger - // derivationStrict. This is critical: derivationStrict evaluates - // all derivation inputs (src, buildInputs, etc.), which triggers - // builtins.readFile calls (e.g. for Gemfile.lock, .ruby-version) - // that must be tracked. if (resolveResult->type() == nAttrs) { auto drvPathAttr = resolveResult->attrs()->get(state.symbols.create("drvPath")); if (drvPathAttr) @@ -648,25 +766,19 @@ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value * currentTrackingContext = prevCtx; - // Collect accessed paths, collapsing children under directory roots. 
- // When a directory is imported as a unit (e.g. src = ./.), both the - // directory and its individual files get tracked. We only keep the - // directory since it subsumes all children. - std::vector paths; - trackingCtx.accessedPaths.visit_all([&](const std::string & p) { - paths.push_back(p); - }); - std::sort(paths.begin(), paths.end()); - - std::vector collapsed; - for (auto & p : paths) { - // After sorting, parent dirs come before their children. - // Skip any path that falls under the last kept directory. - if (!collapsed.empty() && p.starts_with(collapsed.back() + "/")) - continue; - collapsed.push_back(p); + auto trackedPaths = collectTrackedPaths(trackingCtx); + result[target] = trackedPaths; + + // Store dependency graph + result + cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); + + auto depsFp = computeDepsFp(accessor, trackedPaths); + if (depsFp) { + cache->upsert( + {"tecnixResult", {{"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}, + {{"result", nlohmann::json(trackedPaths).dump()}}); } - result[target] = collapsed; } auto resultStr = result.dump(); diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 9e79cdbff8d3..ccc53ff0d68f 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -622,6 +622,21 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return toHash(*git_tree_entry_id(entry)); } + std::optional getPathSha(const Hash & treeSha, const std::string & relPath) override + { + auto oid = hashToOID(treeSha); + git_tree * rawTree = nullptr; + if (git_tree_lookup(&rawTree, *this, &oid)) + throw Error("looking up tree %s: %s", treeSha.gitRev(), git_error_last()->message); + Finally freeTree([&]() { git_tree_free(rawTree); }); + + git_tree_entry * entry = nullptr; + if (git_tree_entry_bypath(&entry, rawTree, relPath.c_str()) != 0) + return std::nullopt; + Finally freeEntry([&]() { git_tree_entry_free(entry); 
}); + return toHash(*git_tree_entry_id(entry)); + } + Hash getCommitTree(const Hash & commitSha) override { auto oid = hashToOID(commitSha); diff --git a/src/libfetchers/include/nix/fetchers/git-utils.hh b/src/libfetchers/include/nix/fetchers/git-utils.hh index fd14cab555b6..ff20d6fbe4a3 100644 --- a/src/libfetchers/include/nix/fetchers/git-utils.hh +++ b/src/libfetchers/include/nix/fetchers/git-utils.hh @@ -107,6 +107,9 @@ struct GitRepo /** Get the SHA of a subtree entry within a tree object */ virtual Hash getSubtreeSha(const Hash & treeSha, const std::string & entryName) = 0; + /** Get the SHA of any entry (blob or tree) by full relative path within a tree */ + virtual std::optional getPathSha(const Hash & treeSha, const std::string & relPath) = 0; + /** Get the root tree SHA from a commit SHA */ virtual Hash getCommitTree(const Hash & commitSha) = 0; diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index f71f66db5d87..7acbf7882351 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -25,8 +25,53 @@ #include +#if __APPLE__ +# include +# include +#endif + namespace nix::daemon { +#if __APPLE__ +class ScopedDaemonPriorityAdjustment +{ + std::optional oldPriority; + +public: + ScopedDaemonPriorityAdjustment(RecursiveFlag recursive, WorkerProto::Op op) + { + bool backgroundLikeOp = + recursive || op == WorkerProto::Op::CollectGarbage || op == WorkerProto::Op::OptimiseStore + || op == WorkerProto::Op::VerifyStore; + if (!backgroundLikeOp) + return; + + errno = 0; + auto currentPriority = getpriority(PRIO_PROCESS, 0); + if (currentPriority == -1 && errno != 0) + return; + + oldPriority = currentPriority; + + constexpr int backgroundPriority = 10; + if (currentPriority >= backgroundPriority) + return; + + if (setpriority(PRIO_PROCESS, 0, backgroundPriority) != 0) + oldPriority = std::nullopt; + } + + ~ScopedDaemonPriorityAdjustment() + { + if (!oldPriority) + return; + + /* Best-effort restoration; no-op on failure. 
*/ + (void) setpriority(PRIO_PROCESS, 0, *oldPriority); + } +}; +#endif + Sink & operator<<(Sink & sink, const Logger::Fields & fields) { sink << fields.size(); @@ -310,6 +355,10 @@ static void performOp( WorkerProto::ReadConn rconn(conn); WorkerProto::WriteConn wconn(conn); +#if __APPLE__ + ScopedDaemonPriorityAdjustment daemonPriorityAdjustment(recursive, op); +#endif + switch (op) { case WorkerProto::Op::IsValidPath: { diff --git a/src/nix/build.cc b/src/nix/build.cc index 1dd42c50f8eb..c8c437a08b5a 100644 --- a/src/nix/build.cc +++ b/src/nix/build.cc @@ -7,6 +7,7 @@ #include "nix/fetchers/cache.hh" #include "nix/fetchers/attrs.hh" #include "nix/fetchers/fetch-settings.hh" +#include "nix/expr/eval.hh" #include "nix/util/hash.hh" #include "nix/util/processes.hh" #include "nix/util/file-system.hh" @@ -148,108 +149,115 @@ struct CmdBuild : InstallablesCommand, MixOutLinkByDefault, MixDryRun, MixJSON, ; } - std::optional computeEvalCacheKey( - const std::vector & rawInstallables) + bool canEvalCache() const { - if (!expr) return std::nullopt; - if (!evalSettings.pureEval) return std::nullopt; - std::string gitSha = evalSettings.tectonixGitSha; - if (gitSha.empty()) return std::nullopt; + return expr && evalSettings.pureEval + && !std::string(evalSettings.tectonixGitSha).empty(); + } + // Compute a base key from expr + args (stable across commits/dirty states). 
+ std::string computeBaseKey(const std::vector & rawInstallables) + { HashSink hashSink(HashAlgorithm::SHA256); hashSink << *expr; for (auto & arg : rawInstallables) hashSink << arg; - hashSink << gitSha; - - // Include dirty file state - std::string checkoutPath = evalSettings.tectonixCheckoutPath; - if (!checkoutPath.empty()) { - try { - auto statusOutput = runProgram( - "git", true, {"-C", checkoutPath, "status", "--porcelain", "-z"}); - // Parse NUL-separated entries, sort for determinism - std::vector dirtyFiles; - size_t pos = 0; - while (pos < statusOutput.size()) { - auto nulPos = statusOutput.find('\0', pos); - if (nulPos == std::string::npos) break; - auto entry = statusOutput.substr(pos, nulPos - pos); - pos = nulPos + 1; - if (entry.size() < 4) continue; - char xy0 = entry[0]; - auto filePath = entry.substr(3); - dirtyFiles.push_back(filePath); - // Skip rename/copy source path - if (xy0 == 'R' || xy0 == 'C') { - auto nextNul = statusOutput.find('\0', pos); - if (nextNul != std::string::npos) { - dirtyFiles.push_back(statusOutput.substr(pos, nextNul - pos)); - pos = nextNul + 1; - } - } - } - std::sort(dirtyFiles.begin(), dirtyFiles.end()); - for (auto & f : dirtyFiles) { - hashSink << f; - auto fullPath = std::filesystem::path(checkoutPath) / f; - if (std::filesystem::exists(fullPath) && std::filesystem::is_regular_file(fullPath)) - hashSink << readFile(fullPath.string()); - } - } catch (...) { - // If git status fails, skip caching - return std::nullopt; - } - } + auto [hash, len] = hashSink.finish(); + return hash.to_string(HashFormat::SRI, true); + } + + // Compute a content-addressed fingerprint from per-path fingerprints of tracked paths. 
+ std::optional computeDepsFp( + ref accessor, const nlohmann::json & trackedPaths) + { + std::vector sortedPaths; + for (auto & [path, _] : trackedPaths.items()) + sortedPaths.push_back(path); + std::sort(sortedPaths.begin(), sortedPaths.end()); + HashSink hashSink(HashAlgorithm::SHA256); + for (auto & path : sortedPaths) { + auto [subpath, fp] = accessor->getFingerprint(CanonPath(path)); + if (!fp) return std::nullopt; + hashSink << path << *fp; + } auto [hash, len] = hashSink.finish(); - return fetchers::Cache::Key{ - "evalResult", - {{"fingerprint", hash.to_string(HashFormat::SRI, true)}}}; + return hash.to_string(HashFormat::SRI, true); + } + + bool tryReturnCached(ref store, const fetchers::Cache::Key & key) + { + auto cached = fetchSettings.getCache()->lookup(key); + if (!cached) return false; + + auto outPathsStr = fetchers::getStrAttr(*cached, "outPaths"); + auto paths = tokenizeString>(outPathsStr, "\n"); + for (auto & p : paths) { + if (!store->maybeQueryPathInfo(store->parseStorePath(p))) + return false; + } + if (printOutputPaths) { + logger->stop(); + for (auto & p : paths) + logger->cout("%s", p); + } + return true; } void run(ref store, std::vector && rawInstallables) override { - // Try eval cache (only when --expr + --tectonix-git-sha are set, - // and not in dry-run or rebuild mode) - if (!dryRun && buildMode == bmNormal) { - auto cacheKey = computeEvalCacheKey(rawInstallables); - if (cacheKey) { - if (auto cached = fetchSettings.getCache()->lookup(*cacheKey)) { - auto outPathsStr = fetchers::getStrAttr(*cached, "outPaths"); - auto paths = tokenizeString>(outPathsStr, "\n"); - // Verify all store paths still exist - bool allExist = true; - for (auto & p : paths) { - if (!store->maybeQueryPathInfo(store->parseStorePath(p))) { - allExist = false; - break; - } - } - if (allExist) { - if (printOutputPaths) { - logger->stop(); - for (auto & p : paths) - logger->cout("%s", p); - } + if (!dryRun && buildMode == bmNormal && canEvalCache()) { + auto cache 
= fetchSettings.getCache(); + auto baseKey = computeBaseKey(rawInstallables); + auto & evalState = *getEvalState(); + auto accessor = evalState.getRepoAccessor(); + + // Check cached dependency graph: compute per-path fingerprints + // for only the tracked paths, ignoring unrelated dirty files. + fetchers::Cache::Key pathsKey = {"evalPaths", {{"baseKey", baseKey}}}; + if (auto cached = cache->lookup(pathsKey)) { + auto trackedPaths = nlohmann::json::parse( + fetchers::getStrAttr(*cached, "paths")); + auto depsFp = computeDepsFp(accessor, trackedPaths); + if (depsFp) { + fetchers::Cache::Key depsKey = {"evalResult", { + {"baseKey", baseKey}, {"depsFp", *depsFp}}}; + if (tryReturnCached(store, depsKey)) { + debug("eval cache: hit (content-addressed match)"); return; } } + } - // Normal flow: evaluate + build - InstallablesCommand::run(store, std::move(rawInstallables)); + // Cache miss: full eval with tracking + debug("eval cache: miss, running full eval with tracking"); + TrackingContext trackingCtx; + currentTrackingContext = &trackingCtx; + InstallablesCommand::run(store, std::move(rawInstallables)); + currentTrackingContext = nullptr; + + // Store tracked paths and result + if (!buildOutputPaths_.empty()) { + std::string outPaths; + for (auto & p : buildOutputPaths_) { + if (!outPaths.empty()) outPaths += '\n'; + outPaths += store->printStorePath(p); + } - // Cache output paths - if (!buildOutputPaths_.empty()) { - std::string outPaths; - for (auto & p : buildOutputPaths_) { - if (!outPaths.empty()) outPaths += '\n'; - outPaths += store->printStorePath(p); - } - fetchSettings.getCache()->upsert(*cacheKey, {{"outPaths", outPaths}}); + nlohmann::json trackedPaths; + trackingCtx.accessedPaths.visit_all([&](const auto & p) { + trackedPaths[p] = true; + }); + cache->upsert(pathsKey, {{"paths", trackedPaths.dump()}}); + + auto depsFp = computeDepsFp(accessor, trackedPaths); + if (depsFp) { + cache->upsert( + {"evalResult", {{"baseKey", baseKey}, {"depsFp", *depsFp}}}, 
+ {{"outPaths", outPaths}}); } - return; } + return; } // Fallback: normal flow without caching From 95a3572a290fff470cb3992f2db002533f1635b9 Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 7 Mar 2026 13:52:12 -0600 Subject: [PATCH 10/12] Checkpoint --- src/libexpr/eval.cc | 11 +- src/libexpr/primops/tectonix.cc | 145 ++++++++++++++------------ src/nix/build.cc | 174 +++++++++++++++----------------- 3 files changed, 171 insertions(+), 159 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index a8a66298ad41..4d14c21d1b86 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -611,7 +611,7 @@ struct DirtyOverlaySourceAccessor : SourceAccessor ref repo; Hash rootTreeSha; boost::unordered_flat_set dirtyFiles, dirtyDirs; - mutable std::unordered_map fingerprintCache; + mutable boost::concurrent_flat_map fingerprintCache; DirtyOverlaySourceAccessor( ref base, ref disk, ref repo, Hash rootTreeSha, @@ -672,8 +672,11 @@ struct DirtyOverlaySourceAccessor : SourceAccessor trackAccess(path); auto key = path.abs(); - if (auto it = fingerprintCache.find(key); it != fingerprintCache.end()) - return {path, it->second}; + { + std::string cached; + if (fingerprintCache.visit(key, [&](const auto & entry) { cached = entry.second; })) + return {path, cached}; + } // Get the git object SHA for this specific path (tree SHA for dirs, blob SHA for files). // This is content-addressed: same content = same SHA regardless of commit. 
@@ -716,7 +719,7 @@ struct DirtyOverlaySourceAccessor : SourceAccessor fp += ";d=" + hashSink.finish().hash.to_string(HashFormat::Base16, false); } - fingerprintCache[key] = fp; + fingerprintCache.insert_or_assign(key, fp); return {path, fp}; } diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index d37482e11e81..e4799a84939c 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -617,50 +617,57 @@ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** arg auto cache = state.fetchSettings.getCache(); auto accessor = state.getRepoAccessor(); + bool useCache = state.settings.pureEval; + auto list = state.buildList(tArgs.targets.size()); for (size_t i = 0; i < tArgs.targets.size(); i++) { auto & target = tArgs.targets[i]; - // Check cached dependency graph - fetchers::Cache::Key graphKey = {"tecnixGraph", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}}}; - - if (auto cached = cache->lookup(graphKey)) { - auto trackedPaths = nlohmann::json::parse( - fetchers::getStrAttr(*cached, "paths")).get>(); - auto depsFp = computeDepsFp(accessor, trackedPaths); - if (depsFp) { - fetchers::Cache::Key resultKey = {"tecnixResult", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}; - if (auto cachedResult = cache->lookup(resultKey)) { - auto drvPathStr = fetchers::getStrAttr(*cachedResult, "drvPath"); - auto storePath = state.store->parseStorePath(drvPathStr); - if (state.store->isValidPath(storePath)) { - debug("tecnixTargets: cache hit for '%s' (depsFp=%s)", target, *depsFp); - list[i] = state.allocValue(); - derivationToValue(state, pos, - state.storePath(storePath), storePath, *list[i]); - continue; + if (useCache) { + // Check cached dependency graph + fetchers::Cache::Key graphKey = {"tecnixTargetsGraph", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}}}; + + if (auto 
cached = cache->lookup(graphKey)) { + auto trackedPaths = nlohmann::json::parse( + fetchers::getStrAttr(*cached, "paths")).get>(); + auto depsFp = computeDepsFp(accessor, trackedPaths); + if (depsFp) { + fetchers::Cache::Key resultKey = {"tecnixTargetsResult", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}; + if (auto cachedResult = cache->lookup(resultKey)) { + auto drvPathStr = fetchers::getStrAttr(*cachedResult, "drvPath"); + auto storePath = state.store->parseStorePath(drvPathStr); + if (state.store->isValidPath(storePath)) { + debug("tecnixTargets: cache hit for '%s' (depsFp=%s)", target, *depsFp); + list[i] = state.allocValue(); + derivationToValue(state, pos, + state.storePath(storePath), storePath, *list[i]); + continue; + } } } } } - // Cache miss: full eval with tracking - debug("tecnixTargets: cache miss for '%s', evaluating", target); + // Eval with tracking + debug("tecnixTargets: evaluating '%s'", target); TrackingContext trackingCtx; auto prevCtx = currentTrackingContext; - currentTrackingContext = &trackingCtx; + if (useCache) + currentTrackingContext = &trackingCtx; auto * targetArg = state.allocValue(); targetArg->mkString(target, state.mem); list[i] = state.allocValue(); state.callFunction(const_cast(resolveFn), *targetArg, *list[i], pos); - - // Force the value + drvPath to ensure all deps are tracked state.forceValue(*list[i], pos); + + currentTrackingContext = prevCtx; + + // Force drvPath (outside tracking — triggers deep eval / fetchToStore) std::string drvPathStr; if (list[i]->type() == nAttrs) { auto drvPathAttr = list[i]->attrs()->get(state.symbols.create("drvPath")); @@ -672,17 +679,18 @@ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** arg } } - currentTrackingContext = prevCtx; - - // Store dependency graph + result - if (!drvPathStr.empty()) { + // Store dependency graph + result (only for pure evals) + if (useCache && !drvPathStr.empty()) { auto 
trackedPaths = collectTrackedPaths(trackingCtx); + fetchers::Cache::Key graphKey = {"tecnixTargetsGraph", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}}}; cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); auto depsFp = computeDepsFp(accessor, trackedPaths); if (depsFp) { cache->upsert( - {"tecnixResult", {{"target", target}, {"gitDir", tArgs.gitDir}, + {"tecnixTargetsResult", {{"target", target}, {"gitDir", tArgs.gitDir}, {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}, {{"drvPath", drvPathStr}}); } @@ -720,64 +728,73 @@ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value * auto cache = state.fetchSettings.getCache(); auto accessor = state.getRepoAccessor(); + bool useCache = state.settings.pureEval; + nlohmann::json result; for (auto & target : tArgs.targets) { - // Check cached dependency graph - fetchers::Cache::Key graphKey = {"tecnixGraph", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}}}; - - if (auto cached = cache->lookup(graphKey)) { - auto trackedPaths = nlohmann::json::parse( - fetchers::getStrAttr(*cached, "paths")).get>(); - auto depsFp = computeDepsFp(accessor, trackedPaths); - if (depsFp) { - fetchers::Cache::Key resultKey = {"tecnixResult", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}; - if (auto cachedResult = cache->lookup(resultKey)) { - debug("tecnixDependencies: cache hit for '%s' (depsFp=%s)", target, *depsFp); - result[target] = nlohmann::json::parse( - fetchers::getStrAttr(*cachedResult, "result")); - continue; + if (useCache) { + fetchers::Cache::Key graphKey = {"tecnixDepsGraph", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}}}; + + if (auto cached = cache->lookup(graphKey)) { + auto trackedPaths = nlohmann::json::parse( + fetchers::getStrAttr(*cached, "paths")).get>(); + auto depsFp = computeDepsFp(accessor, 
trackedPaths); + if (depsFp) { + fetchers::Cache::Key resultKey = {"tecnixDepsResult", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}; + if (auto cachedResult = cache->lookup(resultKey)) { + debug("tecnixDependencies: cache hit for '%s' (depsFp=%s)", target, *depsFp); + result[target] = nlohmann::json::parse( + fetchers::getStrAttr(*cachedResult, "result")); + continue; + } } } } - // Cache miss: full eval with tracking - debug("tecnixDependencies: cache miss for '%s', evaluating", target); + // Eval with tracking + debug("tecnixDependencies: evaluating '%s'", target); TrackingContext trackingCtx; auto prevCtx = currentTrackingContext; - currentTrackingContext = &trackingCtx; + if (useCache) + currentTrackingContext = &trackingCtx; auto * targetArg = state.allocValue(); targetArg->mkString(target, state.mem); auto * resolveResult = state.allocValue(); state.callFunction(const_cast(resolveFn), *targetArg, *resolveResult, pos); - state.forceValue(*resolveResult, pos); + currentTrackingContext = prevCtx; + + // Force drvPath (outside tracking) if (resolveResult->type() == nAttrs) { auto drvPathAttr = resolveResult->attrs()->get(state.symbols.create("drvPath")); if (drvPathAttr) state.forceValue(*drvPathAttr->value, pos); } - currentTrackingContext = prevCtx; - auto trackedPaths = collectTrackedPaths(trackingCtx); result[target] = trackedPaths; - // Store dependency graph + result - cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); + // Store dependency graph + result (only for pure evals) + if (useCache) { + fetchers::Cache::Key graphKey = {"tecnixDepsGraph", { + {"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}}}; + cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); - auto depsFp = computeDepsFp(accessor, trackedPaths); - if (depsFp) { - cache->upsert( - {"tecnixResult", {{"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", 
tArgs.moduleSrc}, {"depsFp", *depsFp}}}, - {{"result", nlohmann::json(trackedPaths).dump()}}); + auto depsFp = computeDepsFp(accessor, trackedPaths); + if (depsFp) { + cache->upsert( + {"tecnixDepsResult", {{"target", target}, {"gitDir", tArgs.gitDir}, + {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}, + {{"result", nlohmann::json(trackedPaths).dump()}}); + } } } diff --git a/src/nix/build.cc b/src/nix/build.cc index c8c437a08b5a..1dd42c50f8eb 100644 --- a/src/nix/build.cc +++ b/src/nix/build.cc @@ -7,7 +7,6 @@ #include "nix/fetchers/cache.hh" #include "nix/fetchers/attrs.hh" #include "nix/fetchers/fetch-settings.hh" -#include "nix/expr/eval.hh" #include "nix/util/hash.hh" #include "nix/util/processes.hh" #include "nix/util/file-system.hh" @@ -149,115 +148,108 @@ struct CmdBuild : InstallablesCommand, MixOutLinkByDefault, MixDryRun, MixJSON, ; } - bool canEvalCache() const + std::optional computeEvalCacheKey( + const std::vector & rawInstallables) { - return expr && evalSettings.pureEval - && !std::string(evalSettings.tectonixGitSha).empty(); - } + if (!expr) return std::nullopt; + if (!evalSettings.pureEval) return std::nullopt; + std::string gitSha = evalSettings.tectonixGitSha; + if (gitSha.empty()) return std::nullopt; - // Compute a base key from expr + args (stable across commits/dirty states). - std::string computeBaseKey(const std::vector & rawInstallables) - { HashSink hashSink(HashAlgorithm::SHA256); hashSink << *expr; for (auto & arg : rawInstallables) hashSink << arg; - auto [hash, len] = hashSink.finish(); - return hash.to_string(HashFormat::SRI, true); - } - - // Compute a content-addressed fingerprint from per-path fingerprints of tracked paths. 
- std::optional computeDepsFp( - ref accessor, const nlohmann::json & trackedPaths) - { - std::vector sortedPaths; - for (auto & [path, _] : trackedPaths.items()) - sortedPaths.push_back(path); - std::sort(sortedPaths.begin(), sortedPaths.end()); - - HashSink hashSink(HashAlgorithm::SHA256); - for (auto & path : sortedPaths) { - auto [subpath, fp] = accessor->getFingerprint(CanonPath(path)); - if (!fp) return std::nullopt; - hashSink << path << *fp; + hashSink << gitSha; + + // Include dirty file state + std::string checkoutPath = evalSettings.tectonixCheckoutPath; + if (!checkoutPath.empty()) { + try { + auto statusOutput = runProgram( + "git", true, {"-C", checkoutPath, "status", "--porcelain", "-z"}); + // Parse NUL-separated entries, sort for determinism + std::vector dirtyFiles; + size_t pos = 0; + while (pos < statusOutput.size()) { + auto nulPos = statusOutput.find('\0', pos); + if (nulPos == std::string::npos) break; + auto entry = statusOutput.substr(pos, nulPos - pos); + pos = nulPos + 1; + if (entry.size() < 4) continue; + char xy0 = entry[0]; + auto filePath = entry.substr(3); + dirtyFiles.push_back(filePath); + // Skip rename/copy source path + if (xy0 == 'R' || xy0 == 'C') { + auto nextNul = statusOutput.find('\0', pos); + if (nextNul != std::string::npos) { + dirtyFiles.push_back(statusOutput.substr(pos, nextNul - pos)); + pos = nextNul + 1; + } + } + } + std::sort(dirtyFiles.begin(), dirtyFiles.end()); + for (auto & f : dirtyFiles) { + hashSink << f; + auto fullPath = std::filesystem::path(checkoutPath) / f; + if (std::filesystem::exists(fullPath) && std::filesystem::is_regular_file(fullPath)) + hashSink << readFile(fullPath.string()); + } + } catch (...) 
{ + // If git status fails, skip caching + return std::nullopt; + } } - auto [hash, len] = hashSink.finish(); - return hash.to_string(HashFormat::SRI, true); - } - bool tryReturnCached(ref store, const fetchers::Cache::Key & key) - { - auto cached = fetchSettings.getCache()->lookup(key); - if (!cached) return false; - - auto outPathsStr = fetchers::getStrAttr(*cached, "outPaths"); - auto paths = tokenizeString>(outPathsStr, "\n"); - for (auto & p : paths) { - if (!store->maybeQueryPathInfo(store->parseStorePath(p))) - return false; - } - if (printOutputPaths) { - logger->stop(); - for (auto & p : paths) - logger->cout("%s", p); - } - return true; + auto [hash, len] = hashSink.finish(); + return fetchers::Cache::Key{ + "evalResult", + {{"fingerprint", hash.to_string(HashFormat::SRI, true)}}}; } void run(ref store, std::vector && rawInstallables) override { - if (!dryRun && buildMode == bmNormal && canEvalCache()) { - auto cache = fetchSettings.getCache(); - auto baseKey = computeBaseKey(rawInstallables); - auto & evalState = *getEvalState(); - auto accessor = evalState.getRepoAccessor(); - - // Check cached dependency graph: compute per-path fingerprints - // for only the tracked paths, ignoring unrelated dirty files. 
- fetchers::Cache::Key pathsKey = {"evalPaths", {{"baseKey", baseKey}}}; - if (auto cached = cache->lookup(pathsKey)) { - auto trackedPaths = nlohmann::json::parse( - fetchers::getStrAttr(*cached, "paths")); - auto depsFp = computeDepsFp(accessor, trackedPaths); - if (depsFp) { - fetchers::Cache::Key depsKey = {"evalResult", { - {"baseKey", baseKey}, {"depsFp", *depsFp}}}; - if (tryReturnCached(store, depsKey)) { - debug("eval cache: hit (content-addressed match)"); + // Try eval cache (only when --expr + --tectonix-git-sha are set, + // and not in dry-run or rebuild mode) + if (!dryRun && buildMode == bmNormal) { + auto cacheKey = computeEvalCacheKey(rawInstallables); + if (cacheKey) { + if (auto cached = fetchSettings.getCache()->lookup(*cacheKey)) { + auto outPathsStr = fetchers::getStrAttr(*cached, "outPaths"); + auto paths = tokenizeString>(outPathsStr, "\n"); + // Verify all store paths still exist + bool allExist = true; + for (auto & p : paths) { + if (!store->maybeQueryPathInfo(store->parseStorePath(p))) { + allExist = false; + break; + } + } + if (allExist) { + if (printOutputPaths) { + logger->stop(); + for (auto & p : paths) + logger->cout("%s", p); + } return; } } - } - // Cache miss: full eval with tracking - debug("eval cache: miss, running full eval with tracking"); - TrackingContext trackingCtx; - currentTrackingContext = &trackingCtx; - InstallablesCommand::run(store, std::move(rawInstallables)); - currentTrackingContext = nullptr; - - // Store tracked paths and result - if (!buildOutputPaths_.empty()) { - std::string outPaths; - for (auto & p : buildOutputPaths_) { - if (!outPaths.empty()) outPaths += '\n'; - outPaths += store->printStorePath(p); - } + // Normal flow: evaluate + build + InstallablesCommand::run(store, std::move(rawInstallables)); - nlohmann::json trackedPaths; - trackingCtx.accessedPaths.visit_all([&](const auto & p) { - trackedPaths[p] = true; - }); - cache->upsert(pathsKey, {{"paths", trackedPaths.dump()}}); - - auto depsFp = 
computeDepsFp(accessor, trackedPaths); - if (depsFp) { - cache->upsert( - {"evalResult", {{"baseKey", baseKey}, {"depsFp", *depsFp}}}, - {{"outPaths", outPaths}}); + // Cache output paths + if (!buildOutputPaths_.empty()) { + std::string outPaths; + for (auto & p : buildOutputPaths_) { + if (!outPaths.empty()) outPaths += '\n'; + outPaths += store->printStorePath(p); + } + fetchSettings.getCache()->upsert(*cacheKey, {{"outPaths", outPaths}}); } + return; } - return; } // Fallback: normal flow without caching From a708b51ffe513b0b4f0a6569c40c48a9e4b7fcea Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sat, 7 Mar 2026 13:59:56 -0600 Subject: [PATCH 11/12] Checkpoint --- src/libexpr/primops/tectonix.cc | 16 ++-- src/libfetchers/fetch-to-store.cc | 2 +- src/libstore/daemon.cc | 49 ----------- src/nix/build.cc | 131 ------------------------------ 4 files changed, 9 insertions(+), 189 deletions(-) diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index e4799a84939c..f7fb0154808b 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -627,7 +627,7 @@ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** arg // Check cached dependency graph fetchers::Cache::Key graphKey = {"tecnixTargetsGraph", { {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}}}; + {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}}}; if (auto cached = cache->lookup(graphKey)) { auto trackedPaths = nlohmann::json::parse( @@ -636,7 +636,7 @@ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** arg if (depsFp) { fetchers::Cache::Key resultKey = {"tecnixTargetsResult", { {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}; + {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}, {"depsFp", *depsFp}}}; if (auto cachedResult = cache->lookup(resultKey)) { auto drvPathStr = fetchers::getStrAttr(*cachedResult, 
"drvPath"); auto storePath = state.store->parseStorePath(drvPathStr); @@ -684,14 +684,14 @@ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** arg auto trackedPaths = collectTrackedPaths(trackingCtx); fetchers::Cache::Key graphKey = {"tecnixTargetsGraph", { {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}}}; + {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}}}; cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); auto depsFp = computeDepsFp(accessor, trackedPaths); if (depsFp) { cache->upsert( {"tecnixTargetsResult", {{"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}, + {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}, {"depsFp", *depsFp}}}, {{"drvPath", drvPathStr}}); } } @@ -736,7 +736,7 @@ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value * if (useCache) { fetchers::Cache::Key graphKey = {"tecnixDepsGraph", { {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}}}; + {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}}}; if (auto cached = cache->lookup(graphKey)) { auto trackedPaths = nlohmann::json::parse( @@ -745,7 +745,7 @@ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value * if (depsFp) { fetchers::Cache::Key resultKey = {"tecnixDepsResult", { {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}; + {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}, {"depsFp", *depsFp}}}; if (auto cachedResult = cache->lookup(resultKey)) { debug("tecnixDependencies: cache hit for '%s' (depsFp=%s)", target, *depsFp); result[target] = nlohmann::json::parse( @@ -785,14 +785,14 @@ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value * if (useCache) { fetchers::Cache::Key graphKey = {"tecnixDepsGraph", { {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", 
tArgs.moduleSrc}}}; + {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}}}; cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); auto depsFp = computeDepsFp(accessor, trackedPaths); if (depsFp) { cache->upsert( {"tecnixDepsResult", {{"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"depsFp", *depsFp}}}, + {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}, {"depsFp", *depsFp}}}, {{"result", nlohmann::json(trackedPaths).dump()}}); } } diff --git a/src/libfetchers/fetch-to-store.cc b/src/libfetchers/fetch-to-store.cc index 30138f76c43a..6d70f9f9ec44 100644 --- a/src/libfetchers/fetch-to-store.cc +++ b/src/libfetchers/fetch-to-store.cc @@ -90,7 +90,7 @@ std::pair fetchToStore2( debug("source path '%s' is uncacheable", path); } - warn("fetchToStore2 SLOW PATH: '%s' (filter=%d, fingerprint=%s)", + debug("fetchToStore2 slow path: '%s' (filter=%d, fingerprint=%s)", path, (bool) filter, fingerprint ? *fingerprint : "none"); Activity act( diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 7acbf7882351..f71f66db5d87 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -25,53 +25,8 @@ #include -#if __APPLE__ -# include -# include -#endif - namespace nix::daemon { -#if __APPLE__ -class ScopedDaemonPriorityAdjustment -{ - std::optional oldPriority; - -public: - ScopedDaemonPriorityAdjustment(RecursiveFlag recursive, WorkerProto::Op op) - { - bool backgroundLikeOp = - recursive || op == WorkerProto::Op::CollectGarbage || op == WorkerProto::Op::OptimiseStore - || op == WorkerProto::Op::VerifyStore; - if (!backgroundLikeOp) - return; - - errno = 0; - auto currentPriority = getpriority(PRIO_PROCESS, 0); - if (currentPriority == -1 && errno != 0) - return; - - oldPriority = currentPriority; - - constexpr int backgroundPriority = 10; - if (currentPriority >= backgroundPriority) - return; - - if (setpriority(PRIO_PROCESS, 0, backgroundPriority) != 0) - oldPriority = std::nullopt; - } - - 
~ScopedDaemonPriorityAdjustment() - { - if (!oldPriority) - return; - - /* Best-effort restoration; no-op on failure. */ - (void) setpriority(PRIO_PROCESS, 0, *oldPriority); - } -}; -#endif - Sink & operator<<(Sink & sink, const Logger::Fields & fields) { sink << fields.size(); @@ -355,10 +310,6 @@ static void performOp( WorkerProto::ReadConn rconn(conn); WorkerProto::WriteConn wconn(conn); -#if __APPLE__ - ScopedDaemonPriorityAdjustment daemonPriorityAdjustment(recursive, op); -#endif - switch (op) { case WorkerProto::Op::IsValidPath: { diff --git a/src/nix/build.cc b/src/nix/build.cc index 1dd42c50f8eb..2d4f426a4954 100644 --- a/src/nix/build.cc +++ b/src/nix/build.cc @@ -1,19 +1,10 @@ #include "nix/cmd/command.hh" -#include "nix/cmd/common-eval-args.hh" #include "nix/main/common-args.hh" #include "nix/main/shared.hh" #include "nix/store/store-api.hh" #include "nix/store/local-fs-store.hh" -#include "nix/fetchers/cache.hh" -#include "nix/fetchers/attrs.hh" -#include "nix/fetchers/fetch-settings.hh" -#include "nix/util/hash.hh" -#include "nix/util/processes.hh" -#include "nix/util/file-system.hh" -#include "nix/util/strings.hh" #include -#include using namespace nix; @@ -119,7 +110,6 @@ struct CmdBuild : InstallablesCommand, MixOutLinkByDefault, MixDryRun, MixJSON, { bool printOutputPaths = false; BuildMode buildMode = bmNormal; - std::vector buildOutputPaths_; CmdBuild() { @@ -148,114 +138,6 @@ struct CmdBuild : InstallablesCommand, MixOutLinkByDefault, MixDryRun, MixJSON, ; } - std::optional computeEvalCacheKey( - const std::vector & rawInstallables) - { - if (!expr) return std::nullopt; - if (!evalSettings.pureEval) return std::nullopt; - std::string gitSha = evalSettings.tectonixGitSha; - if (gitSha.empty()) return std::nullopt; - - HashSink hashSink(HashAlgorithm::SHA256); - hashSink << *expr; - for (auto & arg : rawInstallables) - hashSink << arg; - hashSink << gitSha; - - // Include dirty file state - std::string checkoutPath = 
evalSettings.tectonixCheckoutPath; - if (!checkoutPath.empty()) { - try { - auto statusOutput = runProgram( - "git", true, {"-C", checkoutPath, "status", "--porcelain", "-z"}); - // Parse NUL-separated entries, sort for determinism - std::vector dirtyFiles; - size_t pos = 0; - while (pos < statusOutput.size()) { - auto nulPos = statusOutput.find('\0', pos); - if (nulPos == std::string::npos) break; - auto entry = statusOutput.substr(pos, nulPos - pos); - pos = nulPos + 1; - if (entry.size() < 4) continue; - char xy0 = entry[0]; - auto filePath = entry.substr(3); - dirtyFiles.push_back(filePath); - // Skip rename/copy source path - if (xy0 == 'R' || xy0 == 'C') { - auto nextNul = statusOutput.find('\0', pos); - if (nextNul != std::string::npos) { - dirtyFiles.push_back(statusOutput.substr(pos, nextNul - pos)); - pos = nextNul + 1; - } - } - } - std::sort(dirtyFiles.begin(), dirtyFiles.end()); - for (auto & f : dirtyFiles) { - hashSink << f; - auto fullPath = std::filesystem::path(checkoutPath) / f; - if (std::filesystem::exists(fullPath) && std::filesystem::is_regular_file(fullPath)) - hashSink << readFile(fullPath.string()); - } - } catch (...) 
{ - // If git status fails, skip caching - return std::nullopt; - } - } - - auto [hash, len] = hashSink.finish(); - return fetchers::Cache::Key{ - "evalResult", - {{"fingerprint", hash.to_string(HashFormat::SRI, true)}}}; - } - - void run(ref store, std::vector && rawInstallables) override - { - // Try eval cache (only when --expr + --tectonix-git-sha are set, - // and not in dry-run or rebuild mode) - if (!dryRun && buildMode == bmNormal) { - auto cacheKey = computeEvalCacheKey(rawInstallables); - if (cacheKey) { - if (auto cached = fetchSettings.getCache()->lookup(*cacheKey)) { - auto outPathsStr = fetchers::getStrAttr(*cached, "outPaths"); - auto paths = tokenizeString>(outPathsStr, "\n"); - // Verify all store paths still exist - bool allExist = true; - for (auto & p : paths) { - if (!store->maybeQueryPathInfo(store->parseStorePath(p))) { - allExist = false; - break; - } - } - if (allExist) { - if (printOutputPaths) { - logger->stop(); - for (auto & p : paths) - logger->cout("%s", p); - } - return; - } - } - - // Normal flow: evaluate + build - InstallablesCommand::run(store, std::move(rawInstallables)); - - // Cache output paths - if (!buildOutputPaths_.empty()) { - std::string outPaths; - for (auto & p : buildOutputPaths_) { - if (!outPaths.empty()) outPaths += '\n'; - outPaths += store->printStorePath(p); - } - fetchSettings.getCache()->upsert(*cacheKey, {{"outPaths", outPaths}}); - } - return; - } - } - - // Fallback: normal flow without caching - InstallablesCommand::run(store, std::move(rawInstallables)); - } - void run(ref store, Installables && installables) override { if (dryRun) { @@ -297,19 +179,6 @@ struct CmdBuild : InstallablesCommand, MixOutLinkByDefault, MixDryRun, MixJSON, } } - // Collect output paths for eval cache - for (auto & buildable : buildables) { - std::visit( - overloaded{ - [&](const BuiltPath::Opaque & bo) { buildOutputPaths_.push_back(bo.path); }, - [&](const BuiltPath::Built & bfd) { - for (auto & [_, path] : bfd.outputs) - 
buildOutputPaths_.push_back(path); - }, - }, - buildable.path.raw()); - } - BuiltPaths buildables2; for (auto & b : buildables) buildables2.push_back(b.path); From d9ac90ec94a208c6851004c1800901b13c56c2fc Mon Sep 17 00:00:00 2001 From: Josh Heinrichs Date: Sun, 8 Mar 2026 12:35:25 -0600 Subject: [PATCH 12/12] Source closure target eval caching --- src/libexpr/eval.cc | 31 +- src/libexpr/include/nix/expr/eval.hh | 16 + src/libexpr/primops/tectonix.cc | 440 +++++++++++++++++++-------- 3 files changed, 352 insertions(+), 135 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 4d14c21d1b86..71508ac02610 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -619,6 +619,7 @@ struct DirtyOverlaySourceAccessor : SourceAccessor : base(base), disk(disk), repo(repo), rootTreeSha(rootTreeSha), dirtyFiles(std::move(dirtyFiles)) { for (auto & f : this->dirtyFiles) { + debug("DirtyOverlaySourceAccessor: dirty file: '%s'", f); for (auto p = CanonPath(f); !p.isRoot();) { p.pop(); if (!dirtyDirs.insert(p.rel().empty() ? "" : std::string(p.rel())).second) @@ -631,7 +632,6 @@ struct DirtyOverlaySourceAccessor : SourceAccessor void trackAccess(const CanonPath & path) { if (auto ctx = currentTrackingContext; ctx && !path.isRoot()) { - debug("trackAccess: %s", path); ctx->recordAccess(std::string(path.rel())); } } @@ -674,8 +674,9 @@ struct DirtyOverlaySourceAccessor : SourceAccessor auto key = path.abs(); { std::string cached; - if (fingerprintCache.visit(key, [&](const auto & entry) { cached = entry.second; })) + if (fingerprintCache.visit(key, [&](const auto & entry) { cached = entry.second; })) { return {path, cached}; + } } // Get the git object SHA for this specific path (tree SHA for dirs, blob SHA for files). 
@@ -773,7 +774,7 @@ ref EvalState::getRepoAccessor() boost::unordered_flat_set dirtyFiles; try { auto gitStatusOutput = runProgram("git", true, - {"-C", checkoutPath, "status", "--porcelain", "-z"}); + {"-C", checkoutPath, "--no-optional-locks", "status", "--porcelain", "-z"}); size_t pos = 0; while (pos < gitStatusOutput.size()) { @@ -793,6 +794,15 @@ ref EvalState::getRepoAccessor() } } catch (...) {} + // Store dirty file/dir info for commit cache overlap checks. + repoDirtyFiles = dirtyFiles; + for (auto & f : repoDirtyFiles) { + for (auto p = CanonPath(f); !p.isRoot();) { + p.pop(); + repoDirtyDirs.insert(p.rel().empty() ? "" : std::string(p.rel())); + } + } + repoAccessor = make_ref( baseAccessor, makeFSSourceAccessor(checkoutPath), repo, rootTreeSha, std::move(dirtyFiles)); } else { @@ -804,6 +814,21 @@ ref EvalState::getRepoAccessor() return *repoAccessor; } +bool EvalState::dirtyFilesOverlap(const std::vector & trackedPaths) const +{ + if (repoDirtyFiles.empty()) return false; + for (auto & tp : trackedPaths) { + // Tracked path is a dirty file itself + if (repoDirtyFiles.contains(tp)) return true; + // Tracked path is a directory containing dirty files + if (repoDirtyDirs.contains(tp)) return true; + // No further prefix check is needed for a tracked file nested below + // a dirty path: git status reports dirty entries as files, never + // directories, so the two membership tests above are exhaustive. + } + return false; +} + StorePath EvalState::mountRepoAccessor() { std::call_once(repoMountFlag, [this]() { diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index ac51952a57e7..8cc10e60237a 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -554,6 +554,14 @@ private: mutable std::once_flag repoAccessorFlag; mutable std::optional> repoAccessor; + /** + * Dirty files and their parent directories from the repo checkout.
+ * Used to check whether a commit-keyed cache entry might be stale + * due to uncommitted changes overlapping with tracked paths. + */ + mutable boost::unordered_flat_set repoDirtyFiles; + mutable boost::unordered_flat_set repoDirtyDirs; + /** * Virtual store path where the repo-wide accessor is lazily mounted. * All repo subtree store paths are subpaths of this mount. @@ -643,6 +651,14 @@ public: */ ref getRepoAccessor(); + /** + * Check whether any dirty (uncommitted) files overlap with the given + * tracked paths. A dirty file overlaps if it equals a tracked path or + * lives under a tracked directory (i.e. the tracked path is the dirty + * file itself or one of its parent directories). + */ + bool dirtyFilesOverlap(const std::vector & trackedPaths) const; + /** * Lazily mount the repo-wide accessor and return the virtual store path. * All repo reads go through this mount so file accesses are tracked. diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index f7fb0154808b..f70dc166c9cf 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -430,17 +430,17 @@ static RegisterPrimOp primop_unsafeTectonixInternalZoneRoot({ // ============================================================================ /** - * Resolve the git SHA to use: explicit gitSha attr > checkout HEAD > error. + * Resolve the git SHA to use: explicit rev attr > checkout HEAD > error.
*/ -static std::string resolveGitSha(EvalState & state, const PosIdx pos, +static std::string resolveRev(EvalState & state, const PosIdx pos, const std::string & gitDir, const Bindings & attrs, const std::string & checkoutPath) { - // Check for explicit gitSha attr - auto gitShaAttr = attrs.get(state.symbols.create("gitSha")); - if (gitShaAttr) { - auto sha = state.forceStringNoCtx(*gitShaAttr->value, pos, - "while evaluating the 'gitSha' argument"); + // Check for explicit rev attr + auto revAttr = attrs.get(state.symbols.create("rev")); + if (revAttr) { + auto sha = state.forceStringNoCtx(*revAttr->value, pos, + "while evaluating the 'rev' argument"); if (!sha.empty()) return std::string(sha); } @@ -455,7 +455,7 @@ static std::string resolveGitSha(EvalState & state, const PosIdx pos, } catch (...) {} } - state.error("could not determine git SHA: set 'gitSha' or provide a valid 'checkoutPath'") + state.error("could not determine git SHA: set 'rev' or provide a valid 'checkoutPath'") .atPos(pos).debugThrow(); } @@ -464,8 +464,8 @@ static std::string resolveGitSha(EvalState & state, const PosIdx pos, */ struct TecnixArgs { std::string gitDir; - std::string moduleSrc; - std::string gitSha; + std::string resolver; + std::string rev; std::string checkoutPath; std::string system; std::vector targets; @@ -485,12 +485,12 @@ static TecnixArgs parseTecnixArgs(EvalState & state, const PosIdx pos, Value ** result.gitDir = std::string(state.forceStringNoCtx(*gitDirAttr->value, pos, "while evaluating the 'gitDir' argument")); - // Required: moduleSrc - auto moduleSrcAttr = attrs.get(state.symbols.create("moduleSrc")); - if (!moduleSrcAttr) - state.error("'moduleSrc' attribute required").atPos(pos).debugThrow(); - result.moduleSrc = std::string(state.forceStringNoCtx(*moduleSrcAttr->value, pos, - "while evaluating the 'moduleSrc' argument")); + // Required: resolver + auto resolverAttr = attrs.get(state.symbols.create("resolver")); + if (!resolverAttr) + state.error("'resolver' 
attribute required").atPos(pos).debugThrow(); + result.resolver = std::string(state.forceStringNoCtx(*resolverAttr->value, pos, + "while evaluating the 'resolver' argument")); // Required: targets auto targetsAttr = attrs.get(state.symbols.create("targets")); @@ -516,36 +516,43 @@ static TecnixArgs parseTecnixArgs(EvalState & state, const PosIdx pos, Value ** result.system = std::string(state.forceStringNoCtx(*systemAttr->value, pos, "while evaluating the 'system' argument")); - // Resolve gitSha - result.gitSha = resolveGitSha(state, pos, result.gitDir, attrs, result.checkoutPath); + // Resolve rev + result.rev = resolveRev(state, pos, result.gitDir, attrs, result.checkoutPath); return result; } /** - * Import moduleSrc/resolve.nix from the git repo and return the `resolve` - * function from its attrset. + * Configure tectonix eval settings from parsed args so that existing tectonix + * builtins work during module evaluation without requiring CLI flags. + * + * Must be called before getRepoAccessor() or getResolveFunction(). * - * This sets the tectonix eval settings (gitDir, gitSha, checkoutPath) so that - * existing tectonix builtins work during module evaluation without requiring - * CLI flags. + * NOTE: EvalSettings are normally immutable; we const_cast here because the + * tecnix builtins need to configure the evaluator for the repo they're pointed + * at. This is safe because we're in single-threaded primop dispatch. */ -static Value & getResolveFunction(EvalState & state, const PosIdx pos, - const TecnixArgs & tArgs) +static void configureTectonixSettings(EvalState & state, const TecnixArgs & tArgs) { - // Set tectonix settings so existing builtins work during module evaluation. - // NOTE: EvalSettings are normally immutable; we const_cast here because - // the tecnix builtins need to configure the evaluator for the repo they're - // pointed at. This is safe because we're in single-threaded primop dispatch. 
auto & mutableSettings = const_cast(state.settings); mutableSettings.tectonixGitDir.assign(tArgs.gitDir); - mutableSettings.tectonixGitSha.assign(tArgs.gitSha); + mutableSettings.tectonixGitSha.assign(tArgs.rev); if (!tArgs.checkoutPath.empty()) mutableSettings.tectonixCheckoutPath.assign(tArgs.checkoutPath); +} - // Get moduleSrc path from the lazily-mounted repo accessor. - auto moduleSrcPath = state.getRepoSubtreePath(tArgs.moduleSrc); - auto modulePath = SourcePath(state.rootFS, CanonPath(moduleSrcPath + "/resolve.nix")); +/** + * Import resolver/resolve.nix from the git repo and return the `resolve` + * function from its attrset. + * + * Requires configureTectonixSettings() to have been called first. + */ +static Value & getResolveFunction(EvalState & state, const PosIdx pos, + const TecnixArgs & tArgs) +{ + // Get resolver path from the lazily-mounted repo accessor. + auto resolverPath = state.getRepoSubtreePath(tArgs.resolver); + auto modulePath = SourcePath(state.rootFS, CanonPath(resolverPath + "/resolve.nix")); // Import resolve.nix (a function taking { system }) and call it auto * moduleFn = state.allocValue(); @@ -590,71 +597,188 @@ static std::vector collectTrackedPaths(const TrackingContext & ctx) return collapsed; } -// Compute a content-addressed fingerprint from per-path fingerprints of tracked paths. -// Returns nullopt if any path can't be fingerprinted (e.g. deleted from git tree). 
-static std::optional computeDepsFp( - ref accessor, const std::vector & trackedPaths) +// ============================================================================ +// Source closure cache: cross-commit matching via lazy fingerprinting +// ============================================================================ + +static constexpr size_t MAX_CLOSURES_PER_TARGET = 50; + +struct SourceClosure { + std::map pathFps; // path → fingerprint + std::string result; // drvPath for targets, JSON for deps +}; + +struct SetTrieNode { + // Children keyed by path, then by expected fingerprint. + // At each trie level we look up the path, compute its current fingerprint, + // and follow the child whose expected fingerprint matches. + std::map>> children; // path → (fp → child) + std::optional closureIdx; // leaf = matching closure index +}; + +static std::vector parseClosures(const fetchers::Attrs & attrs) +{ + std::vector closures; + auto closuresJson = nlohmann::json::parse(fetchers::getStrAttr(attrs, "closures")); + for (auto & cj : closuresJson) { + SourceClosure c; + for (auto & [path, fp] : cj.at("pathFps").items()) + c.pathFps[path] = fp.get(); + c.result = cj.at("result").get(); + closures.push_back(std::move(c)); + } + return closures; +} + +static fetchers::Attrs serializeClosures(const std::vector & closures) { - HashSink hashSink(HashAlgorithm::SHA256); - for (auto & path : trackedPaths) { - auto [subpath, fp] = accessor->getFingerprint(CanonPath(path)); - if (!fp) return std::nullopt; - hashSink << path << *fp; + nlohmann::json arr = nlohmann::json::array(); + for (auto & c : closures) { + nlohmann::json obj; + obj["pathFps"] = c.pathFps; + obj["result"] = c.result; + arr.push_back(std::move(obj)); + } + return {{"closures", arr.dump()}}; +} + +static std::unique_ptr buildSetTrie(const std::vector & closures) +{ + auto root = std::make_unique(); + for (size_t i = 0; i < closures.size(); i++) { + auto * node = root.get(); + // pathFps is std::map so already 
sorted by path + for (auto & [path, fp] : closures[i].pathFps) { + auto & child = node->children[path][fp]; + if (!child) + child = std::make_unique(); + node = child.get(); + } + node->closureIdx = i; } - auto [hash, len] = hashSink.finish(); - return hash.to_string(HashFormat::Base16, false); + return root; +} + +static std::optional searchSetTrie( + const SetTrieNode & node, ref accessor) +{ + if (node.closureIdx) + return *node.closureIdx; + for (auto & [path, fpChildren] : node.children) { + auto [_, currentFp] = accessor->getFingerprint(CanonPath(path)); + if (!currentFp) continue; + auto it = fpChildren.find(*currentFp); + if (it != fpChildren.end()) { + if (auto result = searchSetTrie(*it->second, accessor)) + return result; + } + } + return std::nullopt; +} + +static void appendClosure( + const std::shared_ptr & cache, + const fetchers::Cache::Key & closuresKey, + SourceClosure newClosure) +{ + std::vector closures; + if (auto existing = cache->lookup(closuresKey)) + closures = parseClosures(*existing); + + closures.push_back(std::move(newClosure)); + + while (closures.size() > MAX_CLOSURES_PER_TARGET) + closures.erase(closures.begin()); + + cache->upsert(closuresKey, serializeClosures(closures)); } // ============================================================================ -// builtins.tecnixTargets { gitDir, moduleSrc, targets, system, ... } +// builtins.tecnixTargets { gitDir, resolver, targets, system, ... } // Resolves targets via module contract, returns list of derivations. // Tracks file accesses per target. Caches per-target by dependency fingerprint. 
// ============================================================================ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** args, Value & v) { auto tArgs = parseTecnixArgs(state, pos, args); - auto & resolveFn = getResolveFunction(state, pos, tArgs); + configureTectonixSettings(state, tArgs); auto cache = state.fetchSettings.getCache(); auto accessor = state.getRepoAccessor(); - bool useCache = state.settings.pureEval; + // Track module loading so module source files are in each target's tracked set. + // This ensures dirty module changes are caught by depsFp. + TrackingContext moduleCtx; + if (useCache) + currentTrackingContext = &moduleCtx; + auto & resolveFn = getResolveFunction(state, pos, tArgs); + currentTrackingContext = nullptr; + auto list = state.buildList(tArgs.targets.size()); for (size_t i = 0; i < tArgs.targets.size(); i++) { auto & target = tArgs.targets[i]; if (useCache) { - // Check cached dependency graph - fetchers::Cache::Key graphKey = {"tecnixTargetsGraph", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}}}; - - if (auto cached = cache->lookup(graphKey)) { - auto trackedPaths = nlohmann::json::parse( - fetchers::getStrAttr(*cached, "paths")).get>(); - auto depsFp = computeDepsFp(accessor, trackedPaths); - if (depsFp) { - fetchers::Cache::Key resultKey = {"tecnixTargetsResult", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}, {"depsFp", *depsFp}}}; - if (auto cachedResult = cache->lookup(resultKey)) { - auto drvPathStr = fetchers::getStrAttr(*cachedResult, "drvPath"); - auto storePath = state.store->parseStorePath(drvPathStr); - if (state.store->isValidPath(storePath)) { - debug("tecnixTargets: cache hit for '%s' (depsFp=%s)", target, *depsFp); - list[i] = state.allocValue(); - derivationToValue(state, pos, - state.storePath(storePath), storePath, *list[i]); - continue; - } + // Load source 
closures (needed for both dirty overlap check and cross-commit matching) + fetchers::Cache::Key closuresKey = {"tecnixTargetsSourceClosures", { + {"target", target}, {"resolver", tArgs.resolver}, {"system", tArgs.system}}}; + std::vector closures; + if (auto closuresCache = cache->lookup(closuresKey)) + closures = parseClosures(*closuresCache); + + // Layer 1: same-commit fast path — skip git ODB entirely + fetchers::Cache::Key commitKey = {"tecnixTargetsCommit", { + {"target", target}, {"resolver", tArgs.resolver}, + {"system", tArgs.system}, {"rev", tArgs.rev}}}; + if (auto cached = cache->lookup(commitKey)) { + // Check dirty overlap using tracked paths from the most recent closure + bool dirtyOverlap = false; + if (!closures.empty()) { + std::vector trackedPaths; + for (auto & [p, _] : closures.back().pathFps) trackedPaths.push_back(p); + dirtyOverlap = state.dirtyFilesOverlap(trackedPaths); + } + if (!dirtyOverlap) { + auto drvPathStr = fetchers::getStrAttr(*cached, "drvPath"); + auto storePath = state.store->parseStorePath(drvPathStr); + if (state.store->isValidPath(storePath)) { + warn("tecnixTargets: commit cache hit for '%s'", target); + list[i] = state.allocValue(); + derivationToValue(state, pos, + state.storePath(storePath), storePath, *list[i]); + continue; + } + } else { + warn("tecnixTargets: commit cache skipped (dirty overlap) for '%s'", target); + } + } + + // Layer 2: source closure — cross-commit lookup via lazy fingerprinting + if (!closures.empty()) { + auto trie = buildSetTrie(closures); + if (auto matchIdx = searchSetTrie(*trie, accessor)) { + auto & closure = closures[*matchIdx]; + auto storePath = state.store->parseStorePath(closure.result); + if (state.store->isValidPath(storePath)) { + warn("tecnixTargets: source closure hit for '%s'", target); + list[i] = state.allocValue(); + derivationToValue(state, pos, + state.storePath(storePath), storePath, *list[i]); + // Populate commit cache for next same-commit lookup + 
cache->upsert(commitKey, {{"drvPath", closure.result}}); + continue; } } } } // Eval with tracking - debug("tecnixTargets: evaluating '%s'", target); + warn("tecnixTargets: cache miss, evaluating '%s'", target); TrackingContext trackingCtx; + // Merge module paths so module source changes are captured in depsFp + moduleCtx.accessedPaths.visit_all([&](const std::string & p) { + trackingCtx.accessedPaths.insert(p); + }); auto prevCtx = currentTrackingContext; if (useCache) currentTrackingContext = &trackingCtx; @@ -663,11 +787,9 @@ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** arg targetArg->mkString(target, state.mem); list[i] = state.allocValue(); state.callFunction(const_cast(resolveFn), *targetArg, *list[i], pos); - state.forceValue(*list[i], pos); - currentTrackingContext = prevCtx; - - // Force drvPath (outside tracking — triggers deep eval / fetchToStore) + // Force the value + drvPath to ensure all deps are tracked + state.forceValue(*list[i], pos); std::string drvPathStr; if (list[i]->type() == nAttrs) { auto drvPathAttr = list[i]->attrs()->get(state.symbols.create("drvPath")); @@ -679,21 +801,28 @@ static void prim_tecnixTargets(EvalState & state, const PosIdx pos, Value ** arg } } - // Store dependency graph + result (only for pure evals) + currentTrackingContext = prevCtx; + + // Cache result (only for pure evals) if (useCache && !drvPathStr.empty()) { auto trackedPaths = collectTrackedPaths(trackingCtx); - fetchers::Cache::Key graphKey = {"tecnixTargetsGraph", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}}}; - cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); - - auto depsFp = computeDepsFp(accessor, trackedPaths); - if (depsFp) { - cache->upsert( - {"tecnixTargetsResult", {{"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}, {"depsFp", *depsFp}}}, - {{"drvPath", drvPathStr}}); + + // 
Same-commit fast path + cache->upsert( + {"tecnixTargetsCommit", {{"target", target}, {"resolver", tArgs.resolver}, + {"system", tArgs.system}, {"rev", tArgs.rev}}}, + {{"drvPath", drvPathStr}}); + + // Source closure for cross-commit matching + SourceClosure newClosure; + for (auto & path : trackedPaths) { + auto [_, fp] = accessor->getFingerprint(CanonPath(path)); + if (fp) newClosure.pathFps[path] = *fp; } + newClosure.result = drvPathStr; + appendClosure(cache, {"tecnixTargetsSourceClosures", { + {"target", target}, {"resolver", tArgs.resolver}, {"system", tArgs.system}}}, + std::move(newClosure)); } } v.mkList(list); @@ -709,56 +838,99 @@ static RegisterPrimOp primop_tecnixTargets({ Takes an attrset with: - `targets`: list of target strings - `gitDir`: path to bare git directory - - `moduleSrc`: repo-relative path to directory with resolve.nix + - `resolver`: repo-relative path to directory with resolve.nix - `system`: system string (e.g. "aarch64-darwin") - - `gitSha` (optional): explicit commit SHA + - `rev` (optional): explicit commit SHA - `checkoutPath` (optional): checkout path for dirty file detection )", .fun = prim_tecnixTargets, }); // ============================================================================ -// builtins.tecnixDependencies { gitDir, moduleSrc, targets, system, ... } +// builtins.tecnixDependencies { gitDir, resolver, targets, system, ... } // Same inputs as tecnixTargets, returns JSON of accessed paths per target. 
// ============================================================================ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value ** args, Value & v) { auto tArgs = parseTecnixArgs(state, pos, args); - auto & resolveFn = getResolveFunction(state, pos, tArgs); + configureTectonixSettings(state, tArgs); auto cache = state.fetchSettings.getCache(); auto accessor = state.getRepoAccessor(); - bool useCache = state.settings.pureEval; - nlohmann::json result; + // Track module loading so module source files are in each target's tracked set. + TrackingContext moduleCtx; + if (useCache) + currentTrackingContext = &moduleCtx; + auto & resolveFn = getResolveFunction(state, pos, tArgs); + currentTrackingContext = nullptr; + + // Build result as attrset: { target = [ "path1" "path2" ... ]; ... } + auto resultAttrs = state.buildBindings(tArgs.targets.size()); + + auto pathsToValue = [&](const std::vector & paths) -> Value * { + auto * val = state.allocValue(); + auto list = state.buildList(paths.size()); + for (size_t j = 0; j < paths.size(); j++) + (list[j] = state.allocValue())->mkString(paths[j], state.mem); + val->mkList(list); + return val; + }; for (auto & target : tArgs.targets) { if (useCache) { - fetchers::Cache::Key graphKey = {"tecnixDepsGraph", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}}}; - - if (auto cached = cache->lookup(graphKey)) { - auto trackedPaths = nlohmann::json::parse( - fetchers::getStrAttr(*cached, "paths")).get>(); - auto depsFp = computeDepsFp(accessor, trackedPaths); - if (depsFp) { - fetchers::Cache::Key resultKey = {"tecnixDepsResult", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}, {"depsFp", *depsFp}}}; - if (auto cachedResult = cache->lookup(resultKey)) { - debug("tecnixDependencies: cache hit for '%s' (depsFp=%s)", target, *depsFp); - result[target] = nlohmann::json::parse( - 
fetchers::getStrAttr(*cachedResult, "result")); - continue; - } + // Load source closures (needed for both dirty overlap check and cross-commit matching) + fetchers::Cache::Key closuresKey = {"tecnixDepsSourceClosures", { + {"target", target}, {"resolver", tArgs.resolver}, {"system", tArgs.system}}}; + std::vector closures; + if (auto closuresCache = cache->lookup(closuresKey)) + closures = parseClosures(*closuresCache); + + // Layer 1: same-commit fast path + fetchers::Cache::Key commitKey = {"tecnixDepsCommit", { + {"target", target}, {"resolver", tArgs.resolver}, + {"system", tArgs.system}, {"rev", tArgs.rev}}}; + if (auto cached = cache->lookup(commitKey)) { + // Check dirty overlap using tracked paths from the most recent closure + bool dirtyOverlap = false; + if (!closures.empty()) { + std::vector trackedPaths; + for (auto & [p, _] : closures.back().pathFps) trackedPaths.push_back(p); + dirtyOverlap = state.dirtyFilesOverlap(trackedPaths); + } + if (!dirtyOverlap) { + warn("tecnixDependencies: commit cache hit for '%s'", target); + auto result = fetchers::getStrAttr(*cached, "result"); + auto paths = nlohmann::json::parse(result).get>(); + resultAttrs.insert(state.symbols.create(target), pathsToValue(paths)); + continue; + } else { + warn("tecnixDependencies: commit cache skipped (dirty overlap) for '%s'", target); + } + } + + // Layer 2: source closure — cross-commit lookup via lazy fingerprinting + if (!closures.empty()) { + auto trie = buildSetTrie(closures); + if (auto matchIdx = searchSetTrie(*trie, accessor)) { + auto & closure = closures[*matchIdx]; + warn("tecnixDependencies: source closure hit for '%s'", target); + auto paths = nlohmann::json::parse(closure.result).get>(); + resultAttrs.insert(state.symbols.create(target), pathsToValue(paths)); + // Populate commit cache for next same-commit lookup + cache->upsert(commitKey, {{"result", closure.result}}); + continue; } } } // Eval with tracking - debug("tecnixDependencies: evaluating '%s'", target); 
+ warn("tecnixDependencies: cache miss, evaluating '%s'", target); TrackingContext trackingCtx; + // Merge module paths so module source changes are captured + moduleCtx.accessedPaths.visit_all([&](const std::string & p) { + trackingCtx.accessedPaths.insert(p); + }); auto prevCtx = currentTrackingContext; if (useCache) currentTrackingContext = &trackingCtx; @@ -767,46 +939,50 @@ static void prim_tecnixDependencies(EvalState & state, const PosIdx pos, Value * targetArg->mkString(target, state.mem); auto * resolveResult = state.allocValue(); state.callFunction(const_cast(resolveFn), *targetArg, *resolveResult, pos); - state.forceValue(*resolveResult, pos); - currentTrackingContext = prevCtx; - - // Force drvPath (outside tracking) + state.forceValue(*resolveResult, pos); if (resolveResult->type() == nAttrs) { auto drvPathAttr = resolveResult->attrs()->get(state.symbols.create("drvPath")); if (drvPathAttr) state.forceValue(*drvPathAttr->value, pos); } + currentTrackingContext = prevCtx; + auto trackedPaths = collectTrackedPaths(trackingCtx); - result[target] = trackedPaths; + resultAttrs.insert(state.symbols.create(target), pathsToValue(trackedPaths)); - // Store dependency graph + result (only for pure evals) + // Cache result (only for pure evals) if (useCache) { - fetchers::Cache::Key graphKey = {"tecnixDepsGraph", { - {"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}}}; - cache->upsert(graphKey, {{"paths", nlohmann::json(trackedPaths).dump()}}); - - auto depsFp = computeDepsFp(accessor, trackedPaths); - if (depsFp) { - cache->upsert( - {"tecnixDepsResult", {{"target", target}, {"gitDir", tArgs.gitDir}, - {"moduleSrc", tArgs.moduleSrc}, {"system", tArgs.system}, {"depsFp", *depsFp}}}, - {{"result", nlohmann::json(trackedPaths).dump()}}); + auto resultJson = nlohmann::json(trackedPaths).dump(); + + // Same-commit fast path + cache->upsert( + {"tecnixDepsCommit", {{"target", target}, {"resolver", tArgs.resolver}, 
+ {"system", tArgs.system}, {"rev", tArgs.rev}}}, + {{"result", resultJson}}); + + // Source closure for cross-commit matching + SourceClosure newClosure; + for (auto & path : trackedPaths) { + auto [_, fp] = accessor->getFingerprint(CanonPath(path)); + if (fp) newClosure.pathFps[path] = *fp; } + newClosure.result = resultJson; + appendClosure(cache, {"tecnixDepsSourceClosures", { + {"target", target}, {"resolver", tArgs.resolver}, {"system", tArgs.system}}}, + std::move(newClosure)); } } - auto resultStr = result.dump(); - v.mkString(resultStr, state.mem); + v.mkAttrs(resultAttrs); } static RegisterPrimOp primop_tecnixDependencies({ .name = "__tecnixDependencies", .args = {"attrs"}, .doc = R"( - Discover dependencies for tecnix targets. Returns a JSON string mapping each + Discover dependencies for tecnix targets. Returns an attrset mapping each target to a list of paths accessed during its resolution. Takes the same attrset as `tecnixTargets`.