From 2eac39b9998002106967045e4df17f777aaa6b96 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 12:00:00 -0700 Subject: [PATCH 01/14] docs(adr): record Git-like revision model (commit chain + jobs-reconstructed delta) ADR 0087 retroactively captures the shipped workspace-scoped content-addressed blob dedup (no prior ADR existed; it was only in data-model.md/api.md + commit dea091f4). ADR 0088 decides the next step: revisions.parent_revision_id + tree inheritance (partial-manifest publish, unlisted paths inherit the parent tree by reference) so an agent can express "change this file" instead of the whole tree, plus server-reconstructed intra-file delta (unified diff for text, whole-blob fallback for binary). Reconstruction runs in jobs, not upload: upload is write-only against R2 today (sole op ARTIFACTS.put), while jobs already does the read-decrypt-transform-reencrypt-write shape for Bundle generation. This keeps content and the ADR 0063 encryption boundary untouched. Chunk stores, per-block AEAD, Range serving, global dedup, and dropping encryption are deferred. Spec/CONTEXT edits land with the implementation, per the spec-is-source-of-truth rule. Staged plan in docs/ops/git-like-revisions-todo.md. 
Co-Authored-By: Claude Opus 4.8 --- ...ed-content-addressed-blob-deduplication.md | 96 +++++++++ ...eritance-and-server-reconstructed-delta.md | 155 ++++++++++++++ docs/ops/git-like-revisions-todo.md | 190 ++++++++++++++++++ 3 files changed, 441 insertions(+) create mode 100644 docs/adr/0087-workspace-scoped-content-addressed-blob-deduplication.md create mode 100644 docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md create mode 100644 docs/ops/git-like-revisions-todo.md diff --git a/docs/adr/0087-workspace-scoped-content-addressed-blob-deduplication.md b/docs/adr/0087-workspace-scoped-content-addressed-blob-deduplication.md new file mode 100644 index 00000000..30eee716 --- /dev/null +++ b/docs/adr/0087-workspace-scoped-content-addressed-blob-deduplication.md @@ -0,0 +1,96 @@ +# Workspace-Scoped Content-Addressed Blob Deduplication + +Status: Accepted (retroactive). Records a decision already shipped in code and +[`data-model.md`](../specs/data-model.md)/[`api.md`](../specs/api.md) but not +previously captured as an ADR. Drafted because the next decision +([ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)) +builds directly on it and an implementer should not have to reconstruct this from +the schema and commit history. + +A **Revision** is "a complete immutable file tree." Re-publishing an **Artifact** +with one changed file used to re-upload every file's bytes, even the unchanged +ones, because every file was stored under a per-Revision R2 key +(`artifacts/{artifactId}/revisions/{revisionId}/files/{path}`). For an Artifact +carrying a multi-megabyte asset that the agent edits around, the same bytes were +written to R2 again on every Revision. + +## Decision + +Files are deduplicated within a **Workspace** by the SHA-256 of their plaintext. + +- The client sends a manifest of `(path, size_bytes, sha256)` in + `CreateUploadSessionRequest`. 
`sha256` is the lowercase hex digest of the + plaintext, computed client-side (the CLI streams it via `sha256HexForFile`). +- A `content_blobs(workspace_id, sha256, size_bytes)` row records a verified + blob and its shared R2 key + `workspaces/{workspaceId}/blobs/sha256/{prefix}/{sha256}` + (`workspaceBlobObjectKeyFor`). The key is deterministic, so two concurrent + uploads of the same bytes target the same object. +- On upload-session create, a file whose `(workspace_id, sha256, size_bytes)` + already has a blob is marked `storage_kind = 'blob'` with `uploaded_at` set + immediately, and the wire response returns `reused` instead of an + `upload_required` PUT URL. The client skips the PUT. Same-session duplicate + hashes require exactly one PUT; the rest are `reused`. +- The upload Worker verifies the plaintext digest on PUT (the signed token + carries the expected `sha256`; a mismatch fails the PUT) before + `content_blobs.upsert` records the blob. The digest is never trusted without + verification. +- Blobs are encrypted exactly like Revision files under the per-Workspace DEK + ([ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md)), but + with a distinct AAD version `v2 = (workspaceId, sha256)` that omits + `artifactId`/`revisionId`/`path`, so one stored object is legitimately + reusable across Revisions and Artifacts inside the Workspace. +- `sha256` is optional for compatibility. A client that omits it keeps the + legacy per-Revision object path (`storage_kind = 'revision'`) and does not + participate in dedup. There is no backfill of historical revision-key objects. + +## Considered Options + +- **No dedup; full re-upload every Revision (status quo before this).** + Rejected. Re-uploading unchanged multi-megabyte assets is wasted bandwidth and + R2 writes on the common "edit around one big file" agent workflow. +- **Platform-wide (global) blob dedup.** Rejected. 
A single global content pool + maximizes dedup but breaks tenant isolation: a blob's existence becomes an + oracle for "some other Workspace uploaded these exact bytes," and the encryption + blast radius widens past the **Workspace** boundary that is already the access + boundary ([ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md)). + Workspace-scoped dedup keeps the tenant boundary intact for the cost of missing + cross-tenant duplicates, which are not a real workload. +- **Server-computed digests.** Rejected. Hashing on the server means the bytes + must transit before the dedup decision, which defeats the point (skip the + upload). The client computes the digest; the server verifies it on the PUT it + does receive. +- **Workspace-scoped dedup (chosen).** Dedup where the bytes already live behind + the same access and encryption boundary, decided before upload from a + client-supplied digest, verified on write. + +## Consequences + +- **Unchanged files already skip re-upload across Revisions** — but only when the + client re-declares them with their `sha256`. The dedup saves the bytes on the + wire; it does not remove the requirement to re-enumerate the full file list, and + the system does not detect unchanged files on its own. Closing that gap is + [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). +- **Two storage kinds coexist.** `artifact_files.storage_kind` is `blob` + (shared workspace object) or `revision` (legacy per-Revision object). Byte + purge, bundle generation, and content serving treat both transparently. +- **GC is reference-counted and conservative.** A `jobs`-owned sweep deletes + unreferenced `content_blobs` rows after checking active Artifacts and live + pending upload sessions, but does **not** delete the deterministic shared R2 + object, so a concurrent verified upload cannot be de-indexed and then have its + freshly written bytes removed by a delayed delete. 
+- **Encryption is unchanged in substance.** Blobs use the same algorithm, DEK + derivation, and rotation as Revision files; only the AAD composition differs so + the object is path/revision-independent. +- **No new domain vocabulary.** A blob is an implementation property of how a + **Revision** file is stored; [`CONTEXT.md`](../../CONTEXT.md) does not gain a + "blob" term. + +## What this ADR is not + +- Not intra-file deduplication. The unit is the whole file; one changed byte + yields a new digest and a new blob. Sub-file delta is + [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). +- Not a Revision-level content address or commit graph. Only individual files are + content-addressed; Revisions remain a flat numbered list until + [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). diff --git a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md new file mode 100644 index 00000000..1a92d76a --- /dev/null +++ b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md @@ -0,0 +1,155 @@ +# Revision Commit Chain, Tree Inheritance, And Server-Reconstructed Intra-File Delta + +An agent that has already published an **Artifact** and wants to change one file +should be able to say "change this one file," not re-describe the whole tree. Two +gaps stand in the way today, both recorded as the missing half of +[ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md): + +1. **No tree inheritance.** A new **Revision** must re-enumerate every path with + its `sha256`. Workspace blob dedup ([ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md)) + skips the unchanged *bytes*, but the client still walks and hashes the whole + directory and sends the full manifest. 
The smallest change an agent can express + is "here is the entire new tree." +2. **No intra-file delta.** A blob is a whole file. One line changed in a 5 MB + file is a new plaintext digest, a new blob, and a full 5 MB upload. + +The driver is **agent ergonomics**: the natural unit of an agent's edit is "this +file changed" (and, for a large file, "this region of this file changed"), and the +contract should accept exactly that. + +## Decision + +Make the **Revision** model behave like a Git commit: a parent pointer plus a +tree that inherits from the parent and overrides only what changed. Layer +server-reconstructed intra-file delta on top so a big file with a small edit +uploads only the diff. **The encryption boundary +([ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md)) does not +change.** + +### Tree inheritance and the commit chain + +- `revisions` gains `parent_revision_id` (nullable self-reference within the same + Artifact). Existing rows are roots (`NULL`). A Revision published against a base + records that base as its parent. +- `CreateUploadSessionRequest` gains an optional `base_revision_id` and + `deleted_paths`. When `base_revision_id` is set, the `files` manifest is + "changed + added paths only"; every path present in the base and not listed or + deleted **inherits by reference** — `api` copies the base's `artifact_files` + row forward (it already points at a shared blob), so no bytes and no manifest + entry are needed for unchanged files. +- The published Revision still materializes a complete `artifact_files` tree, so + every downstream surface (content serving, bundles, byte purge, Agent View) + is unchanged. "Inheritance" is a publish-time merge, not a read-time + indirection. Diffing two Revisions' `artifact_files` rows yields the changeset. 
+ +### Server-reconstructed intra-file delta (the chosen delta option) + +- A changed file may be sent as a **patch** instead of whole bytes: a per-file + descriptor `{ base_sha256, format: "unified", result_sha256 }` plus the diff + bytes uploaded on the normal upload path. +- Reconstruction runs in **`jobs`**, not `upload`. `jobs` fetches and decrypts + the base blob, applies the patch, hashes the result, and **fails loud** unless + it equals `result_sha256`. It then encrypts the **whole reconstructed file** as + an ordinary `storage_kind = 'blob'` object under + `workspaceBlobObjectKeyFor(result_sha256)`. This is the same + read-decrypt-transform-reencrypt-write shape `jobs` already runs for Bundle + generation (`bundle-generate-orchestration.ts`, `revision-file-bytes.ts`), so + it deepens a module that is already that shape rather than expanding one that is + not — see the placement rationale under Consequences. +- The resulting blob is indistinguishable from a whole-file upload. **`content` + never learns a patch existed**, never reconstructs on read, and keeps its no-DB + isolation and whole-object decrypt. Reconstruction happens once, on write, + behind the encryption boundary. + +### Patch format + +**Unified diff for text; whole-blob upload for binary.** Agents reason natively +in unified diffs, so it is the ergonomic match for the driver, and it is +reviewable. Binary files rarely take tiny edits, and a byte-splice format is +fiddly for an agent to produce; a changed binary file just uploads a new whole +blob (the [ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md) +status quo). The CLI/MCP choose patch vs whole-blob per file: patch only when the +file is large enough and the diff is small enough to be worth it; otherwise +whole-blob. 
+ +## Considered Options + +- **Tree inheritance only; no intra-file delta.** This delivers most of the + ergonomics (the agent sends only changed files) with zero encryption + interaction, and is the independently shippable core (stages 1–3 of the plan). + Not chosen as the endpoint because a big file with a one-line edit still + re-uploads in full; chosen as the *foundation* that intra-file delta sits on. +- **Intra-file delta only; no tree inheritance.** Rejected. Without inheritance + the agent still re-enumerates every file each Revision, so the ergonomic win is + small and the hardest piece (reconstruction) carries the least benefit. +- **Reconstruct on read in `content`.** Rejected. `content` has no database and + decrypts whole objects in-memory with no Range/streaming-partial path; giving it + base-revision + patch metadata means a DB binding or a side channel, which + breaks the isolation that the **Content Origin** exists to provide + ([ADR 0001](./0001-private-artifact-storage-behind-controlled-origin.md), + [ADR 0028](./0028-signed-url-tokens-for-content-gateway-authorization.md)). +- **Drop application-layer encryption to R2-only so ciphertext chunks become + addressable.** Rejected here; this is the option + [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) already + weighed and declined, and it would put Cloudflare back inside the byte- + confidentiality boundary. Not worth it for transient handoffs. +- **Content-defined chunk store with per-block AEAD.** Deferred. Maximum dedup + and true delta storage, but it re-architects the storage layer and fights + `content`'s no-DB isolation hardest. Revisit only if a real + large-file-frequent-edit workload appears. +- **Tree inheritance + server-reconstructed intra-file delta (chosen).** Git + commit semantics at file granularity, plus diff-on-the-wire for big files, + reconstructed to whole blobs before encryption so the trust boundary and read + path are untouched. 
+ +## Consequences + +- **The agent expresses minimal change.** "Change this file" sends one file; + "change this region of a big file" sends one diff. Unchanged files are neither + re-hashed (the CLI caches the last manifest per Artifact) nor re-uploaded. +- **Revisions form a DAG-by-parent.** `parent_revision_id` enables real + "what changed between Revision N and N+1" and a browsable history. Diff views in + the viewer become possible but are out of scope here. +- **Storage is not reduced by intra-file delta.** Reconstruction writes a whole + new blob; the saving is upload bandwidth, not stored bytes. This is an accepted + trade for keeping encryption and the read path unchanged. A future chunk store + is where stored-byte savings would come from. +- **Reconstruction is a new failure mode, handled loud.** A patch that does not + apply, or whose result digest mismatches `result_sha256`, fails the + upload/finalize with a clear error; it never serves a partially-applied file. +- **Reconstruction runs in `jobs` because that is the seam-honest placement.** + `upload` is write-only against R2 today — its sole R2 op is + `env.ARTIFACTS.put` (`apps/upload/src/put.ts:150`); it never reads or decrypts + a stored object, even though it holds the DEK. Putting reconstruction there + would turn a write-only module into a read-modify-write one and make + base-blob decrypt a live path on the hot upload route, widening its blast + radius. `jobs` is *already* a read-modify-write module with the `ARTIFACTS` + binding and the encryption ring: Bundle generation reads revision files, + decrypts (`revision-file-bytes.ts`), and re-encrypts the output + (`bundle-generate-orchestration.ts`). Reconstruction is the same operation + shape, so it belongs there. 
The trade-off accepted: a patched file is not + servable until its `jobs` reconstruction completes, so a Revision that contains + a patched file has a brief pending state before Publish can resolve it — + modeled like the existing async Bundle/safety-scan pending states, not as a + finalize-blocking step. This supersedes the earlier draft note that started + reconstruction in `upload`. +- **Caps still apply to the reconstructed result**, not the diff: a small diff + whose applied result exceeds the file/Revision cap fails. +- **Spec + glossary updates.** [`data-model.md`](../specs/data-model.md) + (`parent_revision_id`), [`api.md`](../specs/api.md) (`base_revision_id`, + `deleted_paths`, patch descriptor, partial-manifest publish), and + [`CONTEXT.md`](../../CONTEXT.md) relationships ("a **Revision** has zero or one + parent **Revision**"; a new Revision may inherit unchanged files from its + parent) are updated so the spec stays source of truth. No new top-level domain + term is introduced for "patch"; it is an implementation property of how a + changed file is transmitted. + +## What this ADR is not + +- Not a chunk store, not per-block encryption, not Range serving, not global + dedup, not dropping encryption. Those are explicitly deferred above. +- Not a read-time change. Nothing about how a published file is served changes; + a reconstructed file is an ordinary blob. +- Not a license to expose Revision internals to recipients. The commit chain is + owner/member and agent metadata; Access Link recipients still see the published + tree, not the history, unless a separate decision opens it. 
diff --git a/docs/ops/git-like-revisions-todo.md b/docs/ops/git-like-revisions-todo.md new file mode 100644 index 00000000..5ff32ce6 --- /dev/null +++ b/docs/ops/git-like-revisions-todo.md @@ -0,0 +1,190 @@ +# Git-like revisions: tree-inheritance + intra-file delta + +Design + staged plan for making revision storage behave more like Git so agents +can express *small changes to a file* instead of re-submitting the whole tree, +and so a big file getting a small edit does not re-upload the whole file. + +Owner: Isaac. Drafted 2026-06-14. Status: design accepted, not yet implemented. +Driver: **agent ergonomics** (the agent saying "change just this file" cheaply +and naturally is the point; byte savings are secondary). + +## Where we are today (verified ground truth) + +The blob layer is **already half-Git**: + +- Content-addressed whole-file blobs: `content_blobs(workspace_id, sha256, + size_bytes)` -> shared R2 object `workspaces/{wid}/blobs/sha256/{prefix}/{sha256}` + (`packages/storage/src/artifact-bytes-encryption.ts` `workspaceBlobObjectKeyFor`). +- Whole-file dedup: client sends a `(path, size, sha256)` manifest; server marks + files `reused` when the blob exists, client skips the PUT + (`packages/db/src/upload-session-lifecycle.ts:47-66`, + `packages/db/src/repository/upload-session-lifecycle.ts:84-104`). +- The CLI already streams a plaintext SHA-256 per file (`sha256HexForFile` in + `apps/cli/src/local.ts:129`). + +What is **missing** vs Git: + +1. **No commit chain.** `revisions` has no `parent_revision_id` + (`packages/db/src/schema.ts:237`). Revisions are a flat numbered list. +2. **No tree inheritance.** A new revision must re-enumerate *every* path. The + client still walks + hashes the whole directory each publish; dedup only saves + the bytes, not the enumeration or the "send the whole dir" mental model. +3. **No intra-file delta.** A blob is a whole file. One line changed in a 5 MB + file -> new SHA-256 -> full 5 MB re-upload. 
+ +## Architecture constraint that shapes the design (the seams) + +| Seam | Owns | DB | R2 | Constraint | +| --- | --- | --- | --- | --- | +| cli / mcp | client hashing, publish verb | - | - | already hashes plaintext sha256 | +| api | durable DB writes, publish coord | yes | read-only | **only place** commit-graph / tree metadata can be written | +| upload | R2 PUT, encrypt-before-write, finalize | yes | PUT | owns reused/upload_required; per-workspace DEK | +| content | serving untrusted bytes | **no DB** | read-only | decrypts **whole** object in-memory, no Range, cannot reach patch metadata | + +Two facts decide everything: + +- **Ciphertext is not content-addressable** (random IV per encrypt). Dedup works + because the key is the *plaintext* SHA-256. Blob encryption already uses a + distinct AAD `v2 = (workspaceId, sha256)` with no path/revision binding + (`artifact-bytes-encryption.ts:7-8,27-31`), so a blob is reusable across + revisions by construction. +- **`content` has no DB and decrypts whole objects.** It can never reconstruct a + file from base + patch. So **any intra-file delta must be reconstructed on the + write path (`upload`/`jobs`) into a normal whole blob**, never on read. + +### On encryption (ADR 0063) + +Encryption defends exactly **platform-tier** risk (Cloudflare-side R2 +misconfiguration / object-store insider), as defense-in-depth over R2's own +at-rest encryption. It explicitly does **not** defend the viewer tier (a leaked +Access Link still serves the bytes). It is therefore a *posture* property, low as +a user-facing control. **We keep it as-is.** The chosen design (Option 1 below) +preserves the encryption boundary completely: deltas are reconstructed to whole +blobs before encryption, so `content` and the trust boundary never change. + +## Decision + +Build **both layers**, optimize for agent ergonomics, leave encryption untouched: + +- **Layer 1 - tree + commit chain** (file-granularity "change just this file"). 
+- **Option 1 intra-file delta** on top (big-file-small-edit byte savings), + reconstructed server-side into a whole blob. + +Sub-file *chunk store* / per-block AEAD / dropping encryption to R2-only are +explicitly **out of scope** and deferred until usage proves the need ("if people +use this, refactor later"). + +### Patch format (recommended) + +**Unified diff for text, whole-blob fallback for binary.** + +- Agents reason natively in unified diffs; it is the ergonomic match for the + driver. Human-readable, reviewable, and the agent already produces them. +- Binary files rarely get tiny edits; forcing a byte-splice format on agents buys + little and is fiddly. Binary changes just upload a new whole blob (status quo). +- The server applies the diff to the decrypted base blob, hashes the result, + verifies it against a client-declared `result_sha256`, and writes a new whole + blob. **Fail loud** if the patch does not apply cleanly or the result hash + mismatches (never silently fall back to a partial file). + +## Staged plan + +### Stage 0 - Write the ADR(s) first (decision on record before code) + +- The whole blob-dedup subsystem shipped **without an ADR** (commit `dea091f4`, + documented only in `data-model.md`/`api.md`). Write the retroactive ADR for + workspace-scoped content-addressed blobs. +- New ADR: "Revision commit chain + tree inheritance + server-reconstructed + intra-file delta." Record: parent pointer, partial-manifest contract, Option 1 + reconstruct-on-write, encryption left intact, chunk-store deferred. +- Done: both ADRs merged; `data-model.md` + `api.md` + `CONTEXT.md` updated so + spec stays source of truth. + +### Stage 1 - Schema: parent pointer + +- Add `revisions.parent_revision_id TEXT NULL` self-FK within the same artifact + (composite-safe, mirrors existing `(workspace_id, artifact_id, id)` unique). +- Backfill: leave NULL for existing rows (they are roots). No data migration. 
+- Done: migration applies clean on PGlite + Neon; index added if diff queries + need it. + +### Stage 2 - Contract: base revision + partial manifest + +- `CreateUploadSessionRequest` (in `packages/contracts`): add optional + `base_revision_id` and `deleted_paths: string[]`. When `base_revision_id` is + set, `files` becomes "changed + added only"; unlisted paths inherit from base. +- Add a per-file optional `patch` descriptor: `{ base_sha256, format: "unified", + result_sha256 }` plus the diff bytes uploaded like any file body. Absence = + whole-file upload (today's behavior). +- Validate: `base_revision_id` must belong to the same artifact + workspace; + `deleted_paths` must exist in base; patch `base_sha256` must match the base + revision's file at that path. +- Done: contract + OpenAPI regenerated; round-trip tests for partial manifest and + patch descriptor. + +### Stage 3 - api: tree inheritance at finalize/publish + +- When finalizing against `base_revision_id`: copy forward the base's + `artifact_files` rows for inherited paths (already point at shared blobs), + apply overrides + `deleted_paths`. This is the "commit = parent tree + delta" + step. Set `parent_revision_id = base_revision_id`. +- Recompute `file_count` / `size_bytes` from the merged tree. +- Done: a revision published with one changed file has a full `artifact_files` + tree but only one new blob; `parent_revision_id` set; diffing two revisions' + `artifact_files` yields the changeset. + +### Stage 4 - jobs: Option 1 reconstruct-on-write + +Reconstruction runs in **`jobs`**, not `upload`. `upload` is write-only against +R2 today (sole op is `ARTIFACTS.put`, `apps/upload/src/put.ts:150`); `jobs` is +already a read-modify-write module with the `ARTIFACTS` binding + encryption ring +(Bundle generation: `revision-file-bytes.ts` decrypts, `bundle-generate- +orchestration.ts` re-encrypts). Reconstruction is the same shape, so it belongs +in `jobs` and keeps `upload`'s narrow role intact. 
+ +- Client uploads a unified diff for a patched path. A `jobs` task fetches + + decrypts the base blob, applies the patch, hashes -> must equal + `result_sha256`, encrypts the **whole result** as a normal blob under + `workspaceBlobObjectKeyFor(result_sha256)`. +- Model the pending window like existing async Bundle/safety-scan states: a + Revision containing a patched file is not servable until reconstruction + completes, so Publish resolves it after the `jobs` task lands (not a + finalize-blocking step). +- The resulting `artifact_files` row is an ordinary `storage_kind='blob'` row. + Nothing downstream (content, bundles, GC) needs to know a patch was involved. +- Caps: patched-result size still enforced against file/revision caps. +- Done: big-file-small-edit uploads only the diff bytes; served file is + byte-identical to applying the patch locally; `content` unchanged. + +### Stage 5 - cli/mcp: the ergonomics payoff + +- CLI caches the last published manifest per artifact (paths + sha256 + revision + id) locally. On revise: diff the working dir against the cache; send only + changed/added files + `deleted_paths` against `base_revision_id`. **Unchanged + files are not re-hashed and not re-uploaded.** +- For a changed *text* file above a size threshold, generate a unified diff + against the cached base and send the patch instead of the whole file. Below + threshold or binary: whole blob (cheaper than diff overhead). +- MCP `add_revision`: accept a partial file set + optional per-file patch, same + contract. This is the no-shell parity surface. +- Done: agent expresses "change one file" and the wire carries one diff; demo on + a multi-MB asset with a one-line edit. + +## Non-goals / deferred + +- Content-defined chunk store, per-block AEAD, Range serving, dropping to + R2-only encryption. Revisit only if a real large-file-frequent-edit workload + appears. +- Cross-workspace (global) blob dedup. 
Stays workspace-scoped (tenant blast + radius, per ADR 0063 reasoning). +- Diff views in the viewer. The commit chain makes it possible; not in scope. + +## Open questions + +- Patch byte threshold for choosing diff vs whole-blob upload (measure; start + conservative, e.g. only diff when `diff_size < 0.5 * file_size` AND file + > a few hundred KB). +- RESOLVED: reconstruction runs in `jobs` (seam-honest; `upload` is write-only + today, `jobs` already does the read-decrypt-transform-reencrypt-write shape). + Remaining sub-question: exact pending-state model for a Revision whose Publish + waits on reconstruction (reuse Bundle `pending` machinery vs a new state). From 571b59c20c2f8c97cd39df1a600a1a347013515e Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 12:40:15 -0700 Subject: [PATCH 02/14] feat(db,contracts): add revision commit chain + partial-manifest upload contract (ADR 0088 stages 1-2) Stage 1 (schema): revisions.parent_revision_id nullable column with a deferrable composite self-FK on (workspace_id, artifact_id, parent_revision_id) -> revisions(workspace_id, artifact_id, id), ON DELETE SET NULL (parent_revision_id), plus revisions_parent_idx. The composite target structurally pins a parent to the same Workspace and Artifact; the column-list SET NULL nulls only the pointer (plain SET NULL would violate the NOT NULL workspace_id/artifact_id). Deferrable because claim-reparent bulk-rewrites workspace_id across all revisions inside deferred constraints. Threaded through the Revision type, insert mapper, and mapRevision; draft creation writes NULL (Stage 3 populates from base_revision_id). Migration 0024 is idempotent (journal-less runner) and verified on PGlite + snapshot regenerated. Stage 2 (contract): CreateUploadSessionRequest gains optional base_revision_id, deleted_paths, and a per-file patch descriptor {base_sha256, format:"unified", result_sha256}. 
A superRefine enforces the structural rules (patch/deleted_paths require base_revision_id; deleted_paths unique; a path cannot be both uploaded and deleted; format must be unified). Stateful checks and the tree-inheritance merge / diff reconstruction are deferred to Stages 3-4. OpenAPI golden regenerated; round-trip tests added. Also fixes a pre-existing Sha256Hex /u-flag leak that serialized an invalid "^...$/u" pattern into the published upload OpenAPI (now clean in all 6 spots), and folds the ADR 0088 spec-source-of-truth updates into data-model.md (column + index), api.md (request fields + rules), and CONTEXT.md (Revision parent relationship). Co-Authored-By: Claude Opus 4.8 --- CONTEXT.md | 2 +- ...eritance-and-server-reconstructed-delta.md | 4 +- docs/ops/git-like-revisions-todo.md | 28 ++--- docs/specs/api.md | 28 +++++ docs/specs/data-model.md | 44 +++---- packages/contracts/openapi/upload.json | 78 +++++++++++- packages/contracts/src/uploadSessions.test.ts | 115 ++++++++++++++++++ packages/contracts/src/uploadSessions.ts | 75 ++++++++++-- .../0024_revisions_parent_revision_id.sql | 25 ++++ packages/db/snapshot/schema.sql | 3 + .../db/src/local-mvp-sql-executor.test.ts | 1 + packages/db/src/queries/revisions.test.ts | 11 ++ packages/db/src/queries/revisions.ts | 2 + .../repository/upload-session-lifecycle.ts | 2 + packages/db/src/schema.ts | 7 ++ packages/db/src/types.ts | 1 + 16 files changed, 378 insertions(+), 48 deletions(-) create mode 100644 packages/contracts/src/uploadSessions.test.ts create mode 100644 packages/db/migrations/0024_revisions_parent_revision_id.sql diff --git a/CONTEXT.md b/CONTEXT.md index 0333cbdf..6c9cd947 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -21,7 +21,7 @@ _Avoid_: Empty artifact, draft artifact **Revision**: -A saved state of an **Artifact** after creation or update. +A saved state of an **Artifact** after creation or update. 
A **Revision** has zero or one parent **Revision** (a commit chain within the **Artifact**); a **Revision** published against a parent may inherit unchanged files from it instead of re-uploading them. _Avoid_: Version, snapshot diff --git a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md index 1a92d76a..a2f8f912 100644 --- a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md +++ b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md @@ -78,7 +78,7 @@ whole-blob. ergonomics (the agent sends only changed files) with zero encryption interaction, and is the independently shippable core (stages 1–3 of the plan). Not chosen as the endpoint because a big file with a one-line edit still - re-uploads in full; chosen as the *foundation* that intra-file delta sits on. + re-uploads in full; chosen as the _foundation_ that intra-file delta sits on. - **Intra-file delta only; no tree inheritance.** Rejected. Without inheritance the agent still re-enumerates every file each Revision, so the ergonomic win is small and the hardest piece (reconstruction) carries the least benefit. @@ -123,7 +123,7 @@ whole-blob. a stored object, even though it holds the DEK. Putting reconstruction there would turn a write-only module into a read-modify-write one and make base-blob decrypt a live path on the hot upload route, widening its blast - radius. `jobs` is *already* a read-modify-write module with the `ARTIFACTS` + radius. `jobs` is _already_ a read-modify-write module with the `ARTIFACTS` binding and the encryption ring: Bundle generation reads revision files, decrypts (`revision-file-bytes.ts`), and re-encrypts the output (`bundle-generate-orchestration.ts`). 
Reconstruction is the same operation diff --git a/docs/ops/git-like-revisions-todo.md b/docs/ops/git-like-revisions-todo.md index 5ff32ce6..b1561eae 100644 --- a/docs/ops/git-like-revisions-todo.md +++ b/docs/ops/git-like-revisions-todo.md @@ -1,7 +1,7 @@ # Git-like revisions: tree-inheritance + intra-file delta Design + staged plan for making revision storage behave more like Git so agents -can express *small changes to a file* instead of re-submitting the whole tree, +can express _small changes to a file_ instead of re-submitting the whole tree, and so a big file getting a small edit does not re-upload the whole file. Owner: Isaac. Drafted 2026-06-14. Status: design accepted, not yet implemented. @@ -13,7 +13,7 @@ and naturally is the point; byte savings are secondary). The blob layer is **already half-Git**: - Content-addressed whole-file blobs: `content_blobs(workspace_id, sha256, - size_bytes)` -> shared R2 object `workspaces/{wid}/blobs/sha256/{prefix}/{sha256}` +size_bytes)` -> shared R2 object `workspaces/{wid}/blobs/sha256/{prefix}/{sha256}` (`packages/storage/src/artifact-bytes-encryption.ts` `workspaceBlobObjectKeyFor`). - Whole-file dedup: client sends a `(path, size, sha256)` manifest; server marks files `reused` when the blob exists, client skips the PUT @@ -26,7 +26,7 @@ What is **missing** vs Git: 1. **No commit chain.** `revisions` has no `parent_revision_id` (`packages/db/src/schema.ts:237`). Revisions are a flat numbered list. -2. **No tree inheritance.** A new revision must re-enumerate *every* path. The +2. **No tree inheritance.** A new revision must re-enumerate _every_ path. The client still walks + hashes the whole directory each publish; dedup only saves the bytes, not the enumeration or the "send the whole dir" mental model. 3. **No intra-file delta.** A blob is a whole file. 
One line changed in a 5 MB @@ -34,17 +34,17 @@ What is **missing** vs Git: ## Architecture constraint that shapes the design (the seams) -| Seam | Owns | DB | R2 | Constraint | -| --- | --- | --- | --- | --- | -| cli / mcp | client hashing, publish verb | - | - | already hashes plaintext sha256 | -| api | durable DB writes, publish coord | yes | read-only | **only place** commit-graph / tree metadata can be written | -| upload | R2 PUT, encrypt-before-write, finalize | yes | PUT | owns reused/upload_required; per-workspace DEK | -| content | serving untrusted bytes | **no DB** | read-only | decrypts **whole** object in-memory, no Range, cannot reach patch metadata | +| Seam | Owns | DB | R2 | Constraint | +| --------- | -------------------------------------- | --------- | --------- | -------------------------------------------------------------------------- | +| cli / mcp | client hashing, publish verb | - | - | already hashes plaintext sha256 | +| api | durable DB writes, publish coord | yes | read-only | **only place** commit-graph / tree metadata can be written | +| upload | R2 PUT, encrypt-before-write, finalize | yes | PUT | owns reused/upload_required; per-workspace DEK | +| content | serving untrusted bytes | **no DB** | read-only | decrypts **whole** object in-memory, no Range, cannot reach patch metadata | Two facts decide everything: - **Ciphertext is not content-addressable** (random IV per encrypt). Dedup works - because the key is the *plaintext* SHA-256. Blob encryption already uses a + because the key is the _plaintext_ SHA-256. Blob encryption already uses a distinct AAD `v2 = (workspaceId, sha256)` with no path/revision binding (`artifact-bytes-encryption.ts:7-8,27-31`), so a blob is reusable across revisions by construction. @@ -57,7 +57,7 @@ Two facts decide everything: Encryption defends exactly **platform-tier** risk (Cloudflare-side R2 misconfiguration / object-store insider), as defense-in-depth over R2's own at-rest encryption. 
It explicitly does **not** defend the viewer tier (a leaked -Access Link still serves the bytes). It is therefore a *posture* property, low as +Access Link still serves the bytes). It is therefore a _posture_ property, low as a user-facing control. **We keep it as-is.** The chosen design (Option 1 below) preserves the encryption boundary completely: deltas are reconstructed to whole blobs before encryption, so `content` and the trust boundary never change. @@ -70,7 +70,7 @@ Build **both layers**, optimize for agent ergonomics, leave encryption untouched - **Option 1 intra-file delta** on top (big-file-small-edit byte savings), reconstructed server-side into a whole blob. -Sub-file *chunk store* / per-block AEAD / dropping encryption to R2-only are +Sub-file _chunk store_ / per-block AEAD / dropping encryption to R2-only are explicitly **out of scope** and deferred until usage proves the need ("if people use this, refactor later"). @@ -114,7 +114,7 @@ use this, refactor later"). `base_revision_id` and `deleted_paths: string[]`. When `base_revision_id` is set, `files` becomes "changed + added only"; unlisted paths inherit from base. - Add a per-file optional `patch` descriptor: `{ base_sha256, format: "unified", - result_sha256 }` plus the diff bytes uploaded like any file body. Absence = +result_sha256 }` plus the diff bytes uploaded like any file body. Absence = whole-file upload (today's behavior). - Validate: `base_revision_id` must belong to the same artifact + workspace; `deleted_paths` must exist in base; patch `base_sha256` must match the base @@ -162,7 +162,7 @@ in `jobs` and keeps `upload`'s narrow role intact. id) locally. On revise: diff the working dir against the cache; send only changed/added files + `deleted_paths` against `base_revision_id`. 
**Unchanged files are not re-hashed and not re-uploaded.** -- For a changed *text* file above a size threshold, generate a unified diff +- For a changed _text_ file above a size threshold, generate a unified diff against the cached base and send the patch instead of the whole file. Below threshold or binary: whole blob (cheaper than diff overhead). - MCP `add_revision`: accept a partial file set + optional per-file patch, same diff --git a/docs/specs/api.md b/docs/specs/api.md index 796a41f2..598c7b78 100644 --- a/docs/specs/api.md +++ b/docs/specs/api.md @@ -100,11 +100,23 @@ Authenticated `api` and `upload` routes enforce guards in a fixed order "title": "demo", "entrypoint": "index.html", "render_mode": "html", + "base_revision_id": "rev_...", + "deleted_paths": ["old/page.html"], "files": [ { "path": "index.html", "size_bytes": 12345, "sha256": "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" + }, + { + "path": "big.txt", + "size_bytes": 240, + "sha256": "(sha256 of the uploaded diff bytes)", + "patch": { + "base_sha256": "(sha256 of big.txt in the base Revision)", + "format": "unified", + "result_sha256": "(sha256 of the reconstructed big.txt)" + } } ] } @@ -132,6 +144,22 @@ Rules: - `sha256` is optional for compatibility. New CLI/MCP clients send lowercase hex SHA-256 for each file. Legacy clients that omit it keep the full-upload revision-object path and do not participate in deduplication. +- `base_revision_id`, `deleted_paths`, and per-file `patch` are the optional + commit-chain / partial-manifest inputs ([ADR 0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). + When `base_revision_id` is set, `files` lists only changed and added paths, + `deleted_paths` drops paths, and every other path inherits from the base + Revision by reference. 
A per-file `patch` (`{ base_sha256, format: "unified", +result_sha256 }`) means the bytes uploaded for that entry are a unified diff + rather than the whole file: `size_bytes`/`sha256` describe the diff, + `base_sha256` is the digest of that path in the base Revision the diff applies + to, and `result_sha256` is the digest of the whole reconstructed file the + server produces and verifies. Structural rules enforced at request validation: + `patch` and `deleted_paths` require `base_revision_id`; `deleted_paths` is + unique; a path cannot be both uploaded and deleted; `format` must be + `unified`. Stateful checks (the base belongs to the same Workspace/Artifact, a + deleted path exists in the base, a patch `base_sha256` matches the base file) + and the tree-inheritance merge and diff reconstruction are applied + server-side at finalize/publish. ### `CreateUploadSessionResponse` diff --git a/docs/specs/data-model.md b/docs/specs/data-model.md index e2d73667..a04d841c 100644 --- a/docs/specs/data-model.md +++ b/docs/specs/data-model.md @@ -91,27 +91,28 @@ Unpinning re-arms the stored `expires_at` as-is. First-class revision rows for multi-revision Artifacts ([0009](../../packages/db/migrations/0009_revisions.sql)). Upload finalize creates a `draft`; publish assigns `revision_number`, sets `published_at`, and updates `artifacts.revision_id`. -| Column | Type | Notes | -| -------------------------- | ------------------------------------------------------------ | -------------------------------------------------------------------------------------- | -| `id` | `TEXT PRIMARY KEY` | `rev_...`. | -| `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id) ON DELETE RESTRICT` | Tenant scope. | -| `artifact_id` | `TEXT NOT NULL REFERENCES artifacts(id) ON DELETE CASCADE` | Parent Artifact; deleting the Artifact deletes its revisions. | -| `revision_number` | `INTEGER NULL` | Assigned on publish; unique per Artifact when not null. Null while `status = 'draft'`. 
| -| `status` | `TEXT NOT NULL` | `draft`, `published`, or `retained`. | -| `entrypoint` | `TEXT NOT NULL` | Normalized file path. | -| `render_mode` | `TEXT NOT NULL DEFAULT 'html'` | `html`, `markdown`, `text`, `image`, `audio`, or `video`. | -| `file_count` | `INTEGER NOT NULL` | | -| `size_bytes` | `BIGINT NOT NULL` | Total uploaded bytes for this revision. | -| `bundle_status` | `TEXT NOT NULL DEFAULT 'disabled'` | `pending`, `ready`, `failed`, or `disabled`. | -| `bundle_status_updated_at` | `TIMESTAMPTZ NULL` | | -| `bundle_size_bytes` | `BIGINT NULL` | Encrypted bundle size when `bundle_status = 'ready'`. | -| `bytes_purge_enqueued_at` | `TIMESTAMPTZ NULL` | Set when byte purge is queued for a `retained` revision. | -| `created_by_type` | `TEXT NOT NULL` | `api_key` or `member`. | -| `created_by_id` | `TEXT NOT NULL` | Creator id for the stored type. | -| `created_at` | `TIMESTAMPTZ NOT NULL` | | -| `published_at` | `TIMESTAMPTZ NULL` | Set when `status` becomes `published`. | - -At most one `draft` row per Artifact (`revisions_one_draft_per_artifact`). Composite unique `(workspace_id, artifact_id, id)` supports tenant-safe foreign keys from `access_links` and `safety_warnings`. +| Column | Type | Notes | +| -------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `id` | `TEXT PRIMARY KEY` | `rev_...`. | +| `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id) ON DELETE RESTRICT` | Tenant scope. | +| `artifact_id` | `TEXT NOT NULL REFERENCES artifacts(id) ON DELETE CASCADE` | Parent Artifact; deleting the Artifact deletes its revisions. 
| +| `parent_revision_id` | `TEXT NULL` | Commit-chain parent ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)); deferrable self-FK on `(workspace_id, artifact_id, parent_revision_id)` → `revisions(workspace_id, artifact_id, id)`, `ON DELETE SET NULL (parent_revision_id)`. `NULL` for roots. | +| `revision_number` | `INTEGER NULL` | Assigned on publish; unique per Artifact when not null. Null while `status = 'draft'`. | +| `status` | `TEXT NOT NULL` | `draft`, `published`, or `retained`. | +| `entrypoint` | `TEXT NOT NULL` | Normalized file path. | +| `render_mode` | `TEXT NOT NULL DEFAULT 'html'` | `html`, `markdown`, `text`, `image`, `audio`, or `video`. | +| `file_count` | `INTEGER NOT NULL` | | +| `size_bytes` | `BIGINT NOT NULL` | Total uploaded bytes for this revision. | +| `bundle_status` | `TEXT NOT NULL DEFAULT 'disabled'` | `pending`, `ready`, `failed`, or `disabled`. | +| `bundle_status_updated_at` | `TIMESTAMPTZ NULL` | | +| `bundle_size_bytes` | `BIGINT NULL` | Encrypted bundle size when `bundle_status = 'ready'`. | +| `bytes_purge_enqueued_at` | `TIMESTAMPTZ NULL` | Set when byte purge is queued for a `retained` revision. | +| `created_by_type` | `TEXT NOT NULL` | `api_key` or `member`. | +| `created_by_id` | `TEXT NOT NULL` | Creator id for the stored type. | +| `created_at` | `TIMESTAMPTZ NOT NULL` | | +| `published_at` | `TIMESTAMPTZ NULL` | Set when `status` becomes `published`. | + +At most one `draft` row per Artifact (`revisions_one_draft_per_artifact`). Composite unique `(workspace_id, artifact_id, id)` supports tenant-safe foreign keys from `access_links`, `safety_warnings`, and the `parent_revision_id` self-reference (which structurally pins a parent to the same Workspace and Artifact). ### `artifact_files` @@ -323,6 +324,7 @@ KV values do not contain token material. 
- `revisions(artifact_id, revision_number) UNIQUE WHERE revision_number IS NOT NULL` - `revisions(artifact_id) UNIQUE WHERE status = 'draft'` - `revisions(artifact_id, created_at DESC)` +- `revisions(workspace_id, artifact_id, parent_revision_id)` - `artifact_files(artifact_id, revision_id, path) PRIMARY KEY` - `artifact_files(workspace_id, sha256, size_bytes)` - `safety_warnings(workspace_id, revision_id)` diff --git a/packages/contracts/openapi/upload.json b/packages/contracts/openapi/upload.json index 3a8b4e9b..42fc1d8a 100644 --- a/packages/contracts/openapi/upload.json +++ b/packages/contracts/openapi/upload.json @@ -271,6 +271,10 @@ "type": "string", "pattern": "^art_[0-9A-HJKMNP-TV-Z]{26}$" }, + "base_revision_id": { + "type": "string", + "pattern": "^rev_[0-9A-HJKMNP-TV-Z]{26}$" + }, "title": { "type": "string", "minLength": 1, @@ -292,6 +296,15 @@ "video" ] }, + "deleted_paths": { + "type": "array", + "items": { + "type": "string", + "minLength": 1, + "maxLength": 4096 + }, + "maxItems": 100 + }, "files": { "type": "array", "items": { @@ -309,7 +322,31 @@ }, "sha256": { "type": "string", - "pattern": "^[a-f0-9]{64}$/u" + "pattern": "^[a-f0-9]{64}$" + }, + "patch": { + "type": "object", + "properties": { + "base_sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" + }, + "format": { + "type": "string", + "enum": [ + "unified" + ] + }, + "result_sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" + } + }, + "required": [ + "base_sha256", + "format", + "result_sha256" + ] } }, "required": [ @@ -873,6 +910,10 @@ "type": "string", "pattern": "^art_[0-9A-HJKMNP-TV-Z]{26}$" }, + "base_revision_id": { + "type": "string", + "pattern": "^rev_[0-9A-HJKMNP-TV-Z]{26}$" + }, "title": { "type": "string", "minLength": 1, @@ -894,6 +935,15 @@ "video" ] }, + "deleted_paths": { + "type": "array", + "items": { + "type": "string", + "minLength": 1, + "maxLength": 4096 + }, + "maxItems": 100 + }, "files": { "type": "array", "items": { @@ -911,7 +961,31 @@ }, 
"sha256": { "type": "string", - "pattern": "^[a-f0-9]{64}$/u" + "pattern": "^[a-f0-9]{64}$" + }, + "patch": { + "type": "object", + "properties": { + "base_sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" + }, + "format": { + "type": "string", + "enum": [ + "unified" + ] + }, + "result_sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" + } + }, + "required": [ + "base_sha256", + "format", + "result_sha256" + ] } }, "required": [ diff --git a/packages/contracts/src/uploadSessions.test.ts b/packages/contracts/src/uploadSessions.test.ts new file mode 100644 index 00000000..9b3c8fe0 --- /dev/null +++ b/packages/contracts/src/uploadSessions.test.ts @@ -0,0 +1,115 @@ +import { describe, expect, it } from "vitest"; +import { CreateUploadSessionRequest } from "./uploadSessions.js"; + +const sha = (char: string) => char.repeat(64); +const baseRevisionId = "rev_01HZY7Q8X9Y2S3T4V5W6X7Y8Z9"; + +function baseRequest(overrides: Record = {}) { + return { + title: "doc", + entrypoint: "index.html", + files: [{ path: "index.html", size_bytes: 12, sha256: sha("a") }], + ...overrides, + }; +} + +describe("CreateUploadSessionRequest partial-manifest + patch", () => { + it("accepts a base_revision_id with deleted_paths and a whole-file change", () => { + const parsed = CreateUploadSessionRequest.parse( + baseRequest({ + base_revision_id: baseRevisionId, + deleted_paths: ["old/page.html"], + files: [{ path: "index.html", size_bytes: 20, sha256: sha("b") }], + }), + ); + expect(parsed.base_revision_id).toBe(baseRevisionId); + expect(parsed.deleted_paths).toEqual(["old/page.html"]); + }); + + it("accepts a per-file unified patch against a base revision", () => { + const parsed = CreateUploadSessionRequest.parse( + baseRequest({ + base_revision_id: baseRevisionId, + files: [ + { + path: "big.txt", + size_bytes: 30, + sha256: sha("c"), + patch: { base_sha256: sha("d"), format: "unified", result_sha256: sha("e") }, + }, + ], + }), + ); + 
expect(parsed.files[0]?.patch).toEqual({ + base_sha256: sha("d"), + format: "unified", + result_sha256: sha("e"), + }); + }); + + it("rejects a patch with no base_revision_id", () => { + const result = CreateUploadSessionRequest.safeParse( + baseRequest({ + files: [ + { + path: "big.txt", + size_bytes: 30, + sha256: sha("c"), + patch: { base_sha256: sha("d"), format: "unified", result_sha256: sha("e") }, + }, + ], + }), + ); + expect(result.success).toBe(false); + }); + + it("rejects deleted_paths with no base_revision_id", () => { + const result = CreateUploadSessionRequest.safeParse(baseRequest({ deleted_paths: ["gone.html"] })); + expect(result.success).toBe(false); + }); + + it("rejects a non-unified patch format", () => { + const result = CreateUploadSessionRequest.safeParse( + baseRequest({ + base_revision_id: baseRevisionId, + files: [ + { + path: "big.bin", + size_bytes: 30, + sha256: sha("c"), + patch: { base_sha256: sha("d"), format: "binary", result_sha256: sha("e") }, + }, + ], + }), + ); + expect(result.success).toBe(false); + }); + + it("rejects a path that is both uploaded and deleted", () => { + const result = CreateUploadSessionRequest.safeParse( + baseRequest({ + base_revision_id: baseRevisionId, + deleted_paths: ["index.html"], + files: [{ path: "index.html", size_bytes: 12, sha256: sha("a") }], + }), + ); + expect(result.success).toBe(false); + }); + + it("rejects duplicate deleted_paths", () => { + const result = CreateUploadSessionRequest.safeParse( + baseRequest({ + base_revision_id: baseRevisionId, + deleted_paths: ["dup.html", "dup.html"], + }), + ); + expect(result.success).toBe(false); + }); + + it("still accepts a legacy whole-tree manifest with no base_revision_id", () => { + const parsed = CreateUploadSessionRequest.parse(baseRequest()); + expect(parsed.base_revision_id).toBeUndefined(); + expect(parsed.deleted_paths).toBeUndefined(); + expect(parsed.files[0]).not.toHaveProperty("patch"); + }); +}); diff --git 
a/packages/contracts/src/uploadSessions.ts b/packages/contracts/src/uploadSessions.ts index 4201dc4f..979c30df 100644 --- a/packages/contracts/src/uploadSessions.ts +++ b/packages/contracts/src/uploadSessions.ts @@ -13,13 +13,27 @@ import { import { RenderMode } from "./revisions.js"; import { z } from "./zod.js"; -export const Sha256Hex = z.string().regex(/^[a-f0-9]{64}$/u); +export const Sha256Hex = z.string().regex(/^[a-f0-9]{64}$/); export type Sha256Hex = z.infer; +// A changed file may arrive as a patch against a base Revision's file (ADR 0087) +// instead of whole bytes. When present, the bytes uploaded for this file entry are +// the diff (so the entry's size_bytes/sha256 describe the diff), base_sha256 is the +// digest of the file in the base Revision the diff applies to, and result_sha256 is +// the digest of the whole reconstructed file the server must produce and verify. +// Only the unified-diff text format is supported; binary changes upload whole bytes. +export const UploadSessionFilePatch = z.object({ + base_sha256: Sha256Hex, + format: z.literal("unified"), + result_sha256: Sha256Hex, +}); +export type UploadSessionFilePatch = z.infer; + export const UploadSessionFileInput = z.object({ path: FilePath, size_bytes: z.number().int().nonnegative().max(Mebibytes.twentyFive), sha256: Sha256Hex.optional(), + patch: UploadSessionFilePatch.optional(), }); export type UploadSessionFileInput = z.infer; @@ -27,13 +41,58 @@ export type UploadSessionFileInput = z.infer; // client input. Clients (CLI, MCP) cannot request or influence artifact lifetime. // render_mode is an explicit client override; when absent the server infers it // from the entrypoint extension at publish time. 
-export const CreateUploadSessionRequest = z.object({ - artifact_id: ArtifactId.optional(), - title: PlainTextTitle, - entrypoint: FilePath, - render_mode: RenderMode.optional(), - files: z.array(UploadSessionFileInput).min(1).max(100), -}); +// base_revision_id turns this into a partial-manifest publish (ADR 0087): files +// lists only changed/added paths, deleted_paths drops paths, and every other path +// inherits from the base Revision by reference. deleted_paths and per-file patches +// are only meaningful against a base; structural checks live here, while stateful +// checks (base belongs to the workspace/artifact, deleted path exists in the base, +// patch base_sha256 matches the base file) are enforced server-side at finalize. +export const CreateUploadSessionRequest = z + .object({ + artifact_id: ArtifactId.optional(), + base_revision_id: RevisionId.optional(), + title: PlainTextTitle, + entrypoint: FilePath, + render_mode: RenderMode.optional(), + deleted_paths: z.array(FilePath).max(100).optional(), + files: z.array(UploadSessionFileInput).min(1).max(100), + }) + .superRefine((request, ctx) => { + if (request.base_revision_id === undefined) { + if (request.deleted_paths !== undefined) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["deleted_paths"], + message: "deleted_paths requires base_revision_id", + }); + } + const patchedIndex = request.files.findIndex((file) => file.patch !== undefined); + if (patchedIndex !== -1) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["files", patchedIndex, "patch"], + message: "patch requires base_revision_id", + }); + } + } + const deleted = new Set(request.deleted_paths ?? []); + if (deleted.size !== (request.deleted_paths?.length ?? 
0)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["deleted_paths"], + message: "deleted_paths must be unique", + }); + } + request.files.forEach((file, index) => { + if (deleted.has(file.path)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["files", index, "path"], + message: "a path cannot be both uploaded and deleted", + }); + } + }); + }); export type CreateUploadSessionRequest = z.infer; export const UploadRequiredTarget = z.object({ diff --git a/packages/db/migrations/0024_revisions_parent_revision_id.sql b/packages/db/migrations/0024_revisions_parent_revision_id.sql new file mode 100644 index 00000000..55e4f733 --- /dev/null +++ b/packages/db/migrations/0024_revisions_parent_revision_id.sql @@ -0,0 +1,25 @@ +begin; + +-- Revision commit chain (ADR 0087): a Revision may point at the Revision it was +-- published against. NULL means a root (every pre-existing row is a root; no +-- backfill). The composite self-FK references (workspace_id, artifact_id, id) so +-- a parent is structurally guaranteed to live in the same Workspace and Artifact. +alter table revisions + add column if not exists parent_revision_id text; + +-- Deferrable like the other composite FKs onto revisions (see 0020): claim +-- reparent rewrites workspace_id across every revision row in one statement, so +-- the self-FK must defer its check to commit or the bulk update transiently +-- violates it. 
+alter table revisions + drop constraint if exists revisions_parent_fk, + add constraint revisions_parent_fk + foreign key (workspace_id, artifact_id, parent_revision_id) + references revisions(workspace_id, artifact_id, id) + on delete set null (parent_revision_id) + deferrable initially deferred; + +create index if not exists revisions_parent_idx + on revisions(workspace_id, artifact_id, parent_revision_id); + +commit; diff --git a/packages/db/snapshot/schema.sql b/packages/db/snapshot/schema.sql index df19080d..e6a5185a 100644 --- a/packages/db/snapshot/schema.sql +++ b/packages/db/snapshot/schema.sql @@ -138,6 +138,7 @@ CREATE TABLE "revisions" ( "id" text PRIMARY KEY NOT NULL, "workspace_id" uuid NOT NULL, "artifact_id" text NOT NULL, + "parent_revision_id" text, "revision_number" integer, "status" text NOT NULL, "entrypoint" text NOT NULL, @@ -277,6 +278,7 @@ ALTER TABLE "content_blobs" ADD CONSTRAINT "content_blobs_workspace_id_workspace ALTER TABLE "operation_events" ADD CONSTRAINT "operation_events_workspace_id_workspaces_id_fk" FOREIGN KEY ("workspace_id") REFERENCES "public"."workspaces"("id") ON DELETE restrict ON UPDATE no action; ALTER TABLE "revisions" ADD CONSTRAINT "revisions_workspace_id_workspaces_id_fk" FOREIGN KEY ("workspace_id") REFERENCES "public"."workspaces"("id") ON DELETE restrict ON UPDATE no action; ALTER TABLE "revisions" ADD CONSTRAINT "revisions_artifact_id_artifacts_id_fk" FOREIGN KEY ("artifact_id") REFERENCES "public"."artifacts"("id") ON DELETE cascade ON UPDATE no action; +ALTER TABLE "revisions" ADD CONSTRAINT "revisions_parent_fk" FOREIGN KEY ("workspace_id","artifact_id","parent_revision_id") REFERENCES "public"."revisions"("workspace_id","artifact_id","id") ON DELETE set null ON UPDATE no action; ALTER TABLE "safety_warnings" ADD CONSTRAINT "safety_warnings_workspace_id_workspaces_id_fk" FOREIGN KEY ("workspace_id") REFERENCES "public"."workspaces"("id") ON DELETE restrict ON UPDATE no action; ALTER TABLE "safety_warnings" 
ADD CONSTRAINT "safety_warnings_revision_fk" FOREIGN KEY ("workspace_id","artifact_id","revision_id") REFERENCES "public"."revisions"("workspace_id","artifact_id","id") ON DELETE cascade ON UPDATE no action; ALTER TABLE "upload_session_files" ADD CONSTRAINT "upload_session_files_workspace_id_workspaces_id_fk" FOREIGN KEY ("workspace_id") REFERENCES "public"."workspaces"("id") ON DELETE restrict ON UPDATE no action; @@ -303,6 +305,7 @@ CREATE INDEX "revisions_workspace_idx" ON "revisions" USING btree ("workspace_id CREATE UNIQUE INDEX "revisions_workspace_artifact_id_unique" ON "revisions" USING btree ("workspace_id","artifact_id","id"); CREATE UNIQUE INDEX "revisions_artifact_number_unique" ON "revisions" USING btree ("artifact_id","revision_number") WHERE "revisions"."revision_number" is not null; CREATE UNIQUE INDEX "revisions_one_draft_per_artifact" ON "revisions" USING btree ("artifact_id") WHERE "revisions"."status" = 'draft'; +CREATE INDEX "revisions_parent_idx" ON "revisions" USING btree ("workspace_id","artifact_id","parent_revision_id"); CREATE INDEX "safety_warnings_revision_idx" ON "safety_warnings" USING btree ("workspace_id","revision_id"); CREATE INDEX "safety_warnings_scanner_idx" ON "safety_warnings" USING btree ("workspace_id","revision_id","scanner_id"); CREATE INDEX "stripe_webhook_events_processed_idx" ON "stripe_webhook_events" USING btree ("processed_at"); diff --git a/packages/db/src/local-mvp-sql-executor.test.ts b/packages/db/src/local-mvp-sql-executor.test.ts index c7f18dcc..81867a89 100644 --- a/packages/db/src/local-mvp-sql-executor.test.ts +++ b/packages/db/src/local-mvp-sql-executor.test.ts @@ -12,6 +12,7 @@ function seedPublishedRevision(state: ReturnType) { id: revisionId, workspace_id: workspaceId, artifact_id: artifactId, + parent_revision_id: null, revision_number: 1, status: "published", entrypoint: "index.html", diff --git a/packages/db/src/queries/revisions.test.ts b/packages/db/src/queries/revisions.test.ts index 
01ce775a..f3c22b72 100644 --- a/packages/db/src/queries/revisions.test.ts +++ b/packages/db/src/queries/revisions.test.ts @@ -51,6 +51,14 @@ describe("revisionQueries", () => { expect(db.writes.length).toBeGreaterThan(0); }); + it("round-trips the parent revision pointer through findById", async () => { + const db = fakeDrizzle([[revisionRow({ id: "rev_child", parentRevisionId: "rev_parent" })]]); + await expect(revisionQueries.findById(db, "rev_child")).resolves.toMatchObject({ + id: "rev_child", + parent_revision_id: "rev_parent", + }); + }); + it("returns null or false for missing rows", async () => { const db = fakeDrizzle([[], [], [], [{ max: 0 }], []]); await expect(revisionQueries.findById(db, "missing")).resolves.toBeNull(); @@ -72,6 +80,7 @@ function revisionEntity(overrides: Partial = {}): Revision { id: "rev_pub", workspace_id: "workspace_1", artifact_id: "artifact_1", + parent_revision_id: null, revision_number: 1, status: "published", entrypoint: "index.html", @@ -94,6 +103,7 @@ function revisionRow( overrides: Partial<{ id: string; status: string; + parentRevisionId: string | null; revisionNumber: number | null; publishedAt: Date | null; bundleStatusUpdatedAt: Date | null; @@ -104,6 +114,7 @@ function revisionRow( id: "rev_pub", workspaceId: "workspace_1", artifactId: "artifact_1", + parentRevisionId: null, revisionNumber: 1, status: "published", entrypoint: "index.html", diff --git a/packages/db/src/queries/revisions.ts b/packages/db/src/queries/revisions.ts index d5008925..8527e206 100644 --- a/packages/db/src/queries/revisions.ts +++ b/packages/db/src/queries/revisions.ts @@ -9,6 +9,7 @@ export const revisionQueries = { id: row.id, workspaceId: row.workspace_id, artifactId: row.artifact_id, + parentRevisionId: row.parent_revision_id, revisionNumber: row.revision_number, status: row.status, entrypoint: row.entrypoint, @@ -111,6 +112,7 @@ function mapRevision(row: typeof revisions.$inferSelect): Revision { id: row.id, workspace_id: row.workspaceId, 
artifact_id: row.artifactId, + parent_revision_id: row.parentRevisionId, revision_number: row.revisionNumber, status: row.status as Revision["status"], entrypoint: row.entrypoint, diff --git a/packages/db/src/repository/upload-session-lifecycle.ts b/packages/db/src/repository/upload-session-lifecycle.ts index 5eb07cc0..bd7c9dce 100644 --- a/packages/db/src/repository/upload-session-lifecycle.ts +++ b/packages/db/src/repository/upload-session-lifecycle.ts @@ -222,6 +222,8 @@ export async function finalizeUploadSessionInEntities( id: session.revision_id, workspace_id: session.workspace_id, artifact_id: session.artifact_id, + // Set when publishing against a base Revision (ADR 0087 tree inheritance, Stage 3). + parent_revision_id: null, revision_number: null, status: "draft", entrypoint: session.entrypoint, diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index dbbc791b..15e5d30b 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -244,6 +244,7 @@ export const revisions = pgTable( artifactId: text("artifact_id") .notNull() .references(() => artifacts.id, { onDelete: "cascade" }), + parentRevisionId: text("parent_revision_id"), revisionNumber: integer("revision_number"), status: text("status").notNull(), entrypoint: text("entrypoint").notNull(), @@ -274,6 +275,12 @@ export const revisions = pgTable( ), check("revisions_bundle_status_check", sql`${table.bundleStatus} in ('pending', 'ready', 'failed', 'disabled')`), check("revisions_created_by_type_check", sql`${table.createdByType} in ('api_key', 'member')`), + index("revisions_parent_idx").on(table.workspaceId, table.artifactId, table.parentRevisionId), + foreignKey({ + name: "revisions_parent_fk", + columns: [table.workspaceId, table.artifactId, table.parentRevisionId], + foreignColumns: [table.workspaceId, table.artifactId, table.id], + }).onDelete("set null"), ], ); diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts index 78400d7f..c8e2565c 100644 --- 
a/packages/db/src/types.ts +++ b/packages/db/src/types.ts @@ -133,6 +133,7 @@ export type Revision = { id: string; workspace_id: string; artifact_id: string; + parent_revision_id: string | null; revision_number: number | null; status: RevisionStatus; entrypoint: string; From 27c2010b04915b97309e6db11ed9e73230ecd51a Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 13:52:44 -0700 Subject: [PATCH 03/14] feat(api,db): tree inheritance at finalize for revision commit chain (ADR 0087 stage 3) Publishing against base_revision_id now inherits the base Revision's unchanged blob-backed files instead of re-uploading them: a one-file change yields a full artifact_files tree with one new blob and parent_revision_id set. The merge runs at finalize (mergeBaseRevisionTree), recomputes file_count/size_bytes from the merged tree, and re-runs validateUpload (caps + entrypoint) against it. Stateful validation deferred from the Stage 2 contract now lands server-side (6 new repo error codes -> invalid_request): published-base-only, same workspace (base_revision_not_found) and same artifact (base_revision_artifact_mismatch, fired before the composite parent FK would 500), deleted-path-in-base, patch base_sha256 match, and blob-backed-only inheritance (a revision-scoped base path is not refcount-protected, so it is rejected rather than dangled). Patch descriptors (patch_base_sha256/patch_result_sha256) are recorded and validated on upload_session_files with the diff uploaded as a revision object (sha256 omitted from the signed PUT), but finalize fails loud (patch_reconstruction_unavailable) until jobs reconstruction lands in Stage 4 - otherwise the raw diff bytes would be served as the file. A file may not declare both a whole-file sha256 and a patch. Carriers: upload_sessions.base_revision_id + deleted_paths, the two patch columns on upload_session_files (migration 0025, idempotent). Specs updated alongside. 
Co-Authored-By: Claude Opus 4.8 --- apps/upload/src/create-session.ts | 3 + ...eritance-and-server-reconstructed-delta.md | 39 ++ docs/ops/git-like-revisions-todo.md | 10 +- docs/specs/api.md | 17 +- docs/specs/data-model.md | 71 +- .../contracts/src/routes/registry.storage.ts | 5 + packages/contracts/src/uploadSessions.test.ts | 17 +- packages/contracts/src/uploadSessions.ts | 17 +- ...upload_session_base_revision_and_patch.sql | 36 + packages/db/snapshot/schema.sql | 7 +- packages/db/src/index.test.ts | 647 ++++++++++++++++++ packages/db/src/queries/upload-sessions.ts | 8 + packages/db/src/repository-error.ts | 14 + .../repository/upload-session-lifecycle.ts | 270 ++++++-- .../workflows/upload-publish-workflow.ts | 4 + packages/db/src/schema.ts | 16 + packages/db/src/types.ts | 8 + packages/db/src/validation.ts | 8 +- 18 files changed, 1107 insertions(+), 90 deletions(-) create mode 100644 packages/db/migrations/0025_upload_session_base_revision_and_patch.sql diff --git a/apps/upload/src/create-session.ts b/apps/upload/src/create-session.ts index 7726a497..2d9cab02 100644 --- a/apps/upload/src/create-session.ts +++ b/apps/upload/src/create-session.ts @@ -37,13 +37,16 @@ export async function createUploadSession( path: file.path, size_bytes: file.size_bytes, ...(file.sha256 ? { sha256: file.sha256 } : {}), + ...(file.patch ? { patch: file.patch } : {}), })); const createRequest = { entrypoint: body.entrypoint, files, ...(body.title === undefined ? {} : { title: body.title }), ...(body.artifact_id === undefined ? {} : { artifact_id: body.artifact_id }), + ...(body.base_revision_id === undefined ? {} : { base_revision_id: body.base_revision_id }), ...(body.render_mode === undefined ? {} : { render_mode: body.render_mode }), + ...(body.deleted_paths === undefined ? 
{} : { deleted_paths: body.deleted_paths }), }; let session: UploadSessionRecord; diff --git a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md index a2f8f912..996c7e20 100644 --- a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md +++ b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md @@ -144,6 +144,45 @@ whole-blob. term is introduced for "patch"; it is an implementation property of how a changed file is transmitted. +## Stage 3 implementation notes (tree inheritance at finalize) + +Decisions surfaced while building the api tree-inheritance step, recorded so the +next implementer does not re-derive them: + +- **The merge runs at finalize, not at session create.** The session-create + alternative was considered (the base is already known there) and rejected: it + contradicts this ADR's "publish-time merge" framing and would write inherited + rows into `upload_session_files` that the client would then be asked to PUT. + Keeping the merge at finalize is also strictly less code (the wire builder, + observe loop, and upload worker are untouched). The session carries the intent + via `upload_sessions.base_revision_id` and `upload_sessions.deleted_paths`. +- **Inheritance requires `base.status = 'published'` and blob-backed paths only.** + A draft base is uncommitted (and unreachable as a base anyway, since a session + on the same Artifact hits `draft_revision_conflict` first); a retained base's + blobs fall out of the GC refcount. A `storage_kind = 'revision'` base path is + not refcount-protected, so it is rejected (`inherited_path_not_blob_backed`) + rather than copied forward into a dangling reference. +- **The composite `revisions_parent_fk` is the DB backstop** for a cross-artifact + or cross-workspace parent. 
The app validates the base belongs to the same + Artifact (`base_revision_artifact_mismatch`) and Workspace + (`base_revision_not_found`) and fails fast before the foreign key would 500. +- **A patched file's diff bytes upload as a revision-scoped object with `sha256` + omitted** from the signed payload (the signed-blob-key assertion in the upload + worker only fires when `sha256` is signed). The patch descriptor + (`patch_base_sha256`, `patch_result_sha256`) is recorded on + `upload_session_files` so a later `jobs` step can reconstruct the result blob. + Stage 3 validates `base_sha256` against the base file but does not apply the + diff. +- **Stage 3 refuses to finalize a patched file** (`patch_reconstruction_unavailable`). + Without Stage 4's reconstruction, finalizing would commit the raw diff bytes as + the served file. The descriptor is still recorded and validated at create so the + wire path is exercised, but the publish flow fails loud until reconstruction + exists. This guards a hand-rolled API/MCP caller even though no first-party + client emits patches until Stage 5. A file may not declare both a whole-file + `sha256` and a `patch` (rejected at request validation). + +This confirms, and does not reverse, the Stage 2 "applied at finalize" wording. + ## What this ADR is not - Not a chunk store, not per-block encryption, not Range serving, not global diff --git a/docs/ops/git-like-revisions-todo.md b/docs/ops/git-like-revisions-todo.md index b1561eae..3bf158c4 100644 --- a/docs/ops/git-like-revisions-todo.md +++ b/docs/ops/git-like-revisions-todo.md @@ -122,7 +122,7 @@ result_sha256 }` plus the diff bytes uploaded like any file body. Absence = - Done: contract + OpenAPI regenerated; round-trip tests for partial manifest and patch descriptor. 
-### Stage 3 - api: tree inheritance at finalize/publish +### Stage 3 - api: tree inheritance at finalize/publish - DONE - When finalizing against `base_revision_id`: copy forward the base's `artifact_files` rows for inherited paths (already point at shared blobs), @@ -132,6 +132,14 @@ result_sha256 }` plus the diff bytes uploaded like any file body. Absence = - Done: a revision published with one changed file has a full `artifact_files` tree but only one new blob; `parent_revision_id` set; diffing two revisions' `artifact_files` yields the changeset. +- Landed: merge runs at finalize (`mergeBaseRevisionTree` in + `packages/db/src/repository/upload-session-lifecycle.ts`); session carries + `base_revision_id` + `deleted_paths`; patched files record a descriptor + (`patch_base_sha256` / `patch_result_sha256`) on `upload_session_files` with + `sha256` omitted from the signed PUT. Stateful validation (published base, + same workspace/artifact, blob-backed-only inheritance, deleted-path-in-base, + patch base match) with six new repo error codes mapped to `invalid_request`. + See the ADR 0087 Stage 3 implementation notes for the decisions. ### Stage 4 - jobs: Option 1 reconstruct-on-write diff --git a/docs/specs/api.md b/docs/specs/api.md index 598c7b78..17ebaf1e 100644 --- a/docs/specs/api.md +++ b/docs/specs/api.md @@ -156,10 +156,19 @@ result_sha256 }`) means the bytes uploaded for that entry are a unified diff server produces and verifies. Structural rules enforced at request validation: `patch` and `deleted_paths` require `base_revision_id`; `deleted_paths` is unique; a path cannot be both uploaded and deleted; `format` must be - `unified`. Stateful checks (the base belongs to the same Workspace/Artifact, a - deleted path exists in the base, a patch `base_sha256` matches the base file) - and the tree-inheritance merge and diff reconstruction are applied - server-side at finalize/publish. + `unified`. 
Stateful checks and the tree-inheritance merge run server-side at + finalize. The base must be a `published` Revision in the same Workspace and + Artifact (a cross-workspace base is reported as not found; a cross-artifact base + is rejected before it could violate the parent foreign key). Only blob-backed + base paths inherit; a legacy revision-scoped path must be re-uploaded. A deleted + path must exist in the base, and a patch `base_sha256` must match the base file. + At finalize the merged tree (inherited base rows + uploaded changes − deletions) + sets `revisions.parent_revision_id = base_revision_id`, and `file_count` / + `size_bytes` are recomputed from the merged tree, not the uploaded manifest. + Diff reconstruction into a whole blob is deferred to a later `jobs` step, so a + session that carries a `patch` is currently rejected at finalize rather than + serving the diff bytes as the file. A file may not declare both a whole-file + `sha256` and a `patch`. ### `CreateUploadSessionResponse` diff --git a/docs/specs/data-model.md b/docs/specs/data-model.md index a04d841c..2cefd482 100644 --- a/docs/specs/data-model.md +++ b/docs/specs/data-model.md @@ -135,6 +135,13 @@ For `storage_kind = 'revision'`, `r2_key` points at the legacy `storage_kind = 'blob'`, `r2_key` points at a workspace shared blob object under `workspaces/{workspaceId}/blobs/sha256/{prefix}/{sha256}`. +Under tree inheritance ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)), +a Revision published against a base copies forward the base's unchanged +`artifact_files` rows by reference (same `sha256` / `r2_key` / `storage_kind = 'blob'`), +so a one-file change yields a full file tree but only one new blob. Only +blob-backed base paths can inherit: a legacy `storage_kind = 'revision'` path is +not refcount-protected and must be re-uploaded rather than inherited. 
+ ### `content_blobs` | Column | Type | Notes | @@ -178,39 +185,43 @@ exposing scanner internals. ### `upload_sessions` -| Column | Type | Notes | -| --------------------- | ----------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `id` | `TEXT PRIMARY KEY` | `upl_...`. | -| `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id)` | | -| `artifact_id` | `TEXT NOT NULL` | Reserved before active artifact creation. | -| `revision_id` | `TEXT NOT NULL` | Reserved before active artifact creation. | -| `status` | `TEXT NOT NULL` | `pending`, `finalized`, `expired`, or `failed`. | -| `title` | `TEXT NOT NULL` | Plain text. | -| `entrypoint` | `TEXT NOT NULL` | Normalized file path. | -| `render_mode` | `TEXT NULL` | Explicit client override (`html`, `markdown`, `text`, `image`, `audio`, `video`). Null means infer from the entrypoint extension at finalize. Copied to `revisions.render_mode`. | -| `artifact_expires_at` | `TIMESTAMPTZ NOT NULL` | Copied to `artifacts.expires_at` on finalize. | -| `file_count` | `INTEGER NOT NULL` | Expected files. | -| `size_bytes` | `BIGINT NOT NULL` | Expected total bytes. | -| `created_by_type` | `TEXT NOT NULL` | `api_key` or `member`. | -| `created_by_id` | `TEXT NOT NULL` | Creator id for the stored type. | -| `expires_at` | `TIMESTAMPTZ NOT NULL` | Upload session TTL, typically 24 hours. | -| `created_at` | `TIMESTAMPTZ NOT NULL` | | -| `finalized_at` | `TIMESTAMPTZ NULL` | | +| Column | Type | Notes | +| --------------------- | ----------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `id` | `TEXT PRIMARY KEY` | `upl_...`. 
| +| `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id)` | | +| `artifact_id` | `TEXT NOT NULL` | Reserved before active artifact creation. | +| `revision_id` | `TEXT NOT NULL` | Reserved before active artifact creation. | +| `status` | `TEXT NOT NULL` | `pending`, `finalized`, `expired`, or `failed`. | +| `title` | `TEXT NOT NULL` | Plain text. | +| `entrypoint` | `TEXT NOT NULL` | Normalized file path. | +| `render_mode` | `TEXT NULL` | Explicit client override (`html`, `markdown`, `text`, `image`, `audio`, `video`). Null means infer from the entrypoint extension at finalize. Copied to `revisions.render_mode`. | +| `artifact_expires_at` | `TIMESTAMPTZ NOT NULL` | Copied to `artifacts.expires_at` on finalize. | +| `file_count` | `INTEGER NOT NULL` | Expected files. | +| `size_bytes` | `BIGINT NOT NULL` | Expected total bytes. | +| `created_by_type` | `TEXT NOT NULL` | `api_key` or `member`. | +| `created_by_id` | `TEXT NOT NULL` | Creator id for the stored type. | +| `expires_at` | `TIMESTAMPTZ NOT NULL` | Upload session TTL, typically 24 hours. | +| `created_at` | `TIMESTAMPTZ NOT NULL` | | +| `finalized_at` | `TIMESTAMPTZ NULL` | | +| `base_revision_id` | `TEXT NULL` | Base Revision this publish inherits from ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). `NULL` is a full manifest. Copied to `revisions.parent_revision_id` when the tree merge runs at finalize. | +| `deleted_paths` | `JSONB NOT NULL DEFAULT '[]'` | Base paths this publish drops. Lets finalize tell a deleted path apart from an inherited one (both are base paths absent from the file manifest). 
| ### `upload_session_files` -| Column | Type | Notes | -| --------------------- | ---------------------------------------------- | -------------------------------------------------------------------------------------------------- | -| `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id)` | | -| `upload_session_id` | `TEXT NOT NULL REFERENCES upload_sessions(id)` | | -| `path` | `TEXT NOT NULL` | Normalized POSIX path. | -| `size_bytes` | `BIGINT NOT NULL` | Expected size. | -| `served_content_type` | `TEXT NOT NULL` | Derived before issuing upload URL. | -| `r2_key` | `TEXT NOT NULL` | Final artifact object key. | -| `sha256` | `TEXT NULL` | Lowercase hex digest when supplied by client. | -| `storage_kind` | `TEXT NOT NULL DEFAULT 'revision'` | `revision` or `blob`. | -| `uploaded_at` | `TIMESTAMPTZ NULL` | Set after successful PUT or existing blob reuse. | -| `put_url_expires_at` | `TIMESTAMPTZ NOT NULL` | Session-level upper bound for PUT writes. Set to `upload_sessions.expires_at` at session creation. | +| Column | Type | Notes | +| --------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id)` | | +| `upload_session_id` | `TEXT NOT NULL REFERENCES upload_sessions(id)` | | +| `path` | `TEXT NOT NULL` | Normalized POSIX path. | +| `size_bytes` | `BIGINT NOT NULL` | Expected size. | +| `served_content_type` | `TEXT NOT NULL` | Derived before issuing upload URL. | +| `r2_key` | `TEXT NOT NULL` | Final artifact object key. | +| `sha256` | `TEXT NULL` | Lowercase hex digest when supplied by client. | +| `storage_kind` | `TEXT NOT NULL DEFAULT 'revision'` | `revision` or `blob`. | +| `uploaded_at` | `TIMESTAMPTZ NULL` | Set after successful PUT or existing blob reuse. 
| +| `put_url_expires_at` | `TIMESTAMPTZ NOT NULL` | Session-level upper bound for PUT writes. Set to `upload_sessions.expires_at` at session creation. | +| `patch_base_sha256` | `TEXT NULL` | Intra-file delta ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)): digest of the base Revision's file the uploaded unified diff applies to. | +| `patch_result_sha256` | `TEXT NULL` | Digest of the whole reconstructed file `jobs` produces from the diff (Stage 4). Both patch columns are `NULL` (whole-file upload) or both set (a `CHECK` enforces it). | Primary key `(upload_session_id, path)`. diff --git a/packages/contracts/src/routes/registry.storage.ts b/packages/contracts/src/routes/registry.storage.ts index ad7dc681..48f1340b 100644 --- a/packages/contracts/src/routes/registry.storage.ts +++ b/packages/contracts/src/routes/registry.storage.ts @@ -23,6 +23,7 @@ export const storageRouteContracts = [ ...apiKeyMutationErrors, "artifact_not_found", "draft_revision_conflict", + "entrypoint_not_in_revision", "file_count_cap_exceeded", "file_size_cap_exceeded", "revision_size_cap_exceeded", @@ -64,6 +65,10 @@ export const storageRouteContracts = [ "artifact_not_found", "draft_revision_conflict", "entrypoint_not_in_revision", + "file_count_cap_exceeded", + "file_size_cap_exceeded", + "revision_size_cap_exceeded", + "invalid_request", "unexpected_upload_object", "upload_incomplete", "upload_session_expired", diff --git a/packages/contracts/src/uploadSessions.test.ts b/packages/contracts/src/uploadSessions.test.ts index 9b3c8fe0..2fef2e3d 100644 --- a/packages/contracts/src/uploadSessions.test.ts +++ b/packages/contracts/src/uploadSessions.test.ts @@ -34,7 +34,6 @@ describe("CreateUploadSessionRequest partial-manifest + patch", () => { { path: "big.txt", size_bytes: 30, - sha256: sha("c"), patch: { base_sha256: sha("d"), format: "unified", result_sha256: sha("e") }, }, ], @@ -50,6 +49,22 @@ describe("CreateUploadSessionRequest 
partial-manifest + patch", () => { it("rejects a patch with no base_revision_id", () => { const result = CreateUploadSessionRequest.safeParse( baseRequest({ + files: [ + { + path: "big.txt", + size_bytes: 30, + patch: { base_sha256: sha("d"), format: "unified", result_sha256: sha("e") }, + }, + ], + }), + ); + expect(result.success).toBe(false); + }); + + it("rejects a file declaring both a whole-file sha256 and a patch", () => { + const result = CreateUploadSessionRequest.safeParse( + baseRequest({ + base_revision_id: baseRevisionId, files: [ { path: "big.txt", diff --git a/packages/contracts/src/uploadSessions.ts b/packages/contracts/src/uploadSessions.ts index 979c30df..bdcd3430 100644 --- a/packages/contracts/src/uploadSessions.ts +++ b/packages/contracts/src/uploadSessions.ts @@ -44,9 +44,10 @@ export type UploadSessionFileInput = z.infer; // base_revision_id turns this into a partial-manifest publish (ADR 0087): files // lists only changed/added paths, deleted_paths drops paths, and every other path // inherits from the base Revision by reference. deleted_paths and per-file patches -// are only meaningful against a base; structural checks live here, while stateful -// checks (base belongs to the workspace/artifact, deleted path exists in the base, -// patch base_sha256 matches the base file) are enforced server-side at finalize. +// are only meaningful against a base. Structural checks live here; the stateful +// checks and the tree-inheritance merge (base is a published Revision in the same +// workspace/artifact, only blob-backed paths inherit, deleted path exists in the +// base, patch base_sha256 matches the base file) run server-side at finalize. 
export const CreateUploadSessionRequest = z .object({ artifact_id: ArtifactId.optional(), @@ -91,6 +92,16 @@ export const CreateUploadSessionRequest = z message: "a path cannot be both uploaded and deleted", }); } + // A patch carries the diff's own digest; a whole-file sha256 on the same + // entry is contradictory (the bytes are a diff, not the content-addressed + // file), so reject it rather than silently dropping one. + if (file.patch !== undefined && file.sha256 !== undefined) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["files", index, "sha256"], + message: "a patched file cannot also declare a whole-file sha256", + }); + } }); }); export type CreateUploadSessionRequest = z.infer; diff --git a/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql b/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql new file mode 100644 index 00000000..9f8579a8 --- /dev/null +++ b/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql @@ -0,0 +1,36 @@ +-- ADR 0087 Stage 3: partial-manifest publish carriers on the upload session. +-- +-- upload_sessions.base_revision_id records the Revision this publish inherits +-- from; the tree merge runs at finalize and copies it to +-- revisions.parent_revision_id. Null = full manifest (today's behavior). +-- upload_sessions.deleted_paths records base paths this publish drops, so finalize +-- can tell a deleted path apart from an inherited one. +-- +-- upload_session_files.patch_base_sha256 / patch_result_sha256 record an +-- intra-file delta: the uploaded bytes are a unified diff against the base file, +-- and jobs reconstructs the whole result blob in Stage 4. Both null (whole-file +-- upload) or both set, each a sha256 hex digest. Stage 3 only records + validates +-- them; it never applies the diff. +-- +-- Migrations are applied in filename order with no journal, so every statement is +-- idempotent (re-run safe). 
+ +begin; + +alter table upload_sessions + add column if not exists base_revision_id text, + add column if not exists deleted_paths jsonb not null default '[]'::jsonb; + +alter table upload_session_files + add column if not exists patch_base_sha256 text, + add column if not exists patch_result_sha256 text; + +alter table upload_session_files + drop constraint if exists upload_session_files_patch_check, + add constraint upload_session_files_patch_check + check ( + (patch_base_sha256 is null and patch_result_sha256 is null) + or (patch_base_sha256 ~ '^[a-f0-9]{64}$' and patch_result_sha256 ~ '^[a-f0-9]{64}$') + ); + +commit; diff --git a/packages/db/snapshot/schema.sql b/packages/db/snapshot/schema.sql index e6a5185a..e592f8eb 100644 --- a/packages/db/snapshot/schema.sql +++ b/packages/db/snapshot/schema.sql @@ -197,9 +197,12 @@ CREATE TABLE "upload_session_files" ( "storage_kind" text DEFAULT 'revision' NOT NULL, "uploaded_at" timestamp with time zone, "put_url_expires_at" timestamp with time zone NOT NULL, + "patch_base_sha256" text, + "patch_result_sha256" text, CONSTRAINT "upload_session_files_upload_session_id_path_pk" PRIMARY KEY("upload_session_id","path"), CONSTRAINT "upload_session_files_storage_kind_check" CHECK ("upload_session_files"."storage_kind" in ('revision', 'blob')), - CONSTRAINT "upload_session_files_sha256_check" CHECK ("upload_session_files"."sha256" is null or "upload_session_files"."sha256" ~ '^[a-f0-9]{64}$') + CONSTRAINT "upload_session_files_sha256_check" CHECK ("upload_session_files"."sha256" is null or "upload_session_files"."sha256" ~ '^[a-f0-9]{64}$'), + CONSTRAINT "upload_session_files_patch_check" CHECK (("upload_session_files"."patch_base_sha256" is null and "upload_session_files"."patch_result_sha256" is null) or ("upload_session_files"."patch_base_sha256" ~ '^[a-f0-9]{64}$' and "upload_session_files"."patch_result_sha256" ~ '^[a-f0-9]{64}$')) ); CREATE TABLE "upload_sessions" ( @@ -219,6 +222,8 @@ CREATE TABLE "upload_sessions" ( 
"expires_at" timestamp with time zone NOT NULL, "created_at" timestamp with time zone NOT NULL, "finalized_at" timestamp with time zone, + "base_revision_id" text, + "deleted_paths" jsonb DEFAULT '[]'::jsonb NOT NULL, CONSTRAINT "upload_sessions_created_by_type_check" CHECK ("upload_sessions"."created_by_type" in ('api_key', 'member')), CONSTRAINT "upload_sessions_render_mode_check" CHECK ("upload_sessions"."render_mode" is null or "upload_sessions"."render_mode" in ('html', 'markdown', 'text', 'image', 'audio', 'video')) ); diff --git a/packages/db/src/index.test.ts b/packages/db/src/index.test.ts index 7b1adc00..24285e3e 100644 --- a/packages/db/src/index.test.ts +++ b/packages/db/src/index.test.ts @@ -2282,6 +2282,653 @@ describe("createPostgresHttpExecutor", () => { }); }); +const sha = (char: string) => char.repeat(64); + +// Publish a base Revision whose files are blob-backed (sha256 set + uploaded), so +// they are eligible to inherit forward under ADR 0087 tree inheritance. +async function publishBlobBackedBase( + repo: LocalRepository, + actor: ApiActor, + tag: string, + files: Array<{ path: string; size_bytes: number; sha256: string }>, + now: string, + entrypoint = "index.html", +) { + const session = await repo.createUploadSession({ + actor, + idempotencyKey: `idem-base-create-${tag}`, + request: { title: tag, entrypoint, files }, + now, + }); + for (const file of files) { + const descriptor = session.files.find((candidate) => candidate.path === file.path); + if (!descriptor) { + throw new Error(`expected session descriptor for ${file.path}`); + } + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: file.path, + objectKey: descriptor.object_key, + sizeBytes: file.size_bytes, + sha256: file.sha256, + uploadedAt: now, + }); + } + const finalized = await repo.finalizeUploadSession({ + actor, + idempotencyKey: `idem-base-finalize-${tag}`, + sessionId: session.upload_session_id, + observedFiles: 
files.map((file) => { + const descriptor = session.files.find((candidate) => candidate.path === file.path); + return { path: file.path, objectKey: descriptor?.object_key ?? "", sizeBytes: file.size_bytes }; + }), + now, + }); + const published = await repo.publishRevision({ + actor, + idempotencyKey: `idem-base-publish-${tag}`, + artifactId: finalized.artifact_id, + revisionId: finalized.revision_id, + now, + }); + return { artifactId: published.artifact_id, revisionId: published.revision_id }; +} + +describe("ADR 0087 tree inheritance", () => { + it("inherits unchanged blob-backed files from the base and adds one new blob", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "inherit", + [ + { path: "index.html", size_bytes: 12, sha256: sha("a") }, + { path: "b.css", size_bytes: 20, sha256: sha("b") }, + { path: "big.txt", size_bytes: 5000, sha256: sha("c") }, + ], + "2026-01-01T00:00:00.000Z", + ); + const blobsBefore = repo.contentBlobs.size; + + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-inherit-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "inherit", + entrypoint: "index.html", + files: [{ path: "index.html", size_bytes: 14, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "index.html"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "index.html", + objectKey: changed?.object_key ?? "", + sizeBytes: 14, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + const finalized = await repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-inherit-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "index.html", objectKey: changed?.object_key ?? 
"", sizeBytes: 14 }], + now: "2026-01-02T00:00:02.000Z", + }); + + const files = [...repo.artifactFiles.values()].filter((file) => file.revision_id === finalized.revision_id); + expect(files.map((file) => file.path).sort()).toEqual(["b.css", "big.txt", "index.html"]); + const inheritedCss = files.find((file) => file.path === "b.css"); + expect(inheritedCss?.sha256).toBe(sha("b")); + expect(inheritedCss?.storage_kind).toBe("blob"); + expect(files.find((file) => file.path === "index.html")?.sha256).toBe(sha("d")); + // Only the changed file introduced a new blob; inherited rows reuse base blobs. + expect(repo.contentBlobs.size).toBe(blobsBefore + 1); + }); + + it("recomputes file_count/size_bytes from the merged tree, not the changed manifest", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "counts", + [ + { path: "index.html", size_bytes: 100, sha256: sha("a") }, + { path: "b.css", size_bytes: 200, sha256: sha("b") }, + { path: "c.js", size_bytes: 300, sha256: sha("c") }, + ], + "2026-01-01T00:00:00.000Z", + ); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-counts-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "counts", + entrypoint: "index.html", + files: [{ path: "index.html", size_bytes: 50, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "index.html"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "index.html", + objectKey: changed?.object_key ?? 
"", + sizeBytes: 50, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + const finalized = await repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-counts-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "index.html", objectKey: changed?.object_key ?? "", sizeBytes: 50 }], + now: "2026-01-02T00:00:02.000Z", + }); + expect(finalized.file_count).toBe(3); + expect(finalized.size_bytes).toBe(50 + 200 + 300); + expect(repo.revisions.get(finalized.revision_id)?.parent_revision_id).toBe(base.revisionId); + // The session row still describes only the changed manifest. + expect(repo.uploadSessions.get(session.upload_session_id)?.file_count).toBe(1); + }); + + it("drops a deleted base path from the merged tree", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "delete", + [ + { path: "index.html", size_bytes: 12, sha256: sha("a") }, + { path: "b.css", size_bytes: 20, sha256: sha("b") }, + { path: "c.js", size_bytes: 30, sha256: sha("c") }, + ], + "2026-01-01T00:00:00.000Z", + ); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-delete-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "delete", + entrypoint: "index.html", + deleted_paths: ["c.js"], + files: [{ path: "index.html", size_bytes: 14, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "index.html"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "index.html", + objectKey: changed?.object_key ?? 
"", + sizeBytes: 14, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + const finalized = await repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-delete-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "index.html", objectKey: changed?.object_key ?? "", sizeBytes: 14 }], + now: "2026-01-02T00:00:02.000Z", + }); + const files = [...repo.artifactFiles.values()].filter((file) => file.revision_id === finalized.revision_id); + expect(files.map((file) => file.path).sort()).toEqual(["b.css", "index.html"]); + expect(finalized.file_count).toBe(2); + }); + + it("inherits the entrypoint when it is unchanged", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "entry", + [ + { path: "index.html", size_bytes: 12, sha256: sha("a") }, + { path: "b.css", size_bytes: 20, sha256: sha("b") }, + ], + "2026-01-01T00:00:00.000Z", + ); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-entry-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "entry", + entrypoint: "index.html", + files: [{ path: "b.css", size_bytes: 22, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "b.css"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "b.css", + objectKey: changed?.object_key ?? "", + sizeBytes: 22, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + const finalized = await repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-entry-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "b.css", objectKey: changed?.object_key ?? 
"", sizeBytes: 22 }], + now: "2026-01-02T00:00:02.000Z", + }); + expect(finalized.entrypoint).toBe("index.html"); + expect(finalized.file_count).toBe(2); + }); + + it("rejects deleting the entrypoint without re-adding it", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "entry-del", + [ + { path: "index.html", size_bytes: 12, sha256: sha("a") }, + { path: "b.css", size_bytes: 20, sha256: sha("b") }, + ], + "2026-01-01T00:00:00.000Z", + ); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-entry-del-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "entry-del", + entrypoint: "index.html", + deleted_paths: ["index.html"], + files: [{ path: "b.css", size_bytes: 22, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "b.css"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "b.css", + objectKey: changed?.object_key ?? "", + sizeBytes: 22, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + await expect( + repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-entry-del-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "b.css", objectKey: changed?.object_key ?? 
"", sizeBytes: 22 }], + now: "2026-01-02T00:00:02.000Z", + }), + ).rejects.toThrow("entrypoint_not_in_revision"); + }); + + it("rejects deleting a path absent from the base", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "del-missing", + [{ path: "index.html", size_bytes: 12, sha256: sha("a") }], + "2026-01-01T00:00:00.000Z", + ); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-del-missing-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "del-missing", + entrypoint: "index.html", + deleted_paths: ["nope.txt"], + files: [{ path: "index.html", size_bytes: 14, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "index.html"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "index.html", + objectKey: changed?.object_key ?? "", + sizeBytes: 14, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + await expect( + repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-del-missing-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "index.html", objectKey: changed?.object_key ?? 
"", sizeBytes: 14 }], + now: "2026-01-02T00:00:02.000Z", + }), + ).rejects.toThrow("deleted_path_not_in_base"); + }); + + it("rejects a base in another artifact before the FK would 500", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const baseA = await publishBlobBackedBase( + repo, + actor, + "art-a", + [{ path: "index.html", size_bytes: 12, sha256: sha("a") }], + "2026-01-01T00:00:00.000Z", + ); + const baseB = await publishBlobBackedBase( + repo, + actor, + "art-b", + [{ path: "index.html", size_bytes: 12, sha256: sha("b") }], + "2026-01-01T01:00:00.000Z", + ); + // Session targets artifact A but names artifact B's revision as the base. + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-cross-art-create", + request: { + artifact_id: baseA.artifactId, + base_revision_id: baseB.revisionId, + title: "cross-art", + entrypoint: "index.html", + files: [{ path: "index.html", size_bytes: 14, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "index.html"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "index.html", + objectKey: changed?.object_key ?? "", + sizeBytes: 14, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + await expect( + repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-cross-art-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "index.html", objectKey: changed?.object_key ?? 
"", sizeBytes: 14 }], + now: "2026-01-02T00:00:02.000Z", + }), + ).rejects.toThrow("base_revision_artifact_mismatch"); + }); + + it("rejects a base revision from another workspace as not found", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "cross-ws", + [{ path: "index.html", size_bytes: 12, sha256: sha("a") }], + "2026-01-01T00:00:00.000Z", + ); + const otherWorkspace = await repo.createWorkspace({ + actor: adminActor, + idempotencyKey: "idem-ws-other", + email: "other@example.com", + }); + const otherKey = await repo.createApiKey({ + actor: adminActor, + idempotencyKey: "idem-key-other", + workspaceId: otherWorkspace.id, + name: "other", + }); + const otherActor = await repo.verifyApiKey(otherKey.secret); + if (!otherActor) { + throw new Error("expected other actor"); + } + const session = await repo.createUploadSession({ + actor: otherActor, + idempotencyKey: "idem-cross-ws-create", + request: { + base_revision_id: base.revisionId, + title: "cross-ws", + entrypoint: "index.html", + files: [{ path: "index.html", size_bytes: 14, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "index.html"); + await repo.recordUploadedFile({ + workspaceId: otherActor.workspace_id, + sessionId: session.upload_session_id, + path: "index.html", + objectKey: changed?.object_key ?? "", + sizeBytes: 14, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + await expect( + repo.finalizeUploadSession({ + actor: otherActor, + idempotencyKey: "idem-cross-ws-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "index.html", objectKey: changed?.object_key ?? 
"", sizeBytes: 14 }], + now: "2026-01-02T00:00:02.000Z", + }), + ).rejects.toThrow("base_revision_not_found"); + }); + + it("rejects a base that is not published (retained)", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "retained", + [{ path: "index.html", size_bytes: 12, sha256: sha("a") }], + "2026-01-01T00:00:00.000Z", + ); + // A retained base's blobs fall out of the GC refcount, so it cannot be inherited. + const retained = repo.revisions.get(base.revisionId); + if (!retained) { + throw new Error("expected base revision"); + } + retained.status = "retained"; + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-retained-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "on-retained", + entrypoint: "index.html", + files: [{ path: "index.html", size_bytes: 14, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "index.html"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "index.html", + objectKey: changed?.object_key ?? "", + sizeBytes: 14, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + await expect( + repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-retained-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "index.html", objectKey: changed?.object_key ?? "", sizeBytes: 14 }], + now: "2026-01-02T00:00:02.000Z", + }), + ).rejects.toThrow("base_revision_not_publishable"); + }); + + it("rejects inheriting a non-blob-backed base path", async () => { + const { repo, actor } = await localRepoWithApiActor(); + // Base file uploaded WITHOUT sha256 -> revision-scoped, not refcount-protected. 
+ const base = await publishLocalArtifact(repo, actor, "legacy", "2026-01-01T00:00:00.000Z"); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-legacy-create", + request: { + artifact_id: base.artifact_id, + base_revision_id: base.revision_id, + title: "legacy", + entrypoint: "index.html", + files: [{ path: "extra.css", size_bytes: 10, sha256: sha("d") }], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const changed = session.files.find((file) => file.path === "extra.css"); + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "extra.css", + objectKey: changed?.object_key ?? "", + sizeBytes: 10, + sha256: sha("d"), + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + await expect( + repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-legacy-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "extra.css", objectKey: changed?.object_key ?? "", sizeBytes: 10 }], + now: "2026-01-02T00:00:02.000Z", + }), + ).rejects.toThrow("inherited_path_not_blob_backed"); + }); + + it("records a patch descriptor without applying it (Stage 3)", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "patch", + [ + { path: "index.html", size_bytes: 12, sha256: sha("a") }, + { path: "big.txt", size_bytes: 5000, sha256: sha("c") }, + ], + "2026-01-01T00:00:00.000Z", + ); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-patch-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "patch", + entrypoint: "index.html", + files: [ + { + path: "big.txt", + size_bytes: 40, + patch: { base_sha256: sha("c"), format: "unified", result_sha256: sha("e") }, + }, + ], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const descriptor = session.files.find((file) => file.path === "big.txt"); + // The diff uploads as a 
revision object with sha256 omitted from the signed path. + expect(descriptor?.sha256).toBeNull(); + expect(descriptor?.storage_kind).toBe("revision"); + const stored = repo.uploadSessionFiles.get(`${session.upload_session_id}:big.txt`); + expect(stored?.patch_base_sha256).toBe(sha("c")); + expect(stored?.patch_result_sha256).toBe(sha("e")); + + // Stage 3 cannot reconstruct the result blob (jobs Stage 4 owns that), so a + // valid patch must still be refused at finalize rather than serving diff bytes. + await repo.recordUploadedFile({ + workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "big.txt", + objectKey: descriptor?.object_key ?? "", + sizeBytes: 40, + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + await expect( + repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-patch-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "big.txt", objectKey: descriptor?.object_key ?? "", sizeBytes: 40 }], + now: "2026-01-02T00:00:02.000Z", + }), + ).rejects.toThrow("patch_reconstruction_unavailable"); + }); + + it("rejects a patch whose base_sha256 does not match the base file", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "patch-bad", + [ + { path: "index.html", size_bytes: 12, sha256: sha("a") }, + { path: "big.txt", size_bytes: 5000, sha256: sha("c") }, + ], + "2026-01-01T00:00:00.000Z", + ); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-patch-bad-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "patch-bad", + entrypoint: "index.html", + files: [ + { + path: "big.txt", + size_bytes: 40, + patch: { base_sha256: sha("f"), format: "unified", result_sha256: sha("e") }, + }, + ], + }, + now: "2026-01-02T00:00:00.000Z", + }); + const descriptor = session.files.find((file) => file.path === "big.txt"); + await repo.recordUploadedFile({ + 
workspaceId: actor.workspace_id, + sessionId: session.upload_session_id, + path: "big.txt", + objectKey: descriptor?.object_key ?? "", + sizeBytes: 40, + uploadedAt: "2026-01-02T00:00:01.000Z", + }); + await expect( + repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-patch-bad-finalize", + sessionId: session.upload_session_id, + observedFiles: [{ path: "big.txt", objectKey: descriptor?.object_key ?? "", sizeBytes: 40 }], + now: "2026-01-02T00:00:02.000Z", + }), + ).rejects.toThrow("patch_base_mismatch"); + }); + + it("leaves parent_revision_id null for a non-base publish", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const published = await publishLocalArtifact(repo, actor, "rootless", "2026-01-01T00:00:00.000Z"); + expect(repo.revisions.get(published.revision_id)?.parent_revision_id).toBeNull(); + }); +}); + function firstFile(session: { files: Array<{ object_key: string }> }) { const file = session.files[0]; if (!file) { diff --git a/packages/db/src/queries/upload-sessions.ts b/packages/db/src/queries/upload-sessions.ts index fb0c28d5..f16dbabd 100644 --- a/packages/db/src/queries/upload-sessions.ts +++ b/packages/db/src/queries/upload-sessions.ts @@ -22,6 +22,8 @@ export const uploadSessionQueries = { expiresAt: new Date(row.expires_at), createdAt: new Date(row.created_at), finalizedAt: row.finalized_at ? new Date(row.finalized_at) : null, + baseRevisionId: row.base_revision_id ?? null, + deletedPaths: row.deleted_paths ?? [], }); }, @@ -65,6 +67,8 @@ export const uploadSessionFileQueries = { uploadedAt: file.uploaded_at ? new Date(file.uploaded_at) : null, // putUrlExpiresAt is notNull in schema; fall back to "now" rather than producing an Invalid Date. putUrlExpiresAt: file.put_url_expires_at ? new Date(file.put_url_expires_at) : new Date(), + patchBaseSha256: file.patch_base_sha256 ?? null, + patchResultSha256: file.patch_result_sha256 ?? 
null, }); }, @@ -125,6 +129,8 @@ function mapUploadSession(row: typeof uploadSessions.$inferSelect): UploadSessio expires_at: row.expiresAt.toISOString(), created_at: row.createdAt.toISOString(), finalized_at: row.finalizedAt ? row.finalizedAt.toISOString() : null, + base_revision_id: row.baseRevisionId ?? null, + deleted_paths: row.deletedPaths ?? [], }; } @@ -140,5 +146,7 @@ function mapUploadSessionFile(row: typeof uploadSessionFiles.$inferSelect): Stor storage_kind: (row.storageKind ?? "revision") as StoredFileStorageKind, uploaded_at: row.uploadedAt ? row.uploadedAt.toISOString() : null, put_url_expires_at: row.putUrlExpiresAt.toISOString(), + patch_base_sha256: row.patchBaseSha256 ?? null, + patch_result_sha256: row.patchResultSha256 ?? null, }; } diff --git a/packages/db/src/repository-error.ts b/packages/db/src/repository-error.ts index 3d1462a2..4354bb73 100644 --- a/packages/db/src/repository-error.ts +++ b/packages/db/src/repository-error.ts @@ -11,8 +11,12 @@ export const RepositoryErrorCode = { access_link_share_cannot_pin_revision: "access_link_share_cannot_pin_revision", api_key_not_found: "api_key_not_found", artifact_not_found: "artifact_not_found", + base_revision_artifact_mismatch: "base_revision_artifact_mismatch", + base_revision_not_found: "base_revision_not_found", + base_revision_not_publishable: "base_revision_not_publishable", create_postgres_services_missing_connection_or_executor: "create_postgres_services_missing_connection_or_executor", current_api_key_not_found: "current_api_key_not_found", + deleted_path_not_in_base: "deleted_path_not_in_base", draft_revision_conflict: "draft_revision_conflict", drizzle_not_bound_to_executor: "drizzle_not_bound_to_executor", entrypoint_not_in_revision: "entrypoint_not_in_revision", @@ -20,12 +24,15 @@ export const RepositoryErrorCode = { file_count_cap_exceeded: "file_count_cap_exceeded", file_size_cap_exceeded: "file_size_cap_exceeded", forbidden: "forbidden", + inherited_path_not_blob_backed: 
"inherited_path_not_blob_backed", invalid_auto_deletion_days: "invalid_auto_deletion_days", invalid_cursor: "invalid_cursor", invalid_pagination_limit: "invalid_pagination_limit", invalid_request: "invalid_request", lockdown_insert_conflict: "lockdown_insert_conflict", not_found: "not_found", + patch_base_mismatch: "patch_base_mismatch", + patch_reconstruction_unavailable: "patch_reconstruction_unavailable", pinned_artifact_cap_exceeded: "pinned_artifact_cap_exceeded", postgres_http_error: "postgres_http_error", postgres_http_executor_no_transactions: "postgres_http_executor_no_transactions", @@ -75,8 +82,12 @@ const repositoryErrorToAppErrorMap: Record; + // Paths present in the base Revision that this publish drops (base-only). + deleted_paths?: string[]; + files: Array<{ path: string; size_bytes: number; sha256?: string; patch?: UploadSessionFilePatchInput }>; }; export type ObservedUploadFile = { path: string; objectKey: string; sizeBytes: number }; @@ -53,7 +65,10 @@ export async function createUploadSessionInEntities( } } const entrypoint = input.request.entrypoint ?? baseArtifact?.entrypoint ?? "index.html"; - validateUpload(files, input.usagePolicy, entrypoint); + // Against a base Revision the uploaded manifest is a partial delta: validate only + // its per-file/count caps now; the entrypoint and total-size cap are enforced on + // the merged tree at finalize (ADR 0087 tree inheritance). + validateUpload(files, input.usagePolicy, entrypoint, { wholeTree: !input.request.base_revision_id }); const artifactTtlSeconds = isEphemeralWorkspace(input.workspace) ? ephemeralArtifactTtlSeconds(input.usagePolicy) : artifactTtlSecondsForUpload(input.usagePolicy); @@ -77,10 +92,33 @@ export async function createUploadSessionInEntities( expires_at: new Date(new Date(input.now).getTime() + DEFAULT_UPLOAD_SESSION_TTL_MS).toISOString(), created_at: input.now, finalized_at: null, + base_revision_id: input.request.base_revision_id ?? 
null, + deleted_paths: (input.request.deleted_paths ?? []).map((path) => normalizeStoragePath(path)), }; await entities.uploadSessions.insert(session); const storedFiles: StoredFile[] = []; for (const file of files) { + // A patched file uploads a unified diff, which is not content-addressable, so + // it takes the revision-scoped key with sha256 omitted (put.ts asserts a blob + // key whenever sha256 is signed). The diff's own digest must never become the + // signed sha256; the patch descriptor rides separate columns for jobs (Stage 4). + if (file.patch) { + storedFiles.push({ + workspace_id: input.actor.workspace_id, + upload_session_id: session.id, + path: file.path, + size_bytes: file.size_bytes, + content_type: contentTypeForPath(file.path), + r2_key: objectKeyFor(session.artifact_id, session.revision_id, file.path), + sha256: null, + storage_kind: "revision", + uploaded_at: null, + put_url_expires_at: session.expires_at, + patch_base_sha256: file.patch.base_sha256, + patch_result_sha256: file.patch.result_sha256, + }); + continue; + } const blob = file.sha256 ? await entities.contentBlobs.find({ workspaceId: input.actor.workspace_id, @@ -142,6 +180,156 @@ export async function readUploadSessionStateInEntities( return session ? { status: session.status, expiresAt: session.expires_at } : null; } +type MergedTree = { + files: StoredFile[]; + fileCount: number; + sizeBytes: number; + parentRevisionId: string; +}; + +// ADR 0087 tree inheritance: merge the base Revision's published tree with this +// session's changed/added/deleted manifest into the full tree the new Revision +// commits. Runs at finalize (the base is a published Revision; the merge is the +// "commit = parent tree + delta" step) and validates every stateful precondition +// the contract deferred from create. 
+async function mergeBaseRevisionTree( + entities: Entities, + session: UploadSession, + sessionFiles: StoredFile[], +): Promise { + const baseRevisionId = session.base_revision_id; + if (!baseRevisionId) { + throw new Error("mergeBaseRevisionTree requires a base_revision_id"); + } + // Scoped to the session workspace: a cross-workspace base collapses to null + // (indistinguishable from missing, which is correct and non-enumerable). + const base = await entities.revisions.findById(baseRevisionId, session.workspace_id); + if (!base) { + repositoryError("base_revision_not_found"); + } + // Fail fast before the composite revisions_parent_fk would 500 on the parent insert. + if (base.artifact_id !== session.artifact_id) { + repositoryError("base_revision_artifact_mismatch"); + } + // Only a published base is safe to inherit: draft is uncommitted; a retained + // base's blobs fall out of the refcount and may already be GC'd. + if (base.status !== "published") { + repositoryError("base_revision_not_publishable"); + } + + const baseFiles = new Map(); + for (const file of await entities.artifactFiles.listForArtifact(base.artifact_id, baseRevisionId)) { + baseFiles.set(file.path, file); + } + const sessionPaths = new Set(sessionFiles.map((file) => file.path)); + const deletedPaths = new Set(session.deleted_paths); + + for (const path of deletedPaths) { + if (!baseFiles.has(path)) { + repositoryError("deleted_path_not_in_base"); + } + } + for (const file of sessionFiles) { + if (file.patch_base_sha256) { + const baseFile = baseFiles.get(file.path); + if (!baseFile || baseFile.sha256 !== file.patch_base_sha256) { + repositoryError("patch_base_mismatch"); + } + // Stage 3 records and validates the patch descriptor but cannot reconstruct + // the whole result blob yet (jobs Stage 4 owns that). Finalizing now would + // commit the diff bytes as the served file, so refuse until reconstruction + // exists. Fail loud rather than serve a half-applied file (ADR 0087). 
+ repositoryError("patch_reconstruction_unavailable"); + } + } + + // Inherited rows are copied forward by reference and must be blob-backed: a + // revision-scoped base file (sha256 null) lives under that base Revision's prefix + // and is not refcount-protected, so inheriting it would dangle. The client must + // re-upload such a path (it then arrives as a changed file, not inherited). + const merged = new Map(); + for (const [path, baseFile] of baseFiles) { + if (sessionPaths.has(path) || deletedPaths.has(path)) { + continue; + } + if (baseFile.storage_kind !== "blob") { + repositoryError("inherited_path_not_blob_backed"); + } + merged.set(path, { + ...baseFile, + workspace_id: session.workspace_id, + artifact_id: session.artifact_id, + revision_id: session.revision_id, + upload_session_id: session.id, + }); + } + for (const file of sessionFiles) { + merged.set(file.path, file); + } + + const files = [...merged.values()]; + return { + files, + fileCount: files.length, + sizeBytes: files.reduce((sum, file) => sum + file.size_bytes, 0), + parentRevisionId: baseRevisionId, + }; +} + +// Create the Artifact on first finalize, or guard against a competing draft on an +// existing one. file_count/size_bytes reflect the committed (merged) tree. 
+async function ensureArtifactForFinalize( + entities: Entities, + input: { + actor: ApiActor; + session: UploadSession; + operationActor: ReturnType; + fileCount: number; + sizeBytes: number; + now: string; + }, +) { + const { session } = input; + const existingArtifact = await entities.artifacts.findById(session.artifact_id, input.actor.workspace_id); + if (existingArtifact) { + const existingDraft = await entities.revisions.findDraftForArtifact(existingArtifact.id); + if (existingDraft && existingDraft.id !== session.revision_id) { + repositoryError("draft_revision_conflict"); + } + return; + } + const artifact: Artifact = { + id: session.artifact_id, + workspace_id: session.workspace_id, + revision_id: null, + status: "active", + title: session.title, + entrypoint: session.entrypoint, + file_count: input.fileCount, + size_bytes: input.sizeBytes, + expires_at: session.artifact_expires_at, + pinned_at: null, + created_by_type: session.created_by_type, + created_by_id: session.created_by_id, + access_link_lockdown_at: null, + deleted_at: null, + delete_reason: null, + created_at: input.now, + updated_at: input.now, + }; + await entities.artifacts.insert(artifact); + await entities.operationEvents.insert({ + actorType: input.operationActor.actorType, + actorId: input.operationActor.actorId, + action: "artifact.created", + targetType: "artifact", + targetId: artifact.id, + workspaceId: artifact.workspace_id, + details: {}, + occurredAt: input.now, + }); +} + export async function finalizeUploadSessionInEntities( entities: Entities, input: { @@ -149,6 +337,9 @@ export async function finalizeUploadSessionInEntities( sessionId: string; observedFiles: ObservedUploadFile[]; now: string; + // Resolved lazily and only for a base-Revision merge (validateUpload on the + // merged tree), so non-base finalizes never touch the workspace lookup. 
+ resolveUsagePolicy: () => Promise; }, ) { const session = await entities.uploadSessions.findById(input.sessionId, input.actor.workspace_id); @@ -179,58 +370,41 @@ export async function finalizeUploadSessionInEntities( repositoryError("upload_incomplete"); } } - const operationActor = operationActorFromApiActor(input.actor); - const existingArtifact = await entities.artifacts.findById(session.artifact_id, input.actor.workspace_id); - if (existingArtifact) { - const existingDraft = await entities.revisions.findDraftForArtifact(existingArtifact.id); - if (existingDraft && existingDraft.id !== session.revision_id) { - repositoryError("draft_revision_conflict"); - } - } else { - const artifact: Artifact = { - id: session.artifact_id, - workspace_id: session.workspace_id, - revision_id: null, - status: "active", - title: session.title, - entrypoint: session.entrypoint, - file_count: session.file_count, - size_bytes: session.size_bytes, - expires_at: session.artifact_expires_at, - pinned_at: null, - created_by_type: session.created_by_type, - created_by_id: session.created_by_id, - access_link_lockdown_at: null, - deleted_at: null, - delete_reason: null, - created_at: input.now, - updated_at: input.now, - }; - await entities.artifacts.insert(artifact); - await entities.operationEvents.insert({ - actorType: operationActor.actorType, - actorId: operationActor.actorId, - action: "artifact.created", - targetType: "artifact", - targetId: artifact.id, - workspaceId: artifact.workspace_id, - details: {}, - occurredAt: input.now, - }); + // Tree inheritance (ADR 0087): against a base Revision the committed tree is the + // merged base + delta, so file_count/size_bytes and the artifact_files rows come + // from the merge (the session row counts only the changed manifest). validateUpload + // re-checks caps + entrypoint against the real published tree (an inherited path + // may be the entrypoint). Without a base, behavior is unchanged. + const merged = session.base_revision_id ? 
await mergeBaseRevisionTree(entities, session, files) : null; + if (merged) { + validateUpload(merged.files, await input.resolveUsagePolicy(), session.entrypoint); } + const treeFiles = merged?.files ?? files; + const treeFileCount = merged?.fileCount ?? session.file_count; + const treeSizeBytes = merged?.sizeBytes ?? session.size_bytes; + const parentRevisionId = merged?.parentRevisionId ?? null; + const operationActor = operationActorFromApiActor(input.actor); + await ensureArtifactForFinalize(entities, { + actor: input.actor, + session, + operationActor, + fileCount: treeFileCount, + sizeBytes: treeSizeBytes, + now: input.now, + }); const revision: Revision = { id: session.revision_id, workspace_id: session.workspace_id, artifact_id: session.artifact_id, - // Set when publishing against a base Revision (ADR 0087 tree inheritance, Stage 3). - parent_revision_id: null, + // Set when publishing against a base Revision (ADR 0087 tree inheritance). + parent_revision_id: parentRevisionId, revision_number: null, status: "draft", entrypoint: session.entrypoint, // Explicit client choice (stored on the session) wins; otherwise infer. render_mode: session.render_mode ?? 
inferRenderMode(session.entrypoint), - file_count: session.file_count, - size_bytes: session.size_bytes, + file_count: treeFileCount, + size_bytes: treeSizeBytes, bundle_status: "disabled", bundle_status_updated_at: null, bundle_size_bytes: null, @@ -242,7 +416,7 @@ export async function finalizeUploadSessionInEntities( }; await entities.revisions.insert(revision); await entities.uploadSessions.markFinalized(session.id, input.now); - for (const file of files) { + for (const file of treeFiles) { await entities.artifactFiles.insert(session.artifact_id, session.revision_id, file, input.now); } await entities.operationEvents.insert({ @@ -252,7 +426,7 @@ export async function finalizeUploadSessionInEntities( targetType: "artifact", targetId: session.artifact_id, workspaceId: session.workspace_id, - details: { revision_id: session.revision_id, file_count: session.file_count }, + details: { revision_id: session.revision_id, file_count: treeFileCount }, occurredAt: input.now, }); return buildFinalizeResult({ @@ -261,7 +435,7 @@ export async function finalizeUploadSessionInEntities( revisionId: session.revision_id, title: session.title, entrypoint: session.entrypoint, - fileCount: session.file_count, - sizeBytes: session.size_bytes, + fileCount: treeFileCount, + sizeBytes: treeSizeBytes, }); } diff --git a/packages/db/src/repository/workflows/upload-publish-workflow.ts b/packages/db/src/repository/workflows/upload-publish-workflow.ts index 69a41e4e..e878455a 100644 --- a/packages/db/src/repository/workflows/upload-publish-workflow.ts +++ b/packages/db/src/repository/workflows/upload-publish-workflow.ts @@ -117,6 +117,10 @@ export async function finalizeUploadSession( sessionId: input.sessionId, observedFiles: input.observedFiles, now: input.now, + // Resolved lazily and only for a base-Revision merge, so a missing/cross + // workspace still collapses to upload_session_not_found (non-enumerable) + // via the workspace-scoped session lookup rather than workspace_not_found. 
+ resolveUsagePolicy: async () => ctx.usagePolicyFor(await ctx.mustWorkspace(entities, input.actor.workspace_id)), }), ); } diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index 15e5d30b..871ab7f6 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -197,6 +197,12 @@ export const uploadSessions = pgTable( expiresAt: timestamp("expires_at", { withTimezone: true }).notNull(), createdAt: timestamp("created_at", { withTimezone: true }).notNull(), finalizedAt: timestamp("finalized_at", { withTimezone: true }), + // Base Revision this publish inherits from (ADR 0087 tree inheritance). Null = full + // manifest. Copied to revisions.parent_revision_id when the merge runs at finalize. + baseRevisionId: text("base_revision_id"), + // Base paths this publish drops. Needed to tell a deleted path apart from an + // inherited one at finalize (both are base paths absent from the file manifest). + deletedPaths: jsonb("deleted_paths").$type().notNull().default([]), }, (table) => [ index("upload_sessions_pending_expiry_idx").on(table.workspaceId, table.expiresAt), @@ -225,12 +231,22 @@ export const uploadSessionFiles = pgTable( storageKind: text("storage_kind").notNull().default("revision"), uploadedAt: timestamp("uploaded_at", { withTimezone: true }), putUrlExpiresAt: timestamp("put_url_expires_at", { withTimezone: true }).notNull(), + // Intra-file delta descriptor (ADR 0087). When set, the uploaded bytes are a + // unified diff against the base file; jobs reconstructs the whole result blob + // (Stage 4). base = digest of the base Revision's file, result = digest of the + // reconstructed whole file. Both null (whole-file upload) or both set. 
+ patchBaseSha256: text("patch_base_sha256"), + patchResultSha256: text("patch_result_sha256"), }, (table) => [ primaryKey({ columns: [table.uploadSessionId, table.path] }), index("upload_session_files_blob_idx").on(table.workspaceId, table.sha256, table.sizeBytes), check("upload_session_files_storage_kind_check", sql`${table.storageKind} in ('revision', 'blob')`), check("upload_session_files_sha256_check", sql`${table.sha256} is null or ${table.sha256} ~ '^[a-f0-9]{64}$'`), + check( + "upload_session_files_patch_check", + sql`(${table.patchBaseSha256} is null and ${table.patchResultSha256} is null) or (${table.patchBaseSha256} ~ '^[a-f0-9]{64}$' and ${table.patchResultSha256} ~ '^[a-f0-9]{64}$')`, + ), ], ); diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts index c8e2565c..12fb01f0 100644 --- a/packages/db/src/types.ts +++ b/packages/db/src/types.ts @@ -209,6 +209,10 @@ export type UploadSession = { expires_at: string; created_at: string; finalized_at: string | null; + // Base Revision this publish inherits from (ADR 0087); null = full manifest. + base_revision_id: string | null; + // Base paths this publish drops (distinguishes deleted from inherited at finalize). + deleted_paths: string[]; }; export type ContentBlob = { @@ -235,6 +239,10 @@ export type StoredFile = { storage_kind?: StoredFileStorageKind; uploaded_at: string | null; put_url_expires_at?: string; + // ADR 0087 intra-file delta descriptor (recorded on upload_session_files only). + // Both null (whole-file upload) or both set; jobs reconstructs the result blob. 
+ patch_base_sha256?: string | null; + patch_result_sha256?: string | null; }; export type SafetyWarning = { diff --git a/packages/db/src/validation.ts b/packages/db/src/validation.ts index bd8a41e3..f797ead2 100644 --- a/packages/db/src/validation.ts +++ b/packages/db/src/validation.ts @@ -6,6 +6,10 @@ export function validateUpload( files: Array<{ path: string; size_bytes: number }>, usagePolicy: Pick, entrypoint = "index.html", + // A partial-manifest publish (ADR 0087) validates the uploaded delta here for + // per-file/count caps only; the entrypoint and artifact-size cap are checked + // against the merged tree at finalize, where the inherited paths are known. + options: { wholeTree?: boolean } = { wholeTree: true }, ) { if (files.length === 0 || files.length > usagePolicy.file_count_cap) { repositoryError("file_count_cap_exceeded"); @@ -17,10 +21,10 @@ export function validateUpload( } total += file.size_bytes; } - if (total > usagePolicy.artifact_size_cap_bytes) { + if (options.wholeTree !== false && total > usagePolicy.artifact_size_cap_bytes) { repositoryError("revision_size_cap_exceeded"); } - if (!files.some((file) => file.path === entrypoint)) { + if (options.wholeTree !== false && !files.some((file) => file.path === entrypoint)) { repositoryError("entrypoint_not_in_revision"); } } From 57b819c1145cfac89f8462b8f04ff523367ced66 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 15:45:21 -0700 Subject: [PATCH 04/14] feat(upload,db,storage): synchronous intra-file patch reconstruction at finalize (ADR 0087 stage 4) Apply an agent-uploaded unified diff to the base blob synchronously at finalize, in the upload worker, before the new Revision commits. A patch that cannot apply fails the finalize call with patch_conflict (HTTP 422, "patch_conflict: <path>: <reason>") so the agent re-submits a corrected diff; a broken patch never becomes a servable Revision. 
The reconstructed result is an ordinary content-addressed blob, so content/bundles/GC are unchanged and no migration is needed. - packages/storage: hand-rolled byte-exact unified-diff applier (no diff dep: jsdiff's UTF-16 round-trip breaks the raw-byte result_sha256 check) + workspace-blob read/write helpers (blob AAD) and a revision-file read helper. - packages/db: RevisionReconstructor adapter (RepositoryOptions, wired in createPostgresRuntime + local MVP harness), invoked from mergeBaseRevisionTree before any DB write; result blob + content_blobs row commit with the draft; caps run on the reconstructed result size; removes the Stage 3 patch_reconstruction_unavailable gate; conflict -> patch_conflict, infra failures -> storage_unavailable. - contracts/worker-runtime: new patch_conflict ErrorCode (422), MCP status map + publishChain, route errors, openapi goldens; also maps the previously 500-falling-back finalize codes (caps, expired, incomplete) for MCP. - upload: widen R2 binding with get; surface the conflict path+reason as the error message. - scripts: smoke-local-patch.mjs drives the real reconstruction path (local + preview). Verified byte-exact serve + patch_conflict on hosted preview. 
Co-Authored-By: Claude Opus 4.8 --- apps/upload/src/env.ts | 8 + apps/upload/src/finalize.ts | 16 +- ...eritance-and-server-reconstructed-delta.md | 53 +++ docs/development.md | 35 +- docs/ops/git-like-revisions-todo.md | 56 ++-- docs/specs/api.md | 15 +- docs/specs/data-model.md | 28 +- package.json | 1 + packages/contracts/openapi/api.json | 216 ++++++++++++ packages/contracts/openapi/content.json | 1 + packages/contracts/openapi/upload.json | 17 + packages/contracts/src/common.ts | 1 + packages/contracts/src/mcp.test.ts | 42 +++ packages/contracts/src/mcp/error-codes.ts | 12 + .../contracts/src/routes/registry.storage.ts | 1 + packages/db/src/index.test.ts | 116 ++++++- packages/db/src/index.ts | 6 + .../postgres/revision-reconstructor.test.ts | 141 ++++++++ .../db/src/postgres/revision-reconstructor.ts | 78 +++++ packages/db/src/postgres/worker-runtime.ts | 18 + packages/db/src/repository-error.ts | 9 +- .../repository/upload-session-lifecycle.ts | 143 +++++++- .../workflows/upload-publish-workflow.ts | 1 + packages/db/src/types.ts | 46 +++ packages/storage/src/index.ts | 11 + packages/storage/src/unified-diff.test.ts | 255 ++++++++++++++ packages/storage/src/unified-diff.ts | 317 ++++++++++++++++++ .../storage/src/workspace-blob-bytes.test.ts | 101 ++++++ packages/storage/src/workspace-blob-bytes.ts | 149 ++++++++ packages/worker-runtime/src/errors.ts | 1 + .../src/route-repository-errors.ts | 8 +- scripts/local-mvp-server.mjs | 13 +- scripts/smoke-local-patch.mjs | 310 +++++++++++++++++ 33 files changed, 2131 insertions(+), 94 deletions(-) create mode 100644 packages/db/src/postgres/revision-reconstructor.test.ts create mode 100644 packages/db/src/postgres/revision-reconstructor.ts create mode 100644 packages/storage/src/unified-diff.test.ts create mode 100644 packages/storage/src/unified-diff.ts create mode 100644 packages/storage/src/workspace-blob-bytes.test.ts create mode 100644 packages/storage/src/workspace-blob-bytes.ts create mode 100644 
scripts/smoke-local-patch.mjs diff --git a/apps/upload/src/env.ts b/apps/upload/src/env.ts index 566fb191..59504c2e 100644 --- a/apps/upload/src/env.ts +++ b/apps/upload/src/env.ts @@ -21,6 +21,11 @@ export type R2Object = { size: number; }; +export type R2ObjectBody = { + body: ReadableStream | ArrayBuffer | Uint8Array | string | null | undefined; + customMetadata?: Record; +}; + export type R2Bucket = { put( key: string, @@ -28,6 +33,9 @@ export type R2Bucket = { options?: { httpMetadata?: Record; customMetadata?: Record }, ): Promise; head(key: string): Promise; + // Reconstruction (ADR 0087) reads a base blob + the uploaded diff back at finalize to + // apply the patch. This is the only read on upload's R2 binding; every other op writes. + get(key: string): Promise; }; export type RateLimitBinding = { diff --git a/apps/upload/src/finalize.ts b/apps/upload/src/finalize.ts index e4b4d00f..5c3417d4 100644 --- a/apps/upload/src/finalize.ts +++ b/apps/upload/src/finalize.ts @@ -1,7 +1,12 @@ import { IdempotencyInFlightError } from "@agent-paste/commands"; import type { routeContracts } from "@agent-paste/contracts"; import { FinalizeUploadSessionResponse } from "@agent-paste/contracts"; -import { observeUploadSessionForFinalize, type Repository, repositoryErrorToAppError } from "@agent-paste/db"; +import { + isRepositoryError, + observeUploadSessionForFinalize, + type Repository, + repositoryErrorToAppError, +} from "@agent-paste/db"; import { type GuardState, getBoundResponders, type Principal } from "@agent-paste/worker-runtime"; import type { AppContext } from "./env.js"; import { uploadSessionActor } from "./upload-actor.js"; @@ -53,7 +58,14 @@ export async function finalizeUploadSession( } const repositoryCode = repositoryErrorToAppError(error); if (repositoryCode) { - return getBoundResponders(context).respondError(repositoryCode); + // A patch conflict carries the path + failure reason on the error cause so the + // agent learns which file to regenerate (ADR 
0087). Other codes use their default + // message. + const detail = + repositoryCode === "patch_conflict" && isRepositoryError(error) && error.cause instanceof Error + ? error.cause.message + : undefined; + return getBoundResponders(context).respondError(repositoryCode, detail); } throw error; } diff --git a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md index 996c7e20..d845bb01 100644 --- a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md +++ b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md @@ -133,6 +133,12 @@ whole-blob. modeled like the existing async Bundle/safety-scan pending states, not as a finalize-blocking step. This supersedes the earlier draft note that started reconstruction in `upload`. + **Superseded at implementation (see "Stage 4 implementation notes" below):** + reconstruction shipped SYNCHRONOUSLY at finalize in `upload`, not async in + `jobs`, and there is no pending state. The async/pending framing was reversed + because a patch that cannot apply must fail the agent's finalize call so the + agent can fix it (the conflict is the feature), which a fire-and-forget job + cannot do. - **Caps still apply to the reconstructed result**, not the diff: a small diff whose applied result exceeds the file/Revision cap fails. - **Spec + glossary updates.** [`data-model.md`](../specs/data-model.md) @@ -183,6 +189,53 @@ next implementer does not re-derive them: This confirms, and does not reverse, the Stage 2 "applied at finalize" wording. +## Stage 4 implementation notes (synchronous reconstruct-at-finalize) + +Stage 4 reverses two specifics of the "Server-reconstructed intra-file delta" +decision above (placement and the pending model). 
The reasons, so the next +implementer trusts these notes over the earlier framing: + +- **The conflict is the feature, so reconstruction is synchronous and fails the + finalize call.** The whole point of intra-file delta is agent ergonomics: when a + diff cannot be applied (base moved, hunk fails, result digest mismatch), the + system must flag that back to the agent in the same request so the agent + re-submits a corrected diff. An async `jobs` job with a pending state cannot fail + the caller's call; it can only 404 or DLQ, which buries the signal. So + reconstruction runs inline at finalize and a conflict throws + `RevisionReconstructionConflict` → `patch_conflict` (HTTP 422), message + `patch_conflict: <path>: <reason>`. There is **no pending state and no + `reconstruction_status` column** — a broken patch never becomes a draft, so a + servable-but-broken revision cannot exist. +- **Placement is finalize in `upload`, not `jobs`.** The blast-radius argument for + `jobs` assumed async; synchronous reconstruction must run where the finalize + transaction is, and finalize already owns the patch gate, the only + `artifact_files` write, and the result-size cap-check. `upload` already holds the + encryption ring; the only new capability is R2 `get`. Blast radius is contained + by never exposing an arbitrary-key read in app code: the reconstructor + (`RevisionReconstructor`, injected via `RepositoryOptions` like the reparent + migrator) takes a validated `(workspaceId, sha256)` derived from the base + Revision's own `artifact_files` rows. The decrypt/apply/encrypt logic is shared + in `packages/storage` (`unified-diff.ts` applier, `workspace-blob-bytes.ts` + read/write helpers), invoked from `packages/db/src/postgres/revision-reconstructor.ts`. +- **The applier is hand-rolled and byte-exact.** No diff library: `jsdiff` fuzzes + hunks and round-trips through UTF-16, which breaks byte-exactness against the + raw-byte `result_sha256` digest and yields false conflicts. 
The applier splices + raw base byte ranges for context/unchanged regions and never normalizes + CRLF/BOM/trailing-newline. `result_sha256` is the backstop, so conflict reasons + are coarse (`parse_error | base_hash_mismatch | apply_failed | +result_hash_mismatch`) — the agent's only action on any of them is "regenerate + this file's diff", so hunk/line forensics would be unusable detail. +- **The result is an ordinary content-addressed blob (Option 1 holds).** Finalize + replaces the diff placeholder with a `storage_kind='blob'` `artifact_files` row + and registers a `content_blobs` row in the same transaction (so GC protects the + new blob). `content`, bundles, and GC are unchanged. No DB migration. +- **The Stage 3 `patch_reconstruction_unavailable` gate is removed** and replaced + by the reconstruction call. Infra failures (missing ring/R2, decrypt errors) map + to `storage_unavailable` (503), never `patch_conflict`, so the agent is never + told "your patch is bad" for an outage. First-failure-wins across multiple + patched files; all files apply+verify in memory before any blob is PUT, so a + batch with one conflict writes zero blobs. + ## What this ADR is not - Not a chunk store, not per-block encryption, not Range serving, not global diff --git a/docs/development.md b/docs/development.md index a59086a7..ac669806 100644 --- a/docs/development.md +++ b/docs/development.md @@ -155,23 +155,24 @@ deploy production from a laptop. ### Smoke Tests -| Command | Purpose | -| --------------------------------- | --------------------------------------------------------------------------------------------------------------------- | -| `pnpm smoke:local` | Build and run the local publish/content/delete smoke path (also gated in CI `Validate`). | -| `pnpm smoke:ci:postgres` | Build, migrate a job-local Postgres database, and run the local CLI smoke through the Postgres/RLS-backed harness. | -| `pnpm smoke:web` | Build and run local web API auth/dashboard smoke assertions. 
| -| `pnpm smoke:mcp` | Build and run local MCP transport + OAuth + publish/read/delete smoke. | -| `pnpm smoke:mcp:preview` | Build and run hosted preview MCP smoke (optional token for authenticated checks). | -| `pnpm smoke:mcp:production` | Build and run hosted production MCP smoke (requires explicit approval and token). | -| `pnpm lighthouse:dashboard-a11y` | Run the local Lighthouse accessibility gate on authenticated `/dashboard` empty chrome (requires `pnpm build` first). | -| `pnpm smoke:preview` | Build and run hosted preview smoke assertions. | -| `pnpm smoke:preview:ephemeral` | Build and run hosted preview ephemeral publish smoke. | -| `pnpm smoke:production` | Build and run hosted production smoke assertions. | -| `pnpm smoke:production:ephemeral` | Build and run hosted production ephemeral publish smoke (operator-only; optional WorkOS token for claim). | -| `pnpm smoke:pr` | Build and run hosted PR-preview smoke assertions manually using PR workflow-provided URLs. | -| `pnpm smoke:pr:ephemeral` | Build and run hosted PR-preview ephemeral publish smoke. | -| `pnpm smoke:preview:readonly` | Build and run the credential-free read-only preview smoke. | -| `pnpm smoke:prod:readonly` | Build and run the credential-free read-only production smoke. | +| Command | Purpose | +| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | +| `pnpm smoke:local` | Build and run the local publish/content/delete smoke path (also gated in CI `Validate`). | +| `pnpm smoke:local:patch` | Build and run the local ADR 0087 intra-file patch reconstruction smoke (real diff apply + serve byte-exact + conflict). | +| `pnpm smoke:ci:postgres` | Build, migrate a job-local Postgres database, and run the local CLI smoke through the Postgres/RLS-backed harness. | +| `pnpm smoke:web` | Build and run local web API auth/dashboard smoke assertions. 
| +| `pnpm smoke:mcp` | Build and run local MCP transport + OAuth + publish/read/delete smoke. | +| `pnpm smoke:mcp:preview` | Build and run hosted preview MCP smoke (optional token for authenticated checks). | +| `pnpm smoke:mcp:production` | Build and run hosted production MCP smoke (requires explicit approval and token). | +| `pnpm lighthouse:dashboard-a11y` | Run the local Lighthouse accessibility gate on authenticated `/dashboard` empty chrome (requires `pnpm build` first). | +| `pnpm smoke:preview` | Build and run hosted preview smoke assertions. | +| `pnpm smoke:preview:ephemeral` | Build and run hosted preview ephemeral publish smoke. | +| `pnpm smoke:production` | Build and run hosted production smoke assertions. | +| `pnpm smoke:production:ephemeral` | Build and run hosted production ephemeral publish smoke (operator-only; optional WorkOS token for claim). | +| `pnpm smoke:pr` | Build and run hosted PR-preview smoke assertions manually using PR workflow-provided URLs. | +| `pnpm smoke:pr:ephemeral` | Build and run hosted PR-preview ephemeral publish smoke. | +| `pnpm smoke:preview:readonly` | Build and run the credential-free read-only preview smoke. | +| `pnpm smoke:prod:readonly` | Build and run the credential-free read-only production smoke. | ### Hooks diff --git a/docs/ops/git-like-revisions-todo.md b/docs/ops/git-like-revisions-todo.md index 3bf158c4..08520385 100644 --- a/docs/ops/git-like-revisions-todo.md +++ b/docs/ops/git-like-revisions-todo.md @@ -141,26 +141,42 @@ result_sha256 }` plus the diff bytes uploaded like any file body. Absence = patch base match) with six new repo error codes mapped to `invalid_request`. See the ADR 0087 Stage 3 implementation notes for the decisions. -### Stage 4 - jobs: Option 1 reconstruct-on-write - -Reconstruction runs in **`jobs`**, not `upload`. 
`upload` is write-only against -R2 today (sole op is `ARTIFACTS.put`, `apps/upload/src/put.ts:150`); `jobs` is -already a read-modify-write module with the `ARTIFACTS` binding + encryption ring -(Bundle generation: `revision-file-bytes.ts` decrypts, `bundle-generate- -orchestration.ts` re-encrypts). Reconstruction is the same shape, so it belongs -in `jobs` and keeps `upload`'s narrow role intact. - -- Client uploads a unified diff for a patched path. A `jobs` task fetches + - decrypts the base blob, applies the patch, hashes -> must equal - `result_sha256`, encrypts the **whole result** as a normal blob under - `workspaceBlobObjectKeyFor(result_sha256)`. -- Model the pending window like existing async Bundle/safety-scan states: a - Revision containing a patched file is not servable until reconstruction - completes, so Publish resolves it after the `jobs` task lands (not a - finalize-blocking step). -- The resulting `artifact_files` row is an ordinary `storage_kind='blob'` row. - Nothing downstream (content, bundles, GC) needs to know a patch was involved. -- Caps: patched-result size still enforced against file/revision caps. +### Stage 4 - synchronous reconstruct-at-finalize (DONE) + +Reconstruction runs **synchronously at finalize, in the `upload` worker**, BEFORE +the new Revision is committed as a draft. (An earlier sketch put this async in +`jobs`; that was rejected because a patch that cannot apply must FLAG BACK to the +agent so the agent can fix it - the conflict is the feature, not bookkeeping. A +broken patch must never produce a servable revision, so reconstruction has to be +able to FAIL the finalize call. Finalize is also where the patch gate, the only +`artifact_files` write, and the result-size cap-check already live, and the +`upload` worker already holds R2 + the encryption ring.) + +- A patched file uploads a unified diff (sha256 null, revision-scoped key). 
At + finalize, before any DB write, `mergeBaseRevisionTree` validates the diff base + against the base Revision's file (`patch_base_mismatch`) then runs the injected + `RevisionReconstructor` (`packages/db/src/postgres/revision-reconstructor.ts`): + decrypts the base blob, applies the diff (`packages/storage/src/unified-diff.ts`, + a hand-rolled byte-exact applier), verifies `sha256(result) === result_sha256`, + and encrypts the **whole result** as a normal blob under + `workspaceBlobObjectKeyFor(result_sha256)`. All files apply+verify in memory + first; only then are blobs PUT (a multi-file batch with one conflict writes zero + blobs). +- A patch that cannot apply throws `RevisionReconstructionConflict` -> finalize + fails with `patch_conflict` (HTTP 422), message `patch_conflict: <path>: <reason>` + (`parse_error | base_hash_mismatch | apply_failed | result_hash_mismatch`), so + the agent regenerates that file's diff and re-finalizes. Infra failures + (missing ring/R2, decrypt) stay `storage_unavailable` (503), never a conflict. + First-failure-wins across multiple patched files. +- The committed `artifact_files` row is an ordinary `storage_kind='blob'` row + + a `content_blobs` row (registered in the same tx so GC protects it). Nothing + downstream (content, bundles, GC) knows a patch was involved. No new DB column, + no `reconstruction_status`, no migration. +- Caps run on the MERGED tree carrying RECONSTRUCTED result sizes (a patched + file's session `size_bytes` is the diff size), so a result over cap is rejected. +- Security: `upload` gains R2 `get`, but app code never reads an arbitrary key - + the reconstructor takes a validated `(workspaceId, sha256)` derived from the + base Revision's own `artifact_files` rows. - Done: big-file-small-edit uploads only the diff bytes; served file is byte-identical to applying the patch locally; `content` unchanged. 
diff --git a/docs/specs/api.md b/docs/specs/api.md index 17ebaf1e..73e2ecf0 100644 --- a/docs/specs/api.md +++ b/docs/specs/api.md @@ -165,10 +165,17 @@ result_sha256 }`) means the bytes uploaded for that entry are a unified diff At finalize the merged tree (inherited base rows + uploaded changes − deletions) sets `revisions.parent_revision_id = base_revision_id`, and `file_count` / `size_bytes` are recomputed from the merged tree, not the uploaded manifest. - Diff reconstruction into a whole blob is deferred to a later `jobs` step, so a - session that carries a `patch` is currently rejected at finalize rather than - serving the diff bytes as the file. A file may not declare both a whole-file - `sha256` and a `patch`. + A patched file is reconstructed synchronously at finalize: the server applies the + diff to the base blob, verifies the result digest equals `result_sha256`, and + stores the whole result as an ordinary content-addressed blob — so caps are + enforced against the reconstructed result size, not the diff. If the diff cannot + be applied cleanly (base moved, hunk fails, or the result digest mismatches), + finalize fails with `patch_conflict` (HTTP 422) and message + `patch_conflict: <path>: <reason>` (`reason` ∈ `parse_error`, + `base_hash_mismatch`, `apply_failed`, `result_hash_mismatch`); the caller + regenerates that file's diff and re-finalizes. A broken patch never produces a + servable Revision. A file may not declare both a whole-file `sha256` and a + `patch`. ### `CreateUploadSessionResponse` diff --git a/docs/specs/data-model.md b/docs/specs/data-model.md index 2cefd482..42712d68 100644 --- a/docs/specs/data-model.md +++ b/docs/specs/data-model.md @@ -208,20 +208,20 @@ exposing scanner internals. 
### `upload_session_files` -| Column | Type | Notes | -| --------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id)` | | -| `upload_session_id` | `TEXT NOT NULL REFERENCES upload_sessions(id)` | | -| `path` | `TEXT NOT NULL` | Normalized POSIX path. | -| `size_bytes` | `BIGINT NOT NULL` | Expected size. | -| `served_content_type` | `TEXT NOT NULL` | Derived before issuing upload URL. | -| `r2_key` | `TEXT NOT NULL` | Final artifact object key. | -| `sha256` | `TEXT NULL` | Lowercase hex digest when supplied by client. | -| `storage_kind` | `TEXT NOT NULL DEFAULT 'revision'` | `revision` or `blob`. | -| `uploaded_at` | `TIMESTAMPTZ NULL` | Set after successful PUT or existing blob reuse. | -| `put_url_expires_at` | `TIMESTAMPTZ NOT NULL` | Session-level upper bound for PUT writes. Set to `upload_sessions.expires_at` at session creation. | -| `patch_base_sha256` | `TEXT NULL` | Intra-file delta ([0087](../adr/0087-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)): digest of the base Revision's file the uploaded unified diff applies to. | -| `patch_result_sha256` | `TEXT NULL` | Digest of the whole reconstructed file `jobs` produces from the diff (Stage 4). Both patch columns are `NULL` (whole-file upload) or both set (a `CHECK` enforces it). 
| +| Column | Type | Notes | +| --------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id)` | | +| `upload_session_id` | `TEXT NOT NULL REFERENCES upload_sessions(id)` | | +| `path` | `TEXT NOT NULL` | Normalized POSIX path. | +| `size_bytes` | `BIGINT NOT NULL` | Expected size. | +| `served_content_type` | `TEXT NOT NULL` | Derived before issuing upload URL. | +| `r2_key` | `TEXT NOT NULL` | Final artifact object key. | +| `sha256` | `TEXT NULL` | Lowercase hex digest when supplied by client. | +| `storage_kind` | `TEXT NOT NULL DEFAULT 'revision'` | `revision` or `blob`. | +| `uploaded_at` | `TIMESTAMPTZ NULL` | Set after successful PUT or existing blob reuse. | +| `put_url_expires_at` | `TIMESTAMPTZ NOT NULL` | Session-level upper bound for PUT writes. Set to `upload_sessions.expires_at` at session creation. | +| `patch_base_sha256` | `TEXT NULL` | Intra-file delta ([0087](../adr/0087-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)): digest of the base Revision's file the uploaded unified diff applies to. | +| `patch_result_sha256` | `TEXT NULL` | Digest of the whole file the server reconstructs from the diff (applied synchronously at finalize); the committed `artifact_files` row is an ordinary `storage_kind='blob'` row at this sha. Both patch columns are `NULL` (whole-file upload) or both set (a `CHECK` enforces it). | Primary key `(upload_session_id, path)`. 
diff --git a/package.json b/package.json index 8c455115..ce1ec97c 100644 --- a/package.json +++ b/package.json @@ -51,6 +51,7 @@ "migrate:live": "pnpm migrate:production", "prepare": "node scripts/install-hooks.mjs", "smoke:local": "pnpm build && node scripts/smoke-local-mvp.mjs", + "smoke:local:patch": "pnpm build && node scripts/smoke-local-patch.mjs", "smoke:ci:postgres": "pnpm build && node scripts/smoke-ci-postgres.mjs", "smoke:preview": "pnpm build && node scripts/smoke-hosted.mjs preview", "smoke:preview:ephemeral": "pnpm build && node scripts/smoke-hosted-ephemeral.mjs preview", diff --git a/packages/contracts/openapi/api.json b/packages/contracts/openapi/api.json index ac8e430c..e0d1499d 100644 --- a/packages/contracts/openapi/api.json +++ b/packages/contracts/openapi/api.json @@ -114,6 +114,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -3861,6 +3862,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -3937,6 +3939,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4013,6 +4016,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4089,6 +4093,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4185,6 +4190,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4261,6 +4267,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", 
"rate_limited_artifact", @@ -4374,6 +4381,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4450,6 +4458,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4526,6 +4535,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4602,6 +4612,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4698,6 +4709,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4774,6 +4786,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4887,6 +4900,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -4963,6 +4977,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5039,6 +5054,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5115,6 +5131,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5211,6 +5228,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5287,6 +5305,7 @@ "invalid_request", 
"not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5409,6 +5428,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5485,6 +5505,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5561,6 +5582,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5657,6 +5679,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5733,6 +5756,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5856,6 +5880,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -5932,6 +5957,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6028,6 +6054,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6104,6 +6131,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6260,6 +6288,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6346,6 +6375,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", 
"rate_limited_actor", "rate_limited_artifact", @@ -6422,6 +6452,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6498,6 +6529,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6633,6 +6665,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6709,6 +6742,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6785,6 +6819,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6861,6 +6896,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -6957,6 +6993,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7033,6 +7070,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7146,6 +7184,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7222,6 +7261,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7298,6 +7338,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7374,6 +7415,7 @@ 
"invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7470,6 +7512,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7546,6 +7589,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7659,6 +7703,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7735,6 +7780,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7811,6 +7857,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7887,6 +7934,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -7983,6 +8031,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8059,6 +8108,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8194,6 +8244,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8270,6 +8321,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8346,6 +8398,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", 
"pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8422,6 +8475,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8518,6 +8572,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8594,6 +8649,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8716,6 +8772,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8792,6 +8849,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8868,6 +8926,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -8944,6 +9003,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9040,6 +9100,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9116,6 +9177,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9250,6 +9312,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9326,6 +9389,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ 
-9402,6 +9466,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9478,6 +9543,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9574,6 +9640,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9650,6 +9717,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9784,6 +9852,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9860,6 +9929,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -9936,6 +10006,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10012,6 +10083,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10108,6 +10180,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10184,6 +10257,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10297,6 +10371,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10373,6 +10448,7 @@ "invalid_request", "not_authenticated", "not_found", + 
"patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10449,6 +10525,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10525,6 +10602,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10621,6 +10699,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10697,6 +10776,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10830,6 +10910,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10906,6 +10987,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -10982,6 +11064,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11058,6 +11141,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11154,6 +11238,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11230,6 +11315,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11364,6 +11450,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", 
"rate_limited_actor", "rate_limited_artifact", @@ -11440,6 +11527,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11516,6 +11604,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11592,6 +11681,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11688,6 +11778,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11764,6 +11855,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11877,6 +11969,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -11953,6 +12046,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12029,6 +12123,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12105,6 +12200,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12201,6 +12297,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12277,6 +12374,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12399,6 
+12497,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12475,6 +12574,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12551,6 +12651,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12627,6 +12728,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12723,6 +12825,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12799,6 +12902,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -12941,6 +13045,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13017,6 +13122,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13093,6 +13199,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13169,6 +13276,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13265,6 +13373,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13341,6 +13450,7 @@ "invalid_request", "not_authenticated", 
"not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13463,6 +13573,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13539,6 +13650,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13615,6 +13727,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13691,6 +13804,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13787,6 +13901,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13863,6 +13978,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -13985,6 +14101,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14061,6 +14178,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14137,6 +14255,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14213,6 +14332,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14309,6 +14429,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", 
"pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14385,6 +14506,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14507,6 +14629,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14583,6 +14706,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14659,6 +14783,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14735,6 +14860,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14831,6 +14957,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -14907,6 +15034,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15041,6 +15169,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15117,6 +15246,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15193,6 +15323,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15269,6 +15400,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", 
"rate_limited_artifact", @@ -15365,6 +15497,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15441,6 +15574,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15575,6 +15709,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15651,6 +15786,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15727,6 +15863,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15803,6 +15940,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15899,6 +16037,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -15975,6 +16114,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16110,6 +16250,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16186,6 +16327,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16262,6 +16404,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16338,6 +16481,7 @@ 
"invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16434,6 +16578,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16510,6 +16655,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16623,6 +16769,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16699,6 +16846,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16775,6 +16923,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16851,6 +17000,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -16947,6 +17097,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17023,6 +17174,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17156,6 +17308,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17232,6 +17385,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17308,6 +17462,7 @@ "invalid_request", "not_authenticated", "not_found", + 
"patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17384,6 +17539,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17480,6 +17636,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17556,6 +17713,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17669,6 +17827,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17745,6 +17904,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17821,6 +17981,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17897,6 +18058,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -17993,6 +18155,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18069,6 +18232,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18182,6 +18346,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18258,6 +18423,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", 
"rate_limited_actor", "rate_limited_artifact", @@ -18334,6 +18500,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18410,6 +18577,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18506,6 +18674,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18582,6 +18751,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18717,6 +18887,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18793,6 +18964,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18869,6 +19041,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -18945,6 +19118,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19041,6 +19215,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19117,6 +19292,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19240,6 +19416,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19316,6 
+19493,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19392,6 +19570,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19468,6 +19647,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19564,6 +19744,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19640,6 +19821,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19753,6 +19935,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19829,6 +20012,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19905,6 +20089,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -19981,6 +20166,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20077,6 +20263,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20153,6 +20340,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20277,6 +20465,7 @@ "invalid_request", "not_authenticated", 
"not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20353,6 +20542,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20429,6 +20619,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20505,6 +20696,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20627,6 +20819,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20703,6 +20896,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20779,6 +20973,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20855,6 +21050,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -20951,6 +21147,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21027,6 +21224,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21158,6 +21356,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21234,6 +21433,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", 
"pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21310,6 +21510,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21386,6 +21587,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21482,6 +21684,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21558,6 +21761,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21680,6 +21884,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21756,6 +21961,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21832,6 +22038,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -21908,6 +22115,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -22004,6 +22212,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -22080,6 +22289,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -22233,6 +22443,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", 
"rate_limited_artifact", @@ -22309,6 +22520,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -22385,6 +22597,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -22461,6 +22674,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -22557,6 +22771,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -22633,6 +22848,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", diff --git a/packages/contracts/openapi/content.json b/packages/contracts/openapi/content.json index 922cb79a..810c9c4b 100644 --- a/packages/contracts/openapi/content.json +++ b/packages/contracts/openapi/content.json @@ -51,6 +51,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", diff --git a/packages/contracts/openapi/upload.json b/packages/contracts/openapi/upload.json index 42fc1d8a..053dcc35 100644 --- a/packages/contracts/openapi/upload.json +++ b/packages/contracts/openapi/upload.json @@ -59,6 +59,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1047,6 +1048,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1123,6 +1125,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", 
"rate_limited_actor", "rate_limited_artifact", @@ -1199,6 +1202,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1275,6 +1279,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1371,6 +1376,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1447,6 +1453,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1581,6 +1588,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1657,6 +1665,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1733,6 +1742,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1809,6 +1819,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -1963,6 +1974,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -2039,6 +2051,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -2115,6 +2128,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -2191,6 +2205,7 @@ 
"invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -2287,6 +2302,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", @@ -2363,6 +2379,7 @@ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", diff --git a/packages/contracts/src/common.ts b/packages/contracts/src/common.ts index 831b0104..ef09efef 100644 --- a/packages/contracts/src/common.ts +++ b/packages/contracts/src/common.ts @@ -19,6 +19,7 @@ export const ErrorCode = z.enum([ "invalid_request", "not_authenticated", "not_found", + "patch_conflict", "pinned_artifact_cap_exceeded", "rate_limited_actor", "rate_limited_artifact", diff --git a/packages/contracts/src/mcp.test.ts b/packages/contracts/src/mcp.test.ts index 48b7b847..4dd1f395 100644 --- a/packages/contracts/src/mcp.test.ts +++ b/packages/contracts/src/mcp.test.ts @@ -3,6 +3,7 @@ import { UpdateDisplayMetadataRequest } from "./accessLinks.js"; import { buildMcpToolList, deriveMcpIdempotencyKey, + MCP_API_ERROR_HTTP_STATUS, McpAddRevisionInput, McpPublishArtifactInput, McpToolName, @@ -431,6 +432,33 @@ describe("MCP error mapping", () => { }); }); + it("maps a patch_conflict to 422, not the 500 fallback", () => { + const mapped = mapApiErrorToMcp({ + code: "patch_conflict", + message: "patch_conflict: app.js: result_hash_mismatch", + requestId: "req_patch", + }); + expect(mapped).toMatchObject({ code: "patch_conflict", httpStatus: 422 }); + // The actionable detail (path + reason) rides the message through to the agent. 
+ expect(mapped.message).toContain("app.js"); + }); + + it("has an HTTP status for every error a forwarded MCP tool can surface", () => { + // A missing entry silently falls back to 500, masking a real client-actionable + // error (the list_artifacts null-revision class of bug). Guard the whole surface. + const reachable = new Set(); + for (const tool of mcpToolContracts) { + for (const call of tool.forwardedCalls) { + for (const code of routeContractById(call.routeId).errors) { + reachable.add(code); + } + } + } + for (const code of reachable) { + expect(MCP_API_ERROR_HTTP_STATUS[code], `missing MCP HTTP status for ${code}`).toBeDefined(); + } + }); + it("maps protocol auth and scope failures", () => { expect(mapMcpProtocolError("invalid_token", "invalid_token")).toMatchObject({ code: "invalid_token", @@ -447,6 +475,20 @@ describe("MCP error mapping", () => { expect(envelope.data?.code).toBe("insufficient_scope"); expect(envelope.message).toBe("Actor lacks share scope"); }); + + it("declares patch_conflict on every tool that forwards a finalize call", () => { + // finalize can surface patch_conflict (ADR 0087); a tool that forwards it must + // declare it, or an agent sees an error its contract never advertised (it slipped + // out of publishChain once). Scoped to patch_conflict + the finalize route rather + // than a full superset assertion, which would relitigate the deliberate exclusion + // of signed-PUT and auth codes from tool error groups. 
+ for (const tool of mcpToolContracts) { + const forwardsFinalize = tool.forwardedCalls.some((call) => call.routeId === "uploadSessions.finalize"); + if (forwardsFinalize) { + expect(tool.errors, `${tool.name} forwards finalize but omits patch_conflict`).toContain("patch_conflict"); + } + } + }); }); describe("MCP tool name enum", () => { diff --git a/packages/contracts/src/mcp/error-codes.ts b/packages/contracts/src/mcp/error-codes.ts index 9e90824c..8c15d5c4 100644 --- a/packages/contracts/src/mcp/error-codes.ts +++ b/packages/contracts/src/mcp/error-codes.ts @@ -25,6 +25,7 @@ export const mcpToolErrorGroups = { "file_size_cap_exceeded", "idempotency_in_flight", "invalid_idempotency_key", + "patch_conflict", "revision_retained", "revision_size_cap_exceeded", "revision_unpublished", @@ -68,6 +69,12 @@ export const MCP_API_ERROR_HTTP_STATUS: Partial { const sha = (char: string) => char.repeat(64); +// A fake reconstructor for finalize-wiring tests: the real apply/crypto path is covered +// by the storage applier + revision-reconstructor factory tests, so here we only need to +// exercise how finalize collects descriptors, registers content_blobs, writes blob rows, +// and propagates a conflict. By default it echoes each request as a successful result +// blob; `conflictFor`/`resultSize` override per path. +function fakeReconstructor(options?: { + conflictFor?: string; + resultSize?: (path: string) => number; +}): RevisionReconstructor & { calls: number } { + const adapter = { + calls: 0, + async reconstruct(input: Parameters[0]) { + adapter.calls += 1; + const files = input.files.map((file) => { + if (options?.conflictFor === file.path) { + throw new RevisionReconstructionConflict(file.path, "result_hash_mismatch"); + } + return { + path: file.path, + sha256: file.resultSha256, + r2Key: `workspaces/${input.workspaceId}/blobs/sha256/${file.resultSha256.slice(0, 2)}/${file.resultSha256}`, + sizeBytes: options?.resultSize ? 
options.resultSize(file.path) : 100, + }; + }); + return { files }; + }, + }; + return adapter; +} + // Publish a base Revision whose files are blob-backed (sha256 set + uploaded), so // they are eligible to inherit forward under ADR 0087 tree inheritance. async function publishBlobBackedBase( @@ -2813,8 +2846,15 @@ describe("ADR 0087 tree inheritance", () => { ).rejects.toThrow("inherited_path_not_blob_backed"); }); - it("records a patch descriptor without applying it (Stage 3)", async () => { - const { repo, actor } = await localRepoWithApiActor(); + // Drives a patched-file finalize against a one-file-changed base, returning the repo, + // the finalize promise's inputs, and the reconstructor so each test can assert on the + // committed tree or the thrown conflict. + async function patchedFinalize(options?: { + reconstructor?: RevisionReconstructor & { calls: number }; + resultSize?: (path: string) => number; + }) { + const reconstructor = options?.reconstructor ?? fakeReconstructor({ resultSize: options?.resultSize }); + const { repo, actor } = await localRepoWithApiActor({ revisionReconstructor: reconstructor }); const base = await publishBlobBackedBase( repo, actor, @@ -2844,15 +2884,6 @@ describe("ADR 0087 tree inheritance", () => { now: "2026-01-02T00:00:00.000Z", }); const descriptor = session.files.find((file) => file.path === "big.txt"); - // The diff uploads as a revision object with sha256 omitted from the signed path. - expect(descriptor?.sha256).toBeNull(); - expect(descriptor?.storage_kind).toBe("revision"); - const stored = repo.uploadSessionFiles.get(`${session.upload_session_id}:big.txt`); - expect(stored?.patch_base_sha256).toBe(sha("c")); - expect(stored?.patch_result_sha256).toBe(sha("e")); - - // Stage 3 cannot reconstruct the result blob (jobs Stage 4 owns that), so a - // valid patch must still be refused at finalize rather than serving diff bytes. 
await repo.recordUploadedFile({ workspaceId: actor.workspace_id, sessionId: session.upload_session_id, @@ -2861,15 +2892,68 @@ describe("ADR 0087 tree inheritance", () => { sizeBytes: 40, uploadedAt: "2026-01-02T00:00:01.000Z", }); - await expect( + const finalize = () => repo.finalizeUploadSession({ actor, idempotencyKey: "idem-patch-finalize", sessionId: session.upload_session_id, observedFiles: [{ path: "big.txt", objectKey: descriptor?.object_key ?? "", sizeBytes: 40 }], now: "2026-01-02T00:00:02.000Z", - }), - ).rejects.toThrow("patch_reconstruction_unavailable"); + }); + return { repo, actor, base, session, descriptor, finalize, reconstructor }; + } + + it("records the patch descriptor on the session file (Stage 3 contract preserved)", async () => { + const { session, descriptor, repo } = await patchedFinalize(); + // The diff uploads as a revision object with sha256 omitted from the signed path. + expect(descriptor?.sha256).toBeNull(); + expect(descriptor?.storage_kind).toBe("revision"); + const stored = repo.uploadSessionFiles.get(`${session.upload_session_id}:big.txt`); + expect(stored?.patch_base_sha256).toBe(sha("c")); + expect(stored?.patch_result_sha256).toBe(sha("e")); + }); + + it("reconstructs a patched file into an ordinary blob row at finalize (Stage 4)", async () => { + const { repo, finalize, reconstructor } = await patchedFinalize({ resultSize: () => 64 }); + const blobsBefore = repo.contentBlobs.size; + const finalized = await finalize(); + expect(reconstructor.calls).toBe(1); + + // The committed artifact_files row for the patched path is a content-addressed blob, + // not a diff: storage_kind blob, sha256 = result_sha256, key = the derived blob key. 
+ const files = [...repo.artifactFiles.values()].filter((file) => file.revision_id === finalized.revision_id); + const patched = files.find((file) => file.path === "big.txt"); + expect(patched?.storage_kind).toBe("blob"); + expect(patched?.sha256).toBe(sha("e")); + expect(patched?.r2_key).toContain(sha("e")); + expect(patched?.size_bytes).toBe(64); + + // The new result blob is registered so the GC refcount protects it. + expect(repo.contentBlobs.size).toBe(blobsBefore + 1); + const blob = [...repo.contentBlobs.values()].find((candidate) => candidate.sha256 === sha("e")); + expect(blob?.size_bytes).toBe(64); + + // The committed revision size reflects the reconstructed RESULT size, not the diff. + // (index.html 12 + big.txt result 64 = 76.) + expect(repo.revisions.get(finalized.revision_id)?.size_bytes).toBe(76); + }); + + it("fails finalize with patch_conflict when reconstruction cannot apply (Stage 4)", async () => { + const reconstructor = fakeReconstructor({ conflictFor: "big.txt" }); + const { repo, finalize } = await patchedFinalize({ reconstructor }); + const blobsBefore = repo.contentBlobs.size; + const revisionsBefore = repo.revisions.size; + await expect(finalize()).rejects.toThrow("patch_conflict"); + // Nothing committed: no revision, no content_blobs row. + expect(repo.revisions.size).toBe(revisionsBefore); + expect(repo.contentBlobs.size).toBe(blobsBefore); + }); + + it("enforces the file size cap against the reconstructed result, not the diff", async () => { + // The diff declares 40 bytes (under cap), but the applied result is enormous. 
+ const reconstructor = fakeReconstructor({ resultSize: () => 50_000_000 }); + const { finalize } = await patchedFinalize({ reconstructor }); + await expect(finalize()).rejects.toThrow(/file_size_cap_exceeded|revision_size_cap_exceeded/); }); it("rejects a patch whose base_sha256 does not match the base file", async () => { @@ -2937,8 +3021,8 @@ function firstFile(session: { files: Array<{ object_key: string }> }) { return file; } -async function localRepoWithApiActor() { - const repo = new LocalRepository({ apiKeyPepper: "pepper" }); +async function localRepoWithApiActor(options?: Partial) { + const repo = new LocalRepository({ apiKeyPepper: "pepper", ...options }); const workspace = await repo.createWorkspace({ actor: adminActor, idempotencyKey: "idem-ws", diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 83b0ef73..0e018eb9 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -61,6 +61,7 @@ export { } from "./postgres/executor.js"; export { reparentBlobMigratorFromEnv } from "./postgres/reparent-blob-migrator.js"; export { PostgresRepository } from "./postgres/repository.js"; +export { revisionReconstructorFromEnv } from "./postgres/revision-reconstructor.js"; export { type RlsScope, rlsExecutor } from "./postgres/rls.js"; export { createPostgresServices } from "./postgres/services.js"; export { @@ -106,6 +107,10 @@ export type { PlatformActor, ReparentBlobMigrator, RepositoryOptions, + RevisionReconstructionConflictReason, + RevisionReconstructionRequest, + RevisionReconstructionResult, + RevisionReconstructor, SafetyWarning, SafetyWarningScope, SafetyWarningSeverity, @@ -123,6 +128,7 @@ export type { WorkspaceMemberActor, WorkspacePlan, } from "./types.js"; +export { RevisionReconstructionConflict } from "./types.js"; export { buildCreateUploadSessionWireResponse, type ObjectStoragePort, diff --git a/packages/db/src/postgres/revision-reconstructor.test.ts b/packages/db/src/postgres/revision-reconstructor.test.ts new file 
mode 100644 index 00000000..0fabc58b --- /dev/null +++ b/packages/db/src/postgres/revision-reconstructor.test.ts @@ -0,0 +1,141 @@ +import { workspaceBlobObjectKeyFor } from "@agent-paste/storage"; +import { + seedEncryptedRevisionFile, + seedEncryptedWorkspaceBlob, + testArtifactBytesEncryptionEnv, +} from "@agent-paste/storage/test-helpers/encrypted-artifact-fixture"; +import { describe, expect, it } from "vitest"; +import { RevisionReconstructionConflict } from "../types.js"; +import { revisionReconstructorFromEnv } from "./revision-reconstructor.js"; + +const WS = "ws_recon"; +const ART = "art_recon"; +const REV = "rev_recon"; +const enc = new TextEncoder(); + +async function sha256Hex(text: string): Promise<string> { + const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", enc.encode(text))); + return [...digest].map((b) => b.toString(16).padStart(2, "0")).join(""); +} + +function fakeR2() { + const store = new Map<string, { body: Uint8Array; customMetadata?: Record<string, string> }>(); + return { + store, + async get(key: string) { + return store.get(key) ?? null; + }, + async put(key: string, value: Uint8Array, options?: { customMetadata?: Record<string, string> }) { + store.set(key, { body: value, customMetadata: options?.customMetadata }); + }, + async head(key: string) { + return store.has(key) ? {} : null; + }, + }; +} + +// Seed an encrypted base blob and an encrypted diff object, returning the wired R2 and +// the descriptor the db layer would hand the reconstructor. 
+async function seedPatch(input: { base: string; diff: string; path: string }) { + const r2 = fakeR2(); + const baseSha = await sha256Hex(input.base); + const seededBase = await seedEncryptedWorkspaceBlob({ workspaceId: WS, sha256: baseSha, plaintext: input.base }); + r2.store.set(seededBase.objectKey, { body: seededBase.body, customMetadata: seededBase.customMetadata }); + const seededDiff = await seedEncryptedRevisionFile({ + workspaceId: WS, + artifactId: ART, + revisionId: REV, + path: input.path, + plaintext: input.diff, + }); + r2.store.set(seededDiff.objectKey, { body: seededDiff.body, customMetadata: seededDiff.customMetadata }); + return { r2, baseSha, diffObjectKey: seededDiff.objectKey }; +} + +describe("revisionReconstructorFromEnv", () => { + it("returns undefined without the encryption ring or R2 binding", () => { + expect(revisionReconstructorFromEnv({})).toBeUndefined(); + expect(revisionReconstructorFromEnv({ ...testArtifactBytesEncryptionEnv })).toBeUndefined(); + }); + + it("applies a clean patch and writes a content-addressed result blob", async () => { + const base = "line1\nline2\nline3\n"; + const expected = "line1\nline2 changed\nline3\n"; + const diff = "@@ -1,3 +1,3 @@\n line1\n-line2\n+line2 changed\n line3\n"; + const resultSha = await sha256Hex(expected); + const { r2, baseSha, diffObjectKey } = await seedPatch({ base, diff, path: "app.txt" }); + + const reconstructor = revisionReconstructorFromEnv({ ...testArtifactBytesEncryptionEnv, ARTIFACTS: r2 }); + const out = await reconstructor?.reconstruct({ + workspaceId: WS, + files: [{ path: "app.txt", diffObjectKey, baseSha256: baseSha, resultSha256: resultSha }], + }); + + expect(out?.files).toHaveLength(1); + const file = out?.files[0]; + expect(file?.sha256).toBe(resultSha); + expect(file?.r2Key).toBe(workspaceBlobObjectKeyFor({ workspaceId: WS, sha256: resultSha })); + expect(file?.sizeBytes).toBe(enc.encode(expected).byteLength); + // The result blob is now present at the 
content-addressed key. + expect(r2.store.has(file?.r2Key ?? "")).toBe(true); + }); + + it("throws an agent-visible conflict when the result hash does not match", async () => { + const base = "a\nb\n"; + const diff = "@@ -1,2 +1,2 @@\n a\n-b\n+B\n"; + const { r2, baseSha, diffObjectKey } = await seedPatch({ base, diff, path: "f.txt" }); + + const reconstructor = revisionReconstructorFromEnv({ ...testArtifactBytesEncryptionEnv, ARTIFACTS: r2 }); + await expect( + reconstructor?.reconstruct({ + workspaceId: WS, + files: [{ path: "f.txt", diffObjectKey, baseSha256: baseSha, resultSha256: "9".repeat(64) }], + }), + ).rejects.toMatchObject({ name: "RevisionReconstructionConflict", path: "f.txt", reason: "result_hash_mismatch" }); + }); + + it("writes zero result blobs when one file in a batch conflicts (apply-all-before-put)", async () => { + const cleanBase = "x\n"; + const cleanDiff = "@@ -1 +1 @@\n-x\n+X\n"; + const cleanResult = "X\n"; + const cleanResultSha = await sha256Hex(cleanResult); + const { + r2, + baseSha: cleanBaseSha, + diffObjectKey: cleanDiffKey, + } = await seedPatch({ + base: cleanBase, + diff: cleanDiff, + path: "clean.txt", + }); + // Seed a second base + a diff that will not match its declared result. 
+ const badBase = "p\n"; + const badDiff = "@@ -1 +1 @@\n-p\n+P\n"; + const badSha = await sha256Hex(badBase); + const seededBadBase = await seedEncryptedWorkspaceBlob({ workspaceId: WS, sha256: badSha, plaintext: badBase }); + r2.store.set(seededBadBase.objectKey, { body: seededBadBase.body, customMetadata: seededBadBase.customMetadata }); + const seededBadDiff = await seedEncryptedRevisionFile({ + workspaceId: WS, + artifactId: ART, + revisionId: REV, + path: "bad.txt", + plaintext: badDiff, + }); + r2.store.set(seededBadDiff.objectKey, { body: seededBadDiff.body, customMetadata: seededBadDiff.customMetadata }); + + const reconstructor = revisionReconstructorFromEnv({ ...testArtifactBytesEncryptionEnv, ARTIFACTS: r2 }); + const sizeBefore = r2.store.size; + await expect( + reconstructor?.reconstruct({ + workspaceId: WS, + files: [ + { path: "clean.txt", diffObjectKey: cleanDiffKey, baseSha256: cleanBaseSha, resultSha256: cleanResultSha }, + { path: "bad.txt", diffObjectKey: seededBadDiff.objectKey, baseSha256: badSha, resultSha256: "0".repeat(64) }, + ], + }), + ).rejects.toBeInstanceOf(RevisionReconstructionConflict); + // No new blob written: the clean file's result was held in memory until all verified. 
+ expect(r2.store.size).toBe(sizeBefore); + expect(r2.store.has(workspaceBlobObjectKeyFor({ workspaceId: WS, sha256: cleanResultSha }))).toBe(false); + }); +}); diff --git a/packages/db/src/postgres/revision-reconstructor.ts b/packages/db/src/postgres/revision-reconstructor.ts new file mode 100644 index 00000000..82b43b5d --- /dev/null +++ b/packages/db/src/postgres/revision-reconstructor.ts @@ -0,0 +1,78 @@ +import { artifactBytesEncryptionRingFromEnv } from "@agent-paste/rotation"; +import { + applyUnifiedDiff, + readRevisionFileObjectBytes, + readWorkspaceBlobBytes, + type WorkspaceBlobR2, + writeWorkspaceBlob, +} from "@agent-paste/storage"; +import { RevisionReconstructionConflict, type RevisionReconstructor } from "../types.js"; + +// ADR 0087 Stage 4: builds the reconstructor that applies an agent-uploaded unified diff +// to a base blob and stores the whole result as an ordinary content-addressed blob, +// SYNCHRONOUSLY at finalize and BEFORE the new Revision commits. A clean patch yields a +// blob the rest of the system treats like any other; a patch that cannot apply throws an +// agent-visible conflict (failing the same finalize call), so a broken revision never +// reaches draft, let alone published. Infra failures (missing ring/R2, decrypt errors) +// propagate as-is and the db layer maps them to a retryable error, never a conflict. +export function revisionReconstructorFromEnv(env: { + ARTIFACTS?: WorkspaceBlobR2; + ARTIFACT_BYTES_ENCRYPTION_KEY?: string; + ARTIFACT_BYTES_ENCRYPTION_KEY_V2?: string; + ARTIFACT_BYTES_ENCRYPTION_KID?: string; +}): RevisionReconstructor | undefined { + const ring = artifactBytesEncryptionRingFromEnv(env); + const artifacts = env.ARTIFACTS; + if (!ring || !artifacts) { + return undefined; + } + return { + async reconstruct(input) { + // Apply + hash-verify EVERY patched file before any PUT, so a multi-file batch + // with one conflict writes zero blobs (no orphaned partial results). 
+ const applied: Array<{ path: string; sha256: string; resultBytes: Uint8Array }> = []; + for (const file of input.files) { + const baseBytes = await readWorkspaceBlobBytes({ + r2: artifacts, + workspaceId: input.workspaceId, + sha256: file.baseSha256, + ring, + }); + const diffBytes = await readRevisionFileObjectBytes({ + r2: artifacts, + objectKey: file.diffObjectKey, + workspaceId: input.workspaceId, + ring, + }); + const result = await applyUnifiedDiff({ + baseBytes, + diffBytes, + expectedBaseSha256: file.baseSha256, + expectedResultSha256: file.resultSha256, + }); + if (!result.ok) { + throw new RevisionReconstructionConflict(file.path, result.reason); + } + applied.push({ path: file.path, sha256: file.resultSha256, resultBytes: result.result }); + } + + const files: Array<{ path: string; sha256: string; r2Key: string; sizeBytes: number }> = []; + for (const entry of applied) { + const written = await writeWorkspaceBlob({ + r2: artifacts, + workspaceId: input.workspaceId, + sha256: entry.sha256, + plaintext: entry.resultBytes, + ring, + }); + files.push({ + path: entry.path, + sha256: entry.sha256, + r2Key: written.key, + sizeBytes: entry.resultBytes.byteLength, + }); + } + return { files }; + }, + }; +} diff --git a/packages/db/src/postgres/worker-runtime.ts b/packages/db/src/postgres/worker-runtime.ts index cb68b3b2..0a2092c0 100644 --- a/packages/db/src/postgres/worker-runtime.ts +++ b/packages/db/src/postgres/worker-runtime.ts @@ -4,6 +4,7 @@ import type { Repository } from "../repository/interface.js"; import type { ApiKeyActor, HyperdriveBinding, RepositoryOptions } from "../types.js"; import { createHyperdriveExecutor } from "./executor.js"; import { reparentBlobMigratorFromEnv } from "./reparent-blob-migrator.js"; +import { revisionReconstructorFromEnv } from "./revision-reconstructor.js"; import { createPostgresServices } from "./services.js"; export type WorkerPostgresEnv = { @@ -62,6 +63,22 @@ export function createPostgresRuntime( 
migratorEnv.ARTIFACT_BYTES_ENCRYPTION_KID = env.ARTIFACT_BYTES_ENCRYPTION_KID; } const reparentBlobMigrator = reparentBlobMigratorFromEnv(migratorEnv); + const reconstructorEnv: Parameters<typeof revisionReconstructorFromEnv>[0] = {}; + if (env.ARTIFACTS) { + reconstructorEnv.ARTIFACTS = env.ARTIFACTS as NonNullable< + Parameters<typeof revisionReconstructorFromEnv>[0]["ARTIFACTS"] + >; + } + if (migratorEnv.ARTIFACT_BYTES_ENCRYPTION_KEY) { + reconstructorEnv.ARTIFACT_BYTES_ENCRYPTION_KEY = migratorEnv.ARTIFACT_BYTES_ENCRYPTION_KEY; + } + if (migratorEnv.ARTIFACT_BYTES_ENCRYPTION_KEY_V2) { + reconstructorEnv.ARTIFACT_BYTES_ENCRYPTION_KEY_V2 = migratorEnv.ARTIFACT_BYTES_ENCRYPTION_KEY_V2; + } + if (migratorEnv.ARTIFACT_BYTES_ENCRYPTION_KID) { + reconstructorEnv.ARTIFACT_BYTES_ENCRYPTION_KID = migratorEnv.ARTIFACT_BYTES_ENCRYPTION_KID; + } + const revisionReconstructor = revisionReconstructorFromEnv(reconstructorEnv); const services = createPostgresServices({ executor: createHyperdriveExecutor(env.DB), apiKeyPepper, @@ -69,6 +86,7 @@ export function createPostgresRuntime( apiKeyEnv: env.API_KEY_ENV ?? "preview", billingEnabled: isBillingEnabled(env.BILLING_ENABLED), ...(reparentBlobMigrator ? { reparentBlobMigrator } : {}), + ...(revisionReconstructor ? 
{ revisionReconstructor } : {}), ...serviceUrls, }); return { auth: services.auth, db: options.pickDb(services) }; diff --git a/packages/db/src/repository-error.ts b/packages/db/src/repository-error.ts index 4354bb73..ca70ce5c 100644 --- a/packages/db/src/repository-error.ts +++ b/packages/db/src/repository-error.ts @@ -32,7 +32,7 @@ export const RepositoryErrorCode = { lockdown_insert_conflict: "lockdown_insert_conflict", not_found: "not_found", patch_base_mismatch: "patch_base_mismatch", - patch_reconstruction_unavailable: "patch_reconstruction_unavailable", + patch_conflict: "patch_conflict", pinned_artifact_cap_exceeded: "pinned_artifact_cap_exceeded", postgres_http_error: "postgres_http_error", postgres_http_executor_no_transactions: "postgres_http_executor_no_transactions", @@ -102,8 +102,11 @@ const repositoryErrorToAppErrorMap: Record; + reconstructor: RevisionReconstructor | undefined; +}): Promise> { + const { session, sessionFiles, baseFiles } = input; + const requests: Array<{ path: string; diffObjectKey: string; baseSha256: string; resultSha256: string }> = []; + for (const file of sessionFiles) { + if (!file.patch_base_sha256 || !file.patch_result_sha256) { + continue; + } + const baseFile = baseFiles.get(file.path); + if (!baseFile || baseFile.sha256 !== file.patch_base_sha256) { + repositoryError("patch_base_mismatch"); + } + requests.push({ + path: file.path, + diffObjectKey: file.r2_key, + baseSha256: file.patch_base_sha256, + resultSha256: file.patch_result_sha256, + }); + } + const reconstructed = new Map(); + if (requests.length === 0) { + return reconstructed; + } + if (!input.reconstructor) { + // The capability is wired in every real worker (upload) + the local harness; its + // absence is an infra/config failure, not an agent error. 
+ repositoryError("storage_unavailable"); + } + let result: Awaited>; + try { + result = await input.reconstructor.reconstruct({ workspaceId: session.workspace_id, files: requests }); + } catch (error) { + // A patch that cannot apply is an agent-fixable conflict carrying the path + reason + // in the message; any other failure (R2/decrypt/ring) is infra and stays retryable. + if (error instanceof RevisionReconstructionConflict) { + repositoryError("patch_conflict", { cause: error }); + } + throw error; + } + const bySha = new Map(sessionFiles.filter((f) => f.patch_result_sha256).map((f) => [f.path, f])); + for (const file of result.files) { + const source = bySha.get(file.path); + reconstructed.set(file.path, { + workspace_id: session.workspace_id, + artifact_id: session.artifact_id, + revision_id: session.revision_id, + upload_session_id: session.id, + path: file.path, + size_bytes: file.sizeBytes, + content_type: source?.content_type ?? contentTypeForPath(file.path), + r2_key: file.r2Key, + sha256: file.sha256, + storage_kind: "blob", + uploaded_at: session.finalized_at, + }); + } + return reconstructed; +} + // ADR 0087 tree inheritance: merge the base Revision's published tree with this // session's changed/added/deleted manifest into the full tree the new Revision // commits. Runs at finalize (the base is a published Revision; the merge is the // "commit = parent tree + delta" step) and validates every stateful precondition -// the contract deferred from create. +// the contract deferred from create. A patched file's diff is reconstructed into a +// whole content-addressed blob here, before any DB write, so the committed tree only +// ever contains servable blob/revision rows — never a raw diff. 
async function mergeBaseRevisionTree( entities: Entities, session: UploadSession, sessionFiles: StoredFile[], + reconstructor: RevisionReconstructor | undefined, ): Promise { const baseRevisionId = session.base_revision_id; if (!baseRevisionId) { @@ -229,19 +313,10 @@ async function mergeBaseRevisionTree( repositoryError("deleted_path_not_in_base"); } } - for (const file of sessionFiles) { - if (file.patch_base_sha256) { - const baseFile = baseFiles.get(file.path); - if (!baseFile || baseFile.sha256 !== file.patch_base_sha256) { - repositoryError("patch_base_mismatch"); - } - // Stage 3 records and validates the patch descriptor but cannot reconstruct - // the whole result blob yet (jobs Stage 4 owns that). Finalizing now would - // commit the diff bytes as the served file, so refuse until reconstruction - // exists. Fail loud rather than serve a half-applied file (ADR 0087). - repositoryError("patch_reconstruction_unavailable"); - } - } + + // Reconstruct patched files into whole blobs before any DB write. A conflict throws + // here, so finalize fails atomically with nothing committed. + const reconstructed = await reconstructPatchedFiles({ session, sessionFiles, baseFiles, reconstructor }); // Inherited rows are copied forward by reference and must be blob-backed: a // revision-scoped base file (sha256 null) lives under that base Revision's prefix @@ -264,6 +339,18 @@ async function mergeBaseRevisionTree( }); } for (const file of sessionFiles) { + // A patched file MUST be replaced by its reconstructed blob row; committing the diff + // placeholder (storage_kind:"revision", the encrypted diff bytes) would serve the + // diff as content. Guard the never-serve-a-diff invariant explicitly rather than + // trusting the reconstructor returned one result per request. 
+ if (file.patch_base_sha256 || file.patch_result_sha256) { + const blob = reconstructed.get(file.path); + if (!blob) { + throw new Error(`reconstruction missing result for patched path: ${file.path}`); + } + merged.set(file.path, blob); + continue; + } merged.set(file.path, file); } @@ -273,6 +360,7 @@ async function mergeBaseRevisionTree( fileCount: files.length, sizeBytes: files.reduce((sum, file) => sum + file.size_bytes, 0), parentRevisionId: baseRevisionId, + reconstructedBlobs: [...reconstructed.values()], }; } @@ -340,6 +428,9 @@ export async function finalizeUploadSessionInEntities( // Resolved lazily and only for a base-Revision merge (validateUpload on the // merged tree), so non-base finalizes never touch the workspace lookup. resolveUsagePolicy: () => Promise; + // Applies intra-file unified-diff patches before commit (ADR 0087 Stage 4). Only + // exercised when the session has patched files; absent on full-manifest finalizes. + revisionReconstructor?: RevisionReconstructor; }, ) { const session = await entities.uploadSessions.findById(input.sessionId, input.actor.workspace_id); @@ -375,8 +466,13 @@ export async function finalizeUploadSessionInEntities( // from the merge (the session row counts only the changed manifest). validateUpload // re-checks caps + entrypoint against the real published tree (an inherited path // may be the entrypoint). Without a base, behavior is unchanged. - const merged = session.base_revision_id ? await mergeBaseRevisionTree(entities, session, files) : null; + const merged = session.base_revision_id + ? await mergeBaseRevisionTree(entities, session, files, input.revisionReconstructor) + : null; if (merged) { + // Caps run on the MERGED tree carrying RECONSTRUCTED result sizes (a patched file's + // session size_bytes is the diff size), so an applied result that blows the cap is + // rejected here. validateUpload(merged.files, await input.resolveUsagePolicy(), session.entrypoint); } const treeFiles = merged?.files ?? 
files; @@ -416,6 +512,21 @@ export async function finalizeUploadSessionInEntities( }; await entities.revisions.insert(revision); await entities.uploadSessions.markFinalized(session.id, input.now); + // Register each reconstructed result blob so the content-blob refcount protects it + // (its sha256 is brand new — unlike an inherited blob whose row already exists). + for (const blob of merged?.reconstructedBlobs ?? []) { + if (!blob.sha256) { + continue; + } + await entities.contentBlobs.upsert({ + workspace_id: session.workspace_id, + sha256: blob.sha256, + size_bytes: blob.size_bytes, + r2_key: blob.r2_key, + created_at: input.now, + updated_at: input.now, + }); + } for (const file of treeFiles) { await entities.artifactFiles.insert(session.artifact_id, session.revision_id, file, input.now); } diff --git a/packages/db/src/repository/workflows/upload-publish-workflow.ts b/packages/db/src/repository/workflows/upload-publish-workflow.ts index e878455a..5f0a62bf 100644 --- a/packages/db/src/repository/workflows/upload-publish-workflow.ts +++ b/packages/db/src/repository/workflows/upload-publish-workflow.ts @@ -121,6 +121,7 @@ export async function finalizeUploadSession( // workspace still collapses to upload_session_not_found (non-enumerable) // via the workspace-scoped session lookup rather than workspace_not_found. resolveUsagePolicy: async () => ctx.usagePolicyFor(await ctx.mustWorkspace(entities, input.actor.workspace_id)), + ...(ctx.options.revisionReconstructor ? 
{ revisionReconstructor: ctx.options.revisionReconstructor } : {}), }), ); } diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts index 12fb01f0..e95d5781 100644 --- a/packages/db/src/types.ts +++ b/packages/db/src/types.ts @@ -291,6 +291,50 @@ export type ReparentBlobMigrator = { migrate(input: { fromWorkspaceId: string; toWorkspaceId: string; blobs: readonly WorkspaceBlobRef[] }): Promise; }; +// ADR 0087 Stage 4: a patched file in a partial-manifest publish uploads only a unified +// diff. Before the new Revision can commit, the diff is applied to the base blob and the +// whole result stored as an ordinary content-addressed blob. The reconstructor takes +// VALIDATED descriptors (base/result sha already checked against the base Revision's own +// artifact_files, workspace-scoped by the db layer) — never a raw object key — so a +// compromised caller cannot read an arbitrary blob. A patch that cannot apply cleanly +// throws RevisionReconstructionConflict (agent-visible); an infra failure throws anything +// else (mapped to a retryable error, never a conflict). +export type RevisionReconstructionRequest = { + workspaceId: string; + files: ReadonlyArray<{ + path: string; + diffObjectKey: string; + baseSha256: string; + resultSha256: string; + }>; +}; + +export type RevisionReconstructionResult = { + // Per patched path, the applied result's content-addressed blob. The caller turns this + // into an ordinary storage_kind='blob' artifact_files row + content_blobs row. 
+ files: ReadonlyArray<{ path: string; sha256: string; r2Key: string; sizeBytes: number }>; +}; + +export type RevisionReconstructionConflictReason = + | "parse_error" + | "base_hash_mismatch" + | "apply_failed" + | "result_hash_mismatch"; + +export class RevisionReconstructionConflict extends Error { + readonly name = "RevisionReconstructionConflict"; + constructor( + readonly path: string, + readonly reason: RevisionReconstructionConflictReason, + ) { + super(`patch_conflict: ${path}: ${reason}`); + } +} + +export type RevisionReconstructor = { + reconstruct(input: RevisionReconstructionRequest): Promise<RevisionReconstructionResult>; +}; + export type RepositoryOptions = { apiKeyPepper: string; /** When set, verification and minting use multi-pepper overlap from ADR 0045. */ @@ -303,4 +347,6 @@ export type RepositoryOptions = { billingEnabled?: boolean; /** Copies workspace blob bytes into the destination tenant before claim reparent commits. */ reparentBlobMigrator?: ReparentBlobMigrator; + /** Applies intra-file unified-diff patches to base blobs before a partial-manifest finalize commits (ADR 0087). 
import { describe, expect, it } from "vitest";
import { applyUnifiedDiff } from "./unified-diff.js";

const enc = new TextEncoder();

// Hex SHA-256 of `bytes`. When `bytes` is a view over part of a larger buffer it is
// copied first, so only the viewed range is hashed.
async function sha256Hex(bytes: Uint8Array): Promise<string> {
  const buf =
    bytes.byteOffset === 0 && bytes.byteLength === bytes.buffer.byteLength ? bytes.buffer : bytes.slice().buffer;
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", buf));
  return [...digest].map((b) => b.toString(16).padStart(2, "0")).join("");
}

// Apply a real unified-diff body against a base string and assert the result is
// byte-identical to the expected result, with hashes verified by the applier.
async function applyText(base: string, diff: string, expectedResult: string) {
  const baseBytes = enc.encode(base);
  const resultBytes = enc.encode(expectedResult);
  return applyUnifiedDiff({
    baseBytes,
    diffBytes: enc.encode(diff),
    expectedBaseSha256: await sha256Hex(baseBytes),
    expectedResultSha256: await sha256Hex(resultBytes),
  });
}

describe("applyUnifiedDiff", () => {
  it("applies a single-hunk modification + append byte-exactly", async () => {
    const base = "line1\nline2\nline3\n";
    const diff = "@@ -1,3 +1,4 @@\n line1\n-line2\n+line2 modified\n line3\n+line4 added\n";
    const expected = "line1\nline2 modified\nline3\nline4 added\n";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect(new TextDecoder().decode(out.result)).toBe(expected);
    }
  });

  it("applies multiple hunks", async () => {
    const base = "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\n";
    // Change b->B in hunk 1 and i->I in hunk 2.
    const diff = "@@ -1,3 +1,3 @@\n a\n-b\n+B\n c\n@@ -8,3 +8,3 @@\n h\n-i\n+I\n j\n";
    const expected = "a\nB\nc\nd\ne\nf\ng\nh\nI\nj\n";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect(new TextDecoder().decode(out.result)).toBe(expected);
    }
  });

  it("handles a header preamble (---/+++) before the first hunk", async () => {
    const base = "x\ny\n";
    const diff = "--- a/file\n+++ b/file\n@@ -1,2 +1,2 @@\n x\n-y\n+Y\n";
    const expected = "x\nY\n";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
  });

  it("preserves CRLF line endings byte-exactly", async () => {
    const base = "one\r\ntwo\r\nthree\r\n";
    const diff = "@@ -1,3 +1,3 @@\n one\r\n-two\r\n+TWO\r\n three\r\n";
    const expected = "one\r\nTWO\r\nthree\r\n";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect([...out.result]).toEqual([...enc.encode(expected)]);
    }
  });

  it("preserves a leading BOM and non-ASCII bytes in unchanged regions", async () => {
    // The base really starts with a BOM (U+FEFF) so the title's claim is exercised: the
    // BOM-carrying first line is matched as context and must come back byte-for-byte.
    const base = "\uFEFFhéllo\nwörld\n";
    const diff = "@@ -1,2 +1,2 @@\n \uFEFFhéllo\n-wörld\n+wörld!\n";
    const expected = "\uFEFFhéllo\nwörld!\n";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect([...out.result]).toEqual([...enc.encode(expected)]);
    }
  });

  it("honors no-newline-at-eof on the added last line", async () => {
    const base = "a\nb\n";
    const diff = "@@ -1,2 +1,3 @@\n a\n b\n+c\n\\ No newline at end of file\n";
    const expected = "a\nb\nc";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect(new TextDecoder().decode(out.result)).toBe(expected);
      expect(out.result.at(-1)).not.toBe(0x0a);
    }
  });

  it("handles base-no-newline becoming result-with-newline", async () => {
    const base = "x\ny";
    const diff = "@@ -1,2 +1,2 @@\n x\n-y\n\\ No newline at end of file\n+y\n";
    const expected = "x\ny\n";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect(new TextDecoder().decode(out.result)).toBe(expected);
    }
  });

  it("modifies a last line that had no trailing newline", async () => {
    const base = "alpha\nbeta\ngamma";
    const diff = "@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n\\ No newline at end of file\n";
    const expected = "alpha\nBETA\ngamma";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect(new TextDecoder().decode(out.result)).toBe(expected);
    }
  });

  it("applies an insert into an empty base", async () => {
    const base = "";
    const diff = "@@ -0,0 +1,2 @@\n+hello\n+world\n";
    const expected = "hello\nworld\n";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect(new TextDecoder().decode(out.result)).toBe(expected);
    }
  });

  it("deletes lines", async () => {
    const base = "keep1\ndrop\nkeep2\n";
    const diff = "@@ -1,3 +1,2 @@\n keep1\n-drop\n keep2\n";
    const expected = "keep1\nkeep2\n";
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
  });

  it("applies a real multi-hunk git diff with section headings after @@", async () => {
    const base = [
      "function greet(name) {",
      ' console.log("Hello, " + name);',
      "}",
      "",
      "function farewell(name) {",
      ' console.log("Bye, " + name);',
      "}",
      "",
      'greet("world");',
      "",
    ].join("\n");
    const expected = [
      "function greet(name) {",
      " console.log(`Hello, ${name}!`);",
      "}",
      "",
      "function farewell(name) {",
      ' console.log("Bye, " + name);',
      "}",
      "",
      'greet("world");',
      'farewell("world");',
      "",
    ].join("\n");
    // Verbatim `git diff --no-index -U3` body, including the " function farewell(name) {"
    // section heading appended after the second hunk's closing @@.
    const diff =
      "@@ -1,5 +1,5 @@\n" +
      " function greet(name) {\n" +
      '- console.log("Hello, " + name);\n' +
      "+ console.log(`Hello, ${name}!`);\n" +
      " }\n" +
      " \n" +
      " function farewell(name) {\n" +
      "@@ -7,3 +7,4 @@ function farewell(name) {\n" +
      " }\n" +
      " \n" +
      ' greet("world");\n' +
      '+farewell("world");\n';
    const out = await applyText(base, diff, expected);
    expect(out.ok).toBe(true);
    if (out.ok) {
      expect(new TextDecoder().decode(out.result)).toBe(expected);
    }
  });

  describe("conflicts", () => {
    it("reports base_hash_mismatch when the base digest is wrong", async () => {
      const out = await applyUnifiedDiff({
        baseBytes: enc.encode("actual base\n"),
        diffBytes: enc.encode("@@ -1 +1 @@\n-x\n+y\n"),
        expectedBaseSha256: "0".repeat(64),
        expectedResultSha256: "0".repeat(64),
      });
      expect(out).toEqual({ ok: false, reason: "base_hash_mismatch" });
    });

    it("reports parse_error on malformed diff text", async () => {
      const base = "a\n";
      const baseBytes = enc.encode(base);
      const out = await applyUnifiedDiff({
        baseBytes,
        diffBytes: enc.encode("this is not a diff at all\n"),
        expectedBaseSha256: await sha256Hex(baseBytes),
        expectedResultSha256: "0".repeat(64),
      });
      expect(out).toEqual({ ok: false, reason: "parse_error" });
    });

    it("reports parse_error on non-UTF-8 diff bytes", async () => {
      const base = "a\n";
      const baseBytes = enc.encode(base);
      const out = await applyUnifiedDiff({
        baseBytes,
        diffBytes: new Uint8Array([0xff, 0xfe, 0x00]),
        expectedBaseSha256: await sha256Hex(baseBytes),
        expectedResultSha256: "0".repeat(64),
      });
      expect(out).toEqual({ ok: false, reason: "parse_error" });
    });

    it("reports apply_failed when context does not match the base", async () => {
      const base = "a\nb\nc\n";
      const baseBytes = enc.encode(base);
      // Context claims "X" where the base has "a".
      const out = await applyUnifiedDiff({
        baseBytes,
        diffBytes: enc.encode("@@ -1,3 +1,3 @@\n X\n-b\n+B\n c\n"),
        expectedBaseSha256: await sha256Hex(baseBytes),
        expectedResultSha256: "0".repeat(64),
      });
      expect(out).toEqual({ ok: false, reason: "apply_failed" });
    });

    it("reports apply_failed on out-of-order / overlapping hunks", async () => {
      const base = "a\nb\nc\nd\n";
      const baseBytes = enc.encode(base);
      // Second hunk targets line 1, before the first hunk's line 3.
      const out = await applyUnifiedDiff({
        baseBytes,
        diffBytes: enc.encode("@@ -3,1 +3,1 @@\n-c\n+C\n@@ -1,1 +1,1 @@\n-a\n+A\n"),
        expectedBaseSha256: await sha256Hex(baseBytes),
        expectedResultSha256: "0".repeat(64),
      });
      expect(out).toEqual({ ok: false, reason: "apply_failed" });
    });

    it("reports result_hash_mismatch when the applied bytes do not match the declared result", async () => {
      const base = "a\nb\n";
      const baseBytes = enc.encode(base);
      const out = await applyUnifiedDiff({
        baseBytes,
        diffBytes: enc.encode("@@ -1,2 +1,2 @@\n a\n-b\n+B\n"),
        expectedBaseSha256: await sha256Hex(baseBytes),
        // Correct applied result is "a\nB\n"; declare a different digest.
        expectedResultSha256: "1".repeat(64),
      });
      expect(out).toEqual({ ok: false, reason: "result_hash_mismatch" });
    });
  });
});
// ADR 0087 Stage 4 intra-file delta: apply an agent-uploaded unified diff to a base
// blob and commit the whole reconstructed result. A patch that cannot be applied
// cleanly is a first-class, agent-visible CONFLICT (the agent re-submits a corrected
// diff), never a silent failure. Reconstruction is byte-exact: the result digest must
// equal the client-declared result_sha256, so this applier NEVER normalizes line
// endings, BOM, or trailing newlines. Base lines are copied as raw byte ranges, so
// everything OUTSIDE the hunks round-trips bit-for-bit whatever its encoding. The diff
// itself must be valid UTF-8 (it is decoded strictly below) and context/delete lines are
// compared against the UTF-8 encoding of the diff text, so a hunk can only anchor on,
// delete, or add lines that are valid UTF-8.

// Captures: 1 = old start, 2 = old count (optional), 3 = new start, 4 = new count
// (optional). Anything after the closing "@@" (git's section heading) is ignored.
const HUNK_HEADER = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/u;
const NO_NEWLINE_MARKER = "\\ No newline at end of file";

export type ApplyConflictReason = "parse_error" | "base_hash_mismatch" | "apply_failed" | "result_hash_mismatch";

export type ApplyUnifiedDiffResult = { ok: true; result: Uint8Array } | { ok: false; reason: ApplyConflictReason };

type DiffLine =
  | { kind: "context"; text: string }
  | { kind: "delete"; text: string }
  | { kind: "add"; text: string }
  | { kind: "no_newline" };

// Only `oldStart` and whether `oldLines` is zero drive application; the other counts are
// parsed but not cross-checked against the body (the result digest is the final arbiter).
type Hunk = {
  oldStart: number;
  oldLines: number;
  newStart: number;
  newLines: number;
  lines: DiffLine[];
};

// A base line is the raw byte range it occupies INCLUDING its terminator; the final
// line may have no terminator. Splitting on raw bytes (not decoded text) keeps the
// copy byte-exact regardless of encoding.
type BaseLine = { start: number; end: number; hasTerminator: boolean };

/** Lowercase hex encoding of `bytes`, two digits per byte. */
function toHex(bytes: Uint8Array): string {
  let hex = "";
  for (const byte of bytes) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}

/**
 * Returns a view WebCrypto accepts as a BufferSource: a fresh view over the same
 * ArrayBuffer range when possible, otherwise a copy (e.g. a non-ArrayBuffer backing store).
 */
function asBufferSource(bytes: Uint8Array): BufferSource {
  if (bytes.buffer instanceof ArrayBuffer) {
    return new Uint8Array(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  }
  return Uint8Array.from(bytes);
}

/** Hex SHA-256 digest of `bytes`. */
async function sha256Hex(bytes: Uint8Array): Promise<string> {
  return toHex(new Uint8Array(await crypto.subtle.digest("SHA-256", asBufferSource(bytes))));
}

// Decode UTF-8, returning null on any invalid sequence. `TextDecoder({ fatal: true })`
// is the obvious tool but its option type is not in every Worker TS lib config, so we
// decode lossily then verify the decode round-trips to the same bytes — a replacement
// character inserted for an invalid sequence re-encodes to different bytes. (A leading
// BOM is dropped by the default decoder, so a BOM-prefixed diff also fails the
// round-trip and is reported as unparseable.)
function decodeUtf8Strict(bytes: Uint8Array): string | null {
  const text = new TextDecoder().decode(asBufferSource(bytes));
  if (!bytesEqual(new TextEncoder().encode(text), bytes)) {
    return null;
  }
  return text;
}

// Split the base into lines by raw LF byte (0x0a). A trailing "\r" stays part of the
// line's content range, so CRLF files round-trip; only the LF is the boundary.
function splitBaseLines(base: Uint8Array): BaseLine[] {
  const lines: BaseLine[] = [];
  let start = 0;
  for (let i = 0; i < base.length; i++) {
    if (base[i] === 0x0a) {
      lines.push({ start, end: i + 1, hasTerminator: true });
      start = i + 1;
    }
  }
  // Bytes after the last LF form a final, unterminated line.
  if (start < base.length) {
    lines.push({ start, end: base.length, hasTerminator: false });
  }
  return lines;
}

const FILE_HEADER = /^(---|\+\+\+|diff |index |old mode|new mode|similarity|rename|copy|new file|deleted file)/u;

// Parse one diff body line. The trailing empty string from a final "\n" split is the
// caller's concern; here a bare "" is an empty context line. Returns null on a bad marker.
function parseDiffLine(raw: string): DiffLine | null {
  if (raw === NO_NEWLINE_MARKER) {
    return { kind: "no_newline" };
  }
  if (raw === "") {
    // Some tools strip the single leading space of an empty context line.
    return { kind: "context", text: "" };
  }
  const text = raw.slice(1);
  switch (raw[0]) {
    case " ":
      return { kind: "context", text };
    case "-":
      return { kind: "delete", text };
    case "+":
      return { kind: "add", text };
    default:
      return null;
  }
}

// Advance past any leading file headers (---/+++/diff/index). Returns the index of the
// first hunk header, or null if a non-header line appears before any @@ (malformed).
function skipFileHeaders(rawLines: string[]): number | null {
  let i = 0;
  while (i < rawLines.length && !HUNK_HEADER.test(rawLines[i] ?? "")) {
    const line = rawLines[i] ?? "";
    if (line !== "" && !FILE_HEADER.test(line)) {
      return null;
    }
    i++;
  }
  // Running off the end means the text contained no hunk at all.
  return i < rawLines.length ? i : null;
}

// Parse an "@@ -a,b +c,d @@" line. An omitted count defaults to 1.
function parseHunkHeader(line: string): Omit<Hunk, "lines"> | null {
  const header = HUNK_HEADER.exec(line);
  if (!header) {
    return null;
  }
  return {
    oldStart: Number(header[1]),
    oldLines: header[2] === undefined ? 1 : Number(header[2]),
    newStart: Number(header[3]),
    newLines: header[4] === undefined ? 1 : Number(header[4]),
  };
}

// Parse the whole diff text into hunks. Returns null when there is no hunk, when junk
// precedes the first hunk, or when any body line carries an unknown marker.
function parseHunks(diffText: string): Hunk[] | null {
  const rawLines = diffText.split("\n");
  const hunks: Hunk[] = [];
  let i = skipFileHeaders(rawLines);
  if (i === null) {
    return null;
  }
  while (i < rawLines.length) {
    // A trailing empty string from a final "\n" split is benign; anything else is junk.
    if ((rawLines[i] ?? "") === "" && i === rawLines.length - 1) {
      break;
    }
    const head = parseHunkHeader(rawLines[i] ?? "");
    if (!head) {
      return null;
    }
    i++;
    const lines: DiffLine[] = [];
    while (i < rawLines.length && !HUNK_HEADER.test(rawLines[i] ?? "")) {
      if ((rawLines[i] ?? "") === "" && i === rawLines.length - 1) {
        i++; // Trailing newline after the last hunk body line.
        break;
      }
      const diffLine = parseDiffLine(rawLines[i] ?? "");
      if (!diffLine) {
        return null;
      }
      lines.push(diffLine);
      i++;
    }
    hunks.push({ ...head, lines });
  }
  return hunks.length > 0 ? hunks : null;
}

const encoder = new TextEncoder();

// Match a diff context/delete line against the base line at `cursor`, comparing raw
// bytes (the line content excludes its LF terminator but, for CRLF, includes the CR).
function baseLineMatches(base: Uint8Array, baseLine: BaseLine, text: string): boolean {
  const contentEnd = baseLine.hasTerminator ? baseLine.end - 1 : baseLine.end;
  return bytesEqual(base.subarray(baseLine.start, contentEnd), encoder.encode(text));
}

type HunkApplication = { out: Uint8Array[]; cursor: number };

// Apply one hunk starting at `cursor`, returning the emitted byte ranges and the new
// cursor, or null on any mismatch / out-of-range. Hunks must arrive in forward order.
function applyHunk(base: Uint8Array, baseLines: BaseLine[], hunk: Hunk, cursor: number): HunkApplication | null {
  // Hunk line numbers are 1-based; an empty old side (oldLines 0) anchors AFTER oldStart,
  // so the first changed line is oldStart (insert) — normalize to 0-based.
  const hunkStart = hunk.oldLines === 0 ? hunk.oldStart : hunk.oldStart - 1;
  if (hunkStart < cursor || hunkStart > baseLines.length) {
    return null;
  }
  const out: Uint8Array[] = [];
  // Copy untouched base lines between the cursor and this hunk, byte-for-byte.
  for (let l = cursor; l < hunkStart; l++) {
    const line = baseLines[l];
    if (!line) {
      return null;
    }
    out.push(base.subarray(line.start, line.end));
  }
  let at = hunkStart;
  for (let idx = 0; idx < hunk.lines.length; idx++) {
    const diffLine = hunk.lines[idx];
    // A "\ No newline" marker emits nothing itself; it only affects the preceding add.
    if (!diffLine || diffLine.kind === "no_newline") {
      continue;
    }
    if (diffLine.kind === "context" || diffLine.kind === "delete") {
      const baseLine = baseLines[at];
      if (!baseLine || !baseLineMatches(base, baseLine, diffLine.text)) {
        return null;
      }
      if (diffLine.kind === "context") {
        // Context is copied from the BASE (terminator included), not re-encoded.
        out.push(base.subarray(baseLine.start, baseLine.end));
      }
      at++;
    } else {
      // An added line carries a terminator unless a following "\ No newline" marker says
      // otherwise (the last line of a no-trailing-newline result).
      const followedByNoNewline = hunk.lines[idx + 1]?.kind === "no_newline";
      out.push(encoder.encode(followedByNoNewline ? diffLine.text : `${diffLine.text}\n`));
    }
  }
  return { out, cursor: at };
}

// Apply parsed hunks against the raw base, splicing raw byte ranges so the output is
// byte-exact. Returns null on any apply failure (context mismatch, out-of-range,
// overlapping/out-of-order hunks).
function applyHunks(base: Uint8Array, baseLines: BaseLine[], hunks: Hunk[]): Uint8Array | null {
  const out: Uint8Array[] = [];
  let cursor = 0; // next base LINE index (0-based) not yet emitted.
  for (const hunk of hunks) {
    const applied = applyHunk(base, baseLines, hunk, cursor);
    if (!applied) {
      return null;
    }
    // Append chunk by chunk: spreading a caller-sized array into push() passes every
    // chunk as a call argument and can overflow the engine's argument limit on a very
    // large hunk, which would surface as an infra error instead of a clean result.
    for (const chunk of applied.out) {
      out.push(chunk);
    }
    cursor = applied.cursor;
  }
  // Copy any base lines after the last hunk.
  for (let l = cursor; l < baseLines.length; l++) {
    const line = baseLines[l];
    if (!line) {
      return null;
    }
    out.push(base.subarray(line.start, line.end));
  }
  return concatBytes(out);
}

/** Byte-wise equality of two arrays (same length and same bytes). */
function bytesEqual(a: Uint8Array, b: Uint8Array): boolean {
  if (a.byteLength !== b.byteLength) {
    return false;
  }
  for (let i = 0; i < a.byteLength; i++) {
    if (a[i] !== b[i]) {
      return false;
    }
  }
  return true;
}

/** Concatenates `chunks` into one newly allocated array, in order. */
function concatBytes(chunks: Uint8Array[]): Uint8Array {
  let total = 0;
  for (const chunk of chunks) {
    total += chunk.byteLength;
  }
  const out = new Uint8Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    out.set(chunk, offset);
    offset += chunk.byteLength;
  }
  return out;
}

/**
 * Apply a unified-diff patch to a base blob, byte-exactly. The base must digest to
 * {@link input.expectedBaseSha256} and the applied result must digest to
 * {@link input.expectedResultSha256}; either mismatch is a conflict, never a stored
 * blob. The four conflict reasons are deliberately coarse: the only action an agent
 * can take on any of them is to regenerate the diff for this file, so hunk/line
 * forensics would be detail the agent cannot use.
 *
 * Checks run in a fixed order, and the first failure wins: base digest
 * (`base_hash_mismatch`), UTF-8 decode and hunk parse (`parse_error`), application
 * (`apply_failed`), result digest (`result_hash_mismatch`).
 *
 * @param input.baseBytes Plaintext bytes of the base blob.
 * @param input.diffBytes Unified-diff bytes; must be valid UTF-8.
 * @param input.expectedBaseSha256 Lowercase hex SHA-256 the base must have.
 * @param input.expectedResultSha256 Lowercase hex SHA-256 the result must have.
 * @returns `{ ok: true, result }` with the reconstructed bytes, or `{ ok: false, reason }`.
 */
export async function applyUnifiedDiff(input: {
  baseBytes: Uint8Array;
  diffBytes: Uint8Array;
  expectedBaseSha256: string;
  expectedResultSha256: string;
}): Promise<ApplyUnifiedDiffResult> {
  // Defense-in-depth re-check of the DB-side patch_base_mismatch gate: guards against
  // a base blob mutated under us between the gate and the fetch.
  if ((await sha256Hex(input.baseBytes)) !== input.expectedBaseSha256) {
    return { ok: false, reason: "base_hash_mismatch" };
  }

  const diffText = decodeUtf8Strict(input.diffBytes);
  if (diffText === null) {
    return { ok: false, reason: "parse_error" };
  }

  const hunks = parseHunks(diffText);
  if (!hunks) {
    return { ok: false, reason: "parse_error" };
  }

  const baseLines = splitBaseLines(input.baseBytes);
  const result = applyHunks(input.baseBytes, baseLines, hunks);
  if (!result) {
    return { ok: false, reason: "apply_failed" };
  }

  if ((await sha256Hex(result)) !== input.expectedResultSha256) {
    return { ok: false, reason: "result_hash_mismatch" };
  }

  return { ok: true, result };
}
import { describe, expect, it } from "vitest";
import { workspaceBlobObjectKeyFor } from "./artifact-bytes-encryption.js";
import { seedEncryptedWorkspaceBlob, testArtifactBytesKeyRing } from "./test-helpers/encrypted-artifact-fixture.js";
import {
  type R2GetObjectBody,
  readWorkspaceBlobBytes,
  WorkspaceBlobMetadataError,
  WorkspaceBlobMissingError,
  writeWorkspaceBlob,
} from "./workspace-blob-bytes.js";

const WORKSPACE = "ws_blob_bytes";
const SHA = "a".repeat(64);

// In-memory stand-in for the R2 bucket. `gets` and `puts` record the keys touched so
// tests can assert exactly which objects were read or written.
// NOTE(review): the Map/Record/Promise type arguments were missing in the reviewed text
// and are reconstructed here — confirm against the repository.
function fakeR2() {
  const store = new Map<string, { body: Uint8Array; customMetadata?: Record<string, string> }>();
  const puts: string[] = [];
  const gets: string[] = [];
  return {
    store,
    puts,
    gets,
    async get(key: string): Promise<R2GetObjectBody | null> {
      gets.push(key);
      return store.get(key) ?? null;
    },
    async put(key: string, value: Uint8Array, options?: { customMetadata?: Record<string, string> }) {
      puts.push(key);
      store.set(key, { body: value, customMetadata: options?.customMetadata });
    },
    async head(key: string) {
      return store.has(key) ? {} : null;
    },
  };
}

describe("readWorkspaceBlobBytes", () => {
  it("round-trips a blob seeded through the real encrypt path", async () => {
    const ring = testArtifactBytesKeyRing();
    const seeded = await seedEncryptedWorkspaceBlob({ workspaceId: WORKSPACE, sha256: SHA, plaintext: "hello blob" });
    const r2 = fakeR2();
    r2.store.set(seeded.objectKey, { body: seeded.body, customMetadata: seeded.customMetadata });

    const bytes = await readWorkspaceBlobBytes({ r2, workspaceId: WORKSPACE, sha256: SHA, ring });
    expect(new TextDecoder().decode(bytes)).toBe("hello blob");
  });

  it("derives the key from (workspaceId, sha256), never accepting a raw key", async () => {
    const ring = testArtifactBytesKeyRing();
    const seeded = await seedEncryptedWorkspaceBlob({ workspaceId: WORKSPACE, sha256: SHA, plaintext: "x" });
    const r2 = fakeR2();
    r2.store.set(seeded.objectKey, { body: seeded.body, customMetadata: seeded.customMetadata });

    await readWorkspaceBlobBytes({ r2, workspaceId: WORKSPACE, sha256: SHA, ring });
    // The only key read is the canonical derived key.
    const derivedKey = workspaceBlobObjectKeyFor({ workspaceId: WORKSPACE, sha256: SHA });
    expect(seeded.objectKey).toBe(derivedKey);
    expect(r2.gets).toEqual([derivedKey]);
  });

  it("throws WorkspaceBlobMissingError when the object is absent", async () => {
    const ring = testArtifactBytesKeyRing();
    await expect(
      readWorkspaceBlobBytes({ r2: fakeR2(), workspaceId: WORKSPACE, sha256: SHA, ring }),
    ).rejects.toBeInstanceOf(WorkspaceBlobMissingError);
  });

  it("throws WorkspaceBlobMetadataError when encryption metadata is missing", async () => {
    const ring = testArtifactBytesKeyRing();
    const r2 = fakeR2();
    r2.store.set(workspaceBlobObjectKeyFor({ workspaceId: WORKSPACE, sha256: SHA }), {
      body: new Uint8Array([1, 2, 3]),
    });
    await expect(readWorkspaceBlobBytes({ r2, workspaceId: WORKSPACE, sha256: SHA, ring })).rejects.toBeInstanceOf(
      WorkspaceBlobMetadataError,
    );
  });
});

describe("writeWorkspaceBlob", () => {
  it("encrypts under blob AAD and PUTs at the content-addressed key, then reads back", async () => {
    const ring = testArtifactBytesKeyRing();
    const r2 = fakeR2();
    const plaintext = new TextEncoder().encode("reconstructed result");

    const result = await writeWorkspaceBlob({ r2, workspaceId: WORKSPACE, sha256: SHA, plaintext, ring });
    expect(result.written).toBe(true);
    expect(result.key).toBe(workspaceBlobObjectKeyFor({ workspaceId: WORKSPACE, sha256: SHA }));

    const back = await readWorkspaceBlobBytes({ r2, workspaceId: WORKSPACE, sha256: SHA, ring });
    expect(new TextDecoder().decode(back)).toBe("reconstructed result");
  });

  it("is idempotent: skips the PUT when the blob already exists", async () => {
    const ring = testArtifactBytesKeyRing();
    const r2 = fakeR2();
    const plaintext = new TextEncoder().encode("once");

    const first = await writeWorkspaceBlob({ r2, workspaceId: WORKSPACE, sha256: SHA, plaintext, ring });
    const second = await writeWorkspaceBlob({ r2, workspaceId: WORKSPACE, sha256: SHA, plaintext, ring });

    expect(first.written).toBe(true);
    expect(second.written).toBe(false);
    expect(r2.puts).toHaveLength(1);
  });
});
// Read and write workspace content-addressed blobs through the ADR 0063 encryption
// ring. ADR 0087 Stage 4 reconstruction (decrypt a base blob, apply a patch, store
// the result blob) and Bundle generation both perform exactly this encrypt-and-store
// / decrypt-by-sha dance; these helpers are the shared ends so neither re-derives the
// blob AAD context or the object key by hand.

import {
  type ArtifactBytesKeyRing,
  bytesFromReadableBody,
  decryptArtifactBytesWithKeyRing,
  encryptArtifactBytes,
  isArtifactBytesEncryptionMetadata,
  parseRevisionFileObjectKey,
  workspaceBlobObjectKeyFor,
} from "./artifact-bytes-encryption.js";

// The part of a fetched object these helpers consume: the (encrypted) body and the
// custom metadata that carries the encryption parameters.
// NOTE(review): the `Record`/`Promise` type arguments below were missing in the reviewed
// text and are reconstructed from how the helpers and tests use them — confirm against
// the repository before relying on the exact shapes.
export type R2GetObjectBody = {
  body: ReadableStream | ArrayBuffer | Uint8Array | string | null | undefined;
  customMetadata?: Record<string, string>;
};

// Minimal bucket surface used by this module; each helper takes only the methods it
// needs via `Pick`. `get` resolves to null when the object is absent, and `head` is only
// tested for truthiness.
export type WorkspaceBlobR2 = {
  get(key: string): Promise<R2GetObjectBody | null>;
  put(
    key: string,
    value: Uint8Array,
    options?: { httpMetadata?: Record<string, string>; customMetadata?: Record<string, string> },
  ): Promise<unknown>;
  head(key: string): Promise<unknown>;
};

// The encrypting side needs the active signing key (KeyRing); the decrypting side only
// needs kid lookup (ArtifactBytesKeyRing). KeyRing satisfies both.
export type ArtifactBytesSigningRing = ArtifactBytesKeyRing & {
  signingSecret(): string;
  signingKid: number;
};

/**
 * The requested object does not exist in the bucket. `sha256` holds whatever identifier
 * the thrower passed: the blob digest from {@link readWorkspaceBlobBytes}, but the full
 * object key from {@link readRevisionFileObjectBytes}.
 */
export class WorkspaceBlobMissingError extends Error {
  constructor(readonly sha256: string) {
    super("workspace_blob_missing");
    this.name = "WorkspaceBlobMissingError";
  }
}

/**
 * The object exists but its custom metadata is not valid encryption metadata. It is also
 * thrown by {@link readRevisionFileObjectBytes} when the object key cannot be parsed.
 */
export class WorkspaceBlobMetadataError extends Error {
  constructor() {
    super("workspace_blob_metadata_missing");
    this.name = "WorkspaceBlobMetadataError";
  }
}

/**
 * Fetches and decrypts a workspace content-addressed blob by its sha256. The caller
 * supplies a validated (workspaceId, sha256) pair — never a raw object key — so the
 * read is constrained to a blob the caller has already proven it may access (the key
 * is derived here, not accepted from outside). Throws {@link WorkspaceBlobMissingError}
 * when the object is absent (an infra/consistency failure, mapped by the caller to a
 * retryable error, never a patch conflict).
 *
 * NOTE(review): the `Pick`/`Promise` type arguments in this module were missing in the
 * reviewed text and are reconstructed — confirm against the repository.
 *
 * @throws WorkspaceBlobMissingError when no object exists at the derived key.
 * @throws WorkspaceBlobMetadataError when the object lacks valid encryption metadata.
 */
export async function readWorkspaceBlobBytes(input: {
  r2: Pick<WorkspaceBlobR2, "get">;
  workspaceId: string;
  sha256: string;
  ring: ArtifactBytesKeyRing;
}): Promise<Uint8Array> {
  const key = workspaceBlobObjectKeyFor({ workspaceId: input.workspaceId, sha256: input.sha256 });
  const object = await input.r2.get(key);
  if (!object) {
    throw new WorkspaceBlobMissingError(input.sha256);
  }
  if (!isArtifactBytesEncryptionMetadata(object.customMetadata)) {
    throw new WorkspaceBlobMetadataError();
  }
  const ciphertext = await bytesFromReadableBody(object.body);
  // The decryption context repeats (workspaceId, sha256), matching the blob context
  // used by writeWorkspaceBlob.
  return decryptArtifactBytesWithKeyRing({
    ciphertext,
    ring: input.ring,
    metadata: object.customMetadata,
    context: { kind: "blob", workspaceId: input.workspaceId, sha256: input.sha256 },
  });
}

/**
 * Fetches and decrypts a revision-scoped file object (revision AAD v1) by its full key.
 * A patched file's uploaded diff bytes live under such a key (sha256 null, not content-
 * addressed), so reconstruction reads the diff this way. The key's artifact/revision/path
 * are bound into the AAD, so a substituted key fails decryption.
 *
 * @throws WorkspaceBlobMetadataError when the key cannot be parsed as a revision file key,
 *   or when the object lacks valid encryption metadata.
 * @throws WorkspaceBlobMissingError (carrying the object key) when no object exists.
 */
export async function readRevisionFileObjectBytes(input: {
  r2: Pick<WorkspaceBlobR2, "get">;
  objectKey: string;
  workspaceId: string;
  ring: ArtifactBytesKeyRing;
}): Promise<Uint8Array> {
  // Parse before fetching: a key with the wrong shape never reaches the bucket.
  const parts = parseRevisionFileObjectKey(input.objectKey);
  if (!parts) {
    throw new WorkspaceBlobMetadataError();
  }
  const object = await input.r2.get(input.objectKey);
  if (!object) {
    throw new WorkspaceBlobMissingError(input.objectKey);
  }
  if (!isArtifactBytesEncryptionMetadata(object.customMetadata)) {
    throw new WorkspaceBlobMetadataError();
  }
  const ciphertext = await bytesFromReadableBody(object.body);
  return decryptArtifactBytesWithKeyRing({
    ciphertext,
    ring: input.ring,
    metadata: object.customMetadata,
    context: {
      workspaceId: input.workspaceId,
      artifactId: parts.artifactId,
      revisionId: parts.revisionId,
      normalizedPath: parts.path,
    },
  });
}

/**
 * Encrypts plaintext under the workspace blob AAD (v2 = workspaceId + sha256) and PUTs
 * it at the content-addressed key. Idempotent: if an object already exists at the key
 * the PUT is skipped, which makes a finalize replay free and avoids rewriting an
 * already-reconstructed result. Skipping relies on the key being content-addressed, i.e.
 * an existing object is assumed to hold the same plaintext.
 *
 * NOTE(review): this function does not itself check that `sha256` is the digest of
 * `plaintext`, nor that an existing object decrypts — callers must pass a verified pair.
 *
 * @returns the object key and whether a PUT was actually issued.
 */
export async function writeWorkspaceBlob(input: {
  r2: Pick<WorkspaceBlobR2, "put" | "head">;
  workspaceId: string;
  sha256: string;
  plaintext: Uint8Array;
  ring: ArtifactBytesSigningRing;
}): Promise<{ key: string; written: boolean }> {
  const key = workspaceBlobObjectKeyFor({ workspaceId: input.workspaceId, sha256: input.sha256 });
  if (await input.r2.head(key)) {
    return { key, written: false };
  }
  const encrypted = await encryptArtifactBytes({
    plaintext: input.plaintext,
    rootSecret: input.ring.signingSecret(),
    kid: input.ring.signingKid,
    context: { kind: "blob", workspaceId: input.workspaceId, sha256: input.sha256 },
  });
  await input.r2.put(key, encrypted.ciphertext, {
    httpMetadata: { contentType: "application/octet-stream" },
    customMetadata: encrypted.customMetadata,
  });
  return { key, written: true };
}
"content.bundle": [], diff --git a/scripts/local-mvp-server.mjs b/scripts/local-mvp-server.mjs index fcf6a5bf..038ad5be 100644 --- a/scripts/local-mvp-server.mjs +++ b/scripts/local-mvp-server.mjs @@ -7,7 +7,12 @@ import jobsWorker from "../apps/jobs/dist/index.js"; import streamWorker from "../apps/stream/dist/index.js"; import { createMemoryArtifactLiveNamespace } from "../apps/stream/dist/memory-artifact-live.js"; import uploadWorker from "../apps/upload/dist/index.js"; -import { createLocalServices, createPostgresServices, reparentBlobMigratorFromEnv } from "../packages/db/dist/index.js"; +import { + createLocalServices, + createPostgresServices, + reparentBlobMigratorFromEnv, + revisionReconstructorFromEnv, +} from "../packages/db/dist/index.js"; import { encryptArtifactBytes } from "../packages/storage/dist/index.js"; import { createMemoryWriteAllowanceNamespace } from "../packages/write-allowance/dist/index.js"; import { loadEnvFiles } from "./lib/load-env-files.mjs"; @@ -269,6 +274,10 @@ const reparentBlobMigrator = reparentBlobMigratorFromEnv({ ARTIFACTS: artifacts, ARTIFACT_BYTES_ENCRYPTION_KEY: artifactBytesEncryptionKey, }); +const revisionReconstructor = revisionReconstructorFromEnv({ + ARTIFACTS: artifacts, + ARTIFACT_BYTES_ENCRYPTION_KEY: artifactBytesEncryptionKey, +}); const services = postgresBinding ? 
createPostgresServices({ binding: postgresBinding, @@ -276,12 +285,14 @@ const services = postgresBinding apiBaseUrl, contentBaseUrl, reparentBlobMigrator, + revisionReconstructor, }) : createLocalServices({ apiKeyPepper, apiBaseUrl, contentBaseUrl, reparentBlobMigrator, + revisionReconstructor, }); const denylist = new MemoryKVNamespace(); const cliRelease = new MemoryKVNamespace(); diff --git a/scripts/smoke-local-patch.mjs b/scripts/smoke-local-patch.mjs new file mode 100644 index 00000000..2a4dac91 --- /dev/null +++ b/scripts/smoke-local-patch.mjs @@ -0,0 +1,310 @@ +#!/usr/bin/env node +// End-to-end smoke for ADR 0087 Stage 4 intra-file patch reconstruction. Unlike the +// unit/integration tests (which use a fake reconstructor), this drives the REAL path: +// boots the local MVP server (real encryption ring + in-memory R2 that round-trips +// ciphertext), publishes a base Revision with known bytes, then create-session with a +// real `base_revision_id` + unified-diff `patch`, PUTs the diff bytes (encrypted under +// revision AAD), finalizes (the real RevisionReconstructor decrypts the base blob, +// applies the diff, hash-verifies, re-encrypts under blob AAD), publishes, and fetches +// the served content asserting it is byte-identical to applying the patch locally. Also +// asserts the conflict path: a diff whose result digest is wrong fails with patch_conflict. +import { createHash } from "node:crypto"; +import { spawn } from "node:child_process"; +import { once } from "node:events"; +import { setTimeout as delay } from "node:timers/promises"; +import { fileURLToPath } from "node:url"; +import { waitForHarnessHealth } from "./lib/smoke-port.mjs"; +import { provisionSmokeWorkspace, smokeHarnessSecretFromEnv, waitForHealthz } from "./smoke-harness.mjs"; + +const root = new URL("..", import.meta.url); +// Default target is the local MVP harness; pass `preview` to run against hosted preview. +const target = (process.argv[2] ?? 
"local").toLowerCase(); +const isLocal = target === "local"; +const apiPort = intEnv("AGENT_PASTE_LOCAL_API_PORT", 8787); +const uploadPort = intEnv("AGENT_PASTE_LOCAL_UPLOAD_PORT", 8788); +const contentPort = intEnv("AGENT_PASTE_LOCAL_CONTENT_PORT", 8789); +const jobsPort = intEnv("AGENT_PASTE_LOCAL_JOBS_PORT", 8790); +const apiBaseUrl = isLocal + ? `http://127.0.0.1:${apiPort}` + : env("AGENT_PASTE_PREVIEW_API_URL", "https://agent-paste-api-preview.isaac-a46.workers.dev"); +const uploadBaseUrl = isLocal + ? `http://127.0.0.1:${uploadPort}` + : env("AGENT_PASTE_PREVIEW_UPLOAD_URL", "https://agent-paste-upload-preview.isaac-a46.workers.dev"); +const jobsBaseUrl = isLocal ? `http://127.0.0.1:${jobsPort}` : ""; +const harnessSecret = smokeHarnessSecretFromEnv(); +const serverEntry = fileURLToPath(new URL("./local-mvp-server.mjs", import.meta.url)); + +const sha256Hex = (bytes) => createHash("sha256").update(bytes).digest("hex"); +const enc = new TextEncoder(); + +const server = isLocal + ? 
spawn(process.execPath, [serverEntry], { + cwd: root, + env: { + ...process.env, + AGENT_PASTE_LOCAL_API_PORT: String(apiPort), + AGENT_PASTE_LOCAL_UPLOAD_PORT: String(uploadPort), + AGENT_PASTE_LOCAL_CONTENT_PORT: String(contentPort), + AGENT_PASTE_LOCAL_JOBS_PORT: String(jobsPort), + SMOKE_HARNESS_SECRET: harnessSecret, + }, + stdio: ["ignore", "pipe", "pipe"], + }) + : null; +let serverLog = ""; +if (server) { + server.stdout.on("data", (c) => { + serverLog += c.toString(); + }); + server.stderr.on("data", (c) => { + serverLog += c.toString(); + }); +} + +try { + const { ApiClient } = await import("../packages/api-client/dist/src/index.js"); + if (server) { + await waitForHarnessHealth( + server, + [apiBaseUrl, jobsBaseUrl], + { getLog: () => serverLog, timeoutMs: 10_000, sleepMs: 100 }, + waitForHealthz, + ); + } + + const apiKeySecret = await resolveApiKey(); + assert(apiKeySecret, "resolved an API key"); + + const client = new ApiClient({ + auth: { type: "api_key", apiKey: apiKeySecret }, + apiBaseUrl, + uploadBaseUrl, + }); + + // Base Revision: index.html (entrypoint) + a large text file we will patch. Both are + // blob-backed (sha256 set) so the patched path can inherit / be replaced cleanly. + const indexBytes = enc.encode("patch base

base

\n"); + const baseBig = enc.encode(makeBigText("line")); + const base = await publishTree(client, "patch base", [ + { path: "index.html", bytes: indexBytes }, + { path: "big.txt", bytes: baseBig }, + ]); + assert(base.revision_id?.startsWith("rev_"), "base publish returned a revision id"); + + // Apply a real one-line edit locally to compute the expected result + its digest. + const resultBig = enc.encode(makeBigText("line").replace("line 500\n", "LINE FIVE HUNDRED\n")); + const diff = unifiedDiffLineSwap(500, "line 500", "LINE FIVE HUNDRED"); + + // --- Happy path: patch reconstructs byte-exactly and serves through content. --- + const revised = await publishPatch(client, { + artifactId: base.artifact_id, + baseRevisionId: base.revision_id, + path: "big.txt", + diffBytes: enc.encode(diff), + baseSha256: sha256Hex(baseBig), + resultSha256: sha256Hex(resultBig), + }); + assert(revised.revision_id !== base.revision_id, "patch publish created a new revision"); + + // Fetch the reconstructed file through the content path and assert byte-exactness. + const served = await fetchArtifactFile(revised, "big.txt"); + assertBytesEqual(served, resultBig, "served big.txt is byte-identical to the locally-applied patch"); + + // The entrypoint inherited unchanged from the base must still serve. + const servedIndex = await fetchArtifactFile(revised, "index.html"); + assertBytesEqual(servedIndex, indexBytes, "inherited index.html still serves byte-identically"); + + // --- Conflict path: a diff whose declared result digest is wrong must fail loud. 
--- + let conflict; + try { + await publishPatch(client, { + artifactId: base.artifact_id, + baseRevisionId: revised.revision_id, + path: "big.txt", + diffBytes: enc.encode(unifiedDiffLineSwap(500, "LINE FIVE HUNDRED", "broken edit", resultBig)), + baseSha256: sha256Hex(resultBig), + resultSha256: "0".repeat(64), // deliberately wrong → result_hash_mismatch + }); + } catch (error) { + conflict = error; + } + assert(conflict, "a patch with a wrong result digest must throw"); + assert( + conflict.code === "patch_conflict", + `conflict code should be patch_conflict, got ${conflict.code} (${conflict.message})`, + ); + assert(conflict.status === 422, `conflict status should be 422, got ${conflict.status}`); + assert( + typeof conflict.message === "string" && conflict.message.includes("big.txt"), + `conflict message should name the path: ${conflict.message}`, + ); + + process.stdout.write(`Patch smoke passed (${target}). + + Base revision: ${base.revision_id} + Patched revision: ${revised.revision_id} + Reconstructed big.txt served byte-exact (${resultBig.byteLength} bytes from a ${diff.length}-byte diff). + Conflict path: ${conflict.code} (${conflict.status}) — "${conflict.message}" + +`); +} catch (error) { + process.stderr.write(`Patch smoke failed (${target}): ${error instanceof Error ? error.message : String(error)}\n`); + if (serverLog.trim()) { + process.stderr.write(`\nLocal server output:\n${serverLog}\n`); + } + process.exitCode = 1; +} finally { + if (server) { + server.kill("SIGTERM"); + await Promise.race([once(server, "exit"), delay(1000)]).catch(() => undefined); + if (server.exitCode === null) { + server.kill("SIGKILL"); + await Promise.race([once(server, "exit"), delay(1000)]).catch(() => undefined); + } + } +} + +function env(name, fallback) { + const value = process.env[name]; + return typeof value === "string" && value.length > 0 ? 
value : fallback; +} + +// Mirror scripts/smoke-hosted.mjs credential resolution: a preprovisioned key wins; +// otherwise provision via the harness secret (the PREVIEW_-prefixed one against preview, +// the local default against the harness). +async function resolveApiKey() { + const preprovisioned = env("AGENT_PASTE_SMOKE_API_KEY", ""); + if (preprovisioned) { + return preprovisioned; + } + const secret = isLocal + ? harnessSecret + : env("AGENT_PASTE_PREVIEW_SMOKE_HARNESS_SECRET", env("AGENT_PASTE_SMOKE_HARNESS_SECRET", harnessSecret)); + const provisioned = await provisionSmokeWorkspace(apiBaseUrl, { + email: `patch-${Date.now()}@example.test`, + name: "Patch Smoke", + secret, + }); + assert(provisioned.api_key?.secret, "provision returned an API key"); + return provisioned.api_key.secret; +} + +// Publish a full-manifest tree of { path, bytes } via create-session → PUT → finalize → +// publish, returning the publish result. +async function publishTree(client, title, files) { + const idem = `base-${Date.now()}-${Math.round(performance.now())}`; + const session = await client.uploadSessions.create( + { + title, + entrypoint: "index.html", + files: files.map((f) => ({ path: f.path, size_bytes: f.bytes.byteLength, sha256: sha256Hex(f.bytes) })), + }, + idem, + ); + await putTargets(client, session, files); + const finalized = await client.uploadSessions.finalize(session.upload_session_id, `${idem}-fin`); + return client.revisions.publish(finalized.artifact_id, finalized.revision_id, `${idem}-pub`); +} + +// Publish a partial-manifest revision carrying a single patched file. 
+async function publishPatch(client, input) { + const idem = `patch-${Date.now()}-${Math.round(performance.now())}`; + const session = await client.uploadSessions.create( + { + artifact_id: input.artifactId, + base_revision_id: input.baseRevisionId, + title: "patched", + entrypoint: "index.html", + files: [ + { + path: input.path, + size_bytes: input.diffBytes.byteLength, + patch: { base_sha256: input.baseSha256, format: "unified", result_sha256: input.resultSha256 }, + }, + ], + }, + idem, + ); + await putTargets(client, session, [{ path: input.path, bytes: input.diffBytes }]); + const finalized = await client.uploadSessions.finalize(session.upload_session_id, `${idem}-fin`); + return client.revisions.publish(finalized.artifact_id, finalized.revision_id, `${idem}-pub`); +} + +async function putTargets(client, session, files) { + for (const target of session.files) { + if (target.status !== "upload_required") { + continue; + } + const file = files.find((f) => f.path === target.path); + assert(file, `no bytes for upload target ${target.path}`); + await client.putFile(target.put_url, file.bytes, target.required_headers); + } +} + +// Fetch one file of a published artifact through the content origin via the agent view. +async function fetchArtifactFile(publishResult, path) { + const agentView = await fetchJson(publishResult.agent_view_url); + const file = agentView.files.find((f) => f.path === path); + assert(file, `agent view did not list ${path}`); + const response = await fetch(file.url); + assert(response.status === 200, `content fetch for ${path} returned ${response.status}`); + return new Uint8Array(await response.arrayBuffer()); +} + +function makeBigText(prefix) { + const lines = []; + for (let i = 1; i <= 1000; i++) { + lines.push(`${prefix} ${i}`); + } + return `${lines.join("\n")}\n`; +} + +// Build a minimal valid unified diff swapping one line (3 lines of context each side), +// computed against the real surrounding lines so it applies cleanly. 
+function unifiedDiffLineSwap(lineNo, from, to, baseOverride) { + const baseText = baseOverride ? new TextDecoder().decode(baseOverride) : makeBigText("line"); + const lines = baseText.split("\n"); + const idx = lineNo - 1; + assert(lines[idx] === from, `expected base line ${lineNo} to be "${from}", got "${lines[idx]}"`); + const ctxBefore = [lines[idx - 3], lines[idx - 2], lines[idx - 1]]; + const ctxAfter = [lines[idx + 1], lines[idx + 2], lines[idx + 3]]; + const oldStart = lineNo - 3; + return [ + `@@ -${oldStart},7 +${oldStart},7 @@`, + ...ctxBefore.map((l) => ` ${l}`), + `-${from}`, + `+${to}`, + ...ctxAfter.map((l) => ` ${l}`), + "", + ].join("\n"); +} + +async function fetchJson(url) { + const response = await fetch(url); + if (!response.ok) { + throw new Error(`${url} returned ${response.status}`); + } + return response.json(); +} + +function assertBytesEqual(actual, expected, message) { + if (actual.byteLength !== expected.byteLength) { + throw new Error(`${message}: length ${actual.byteLength} !== ${expected.byteLength}`); + } + for (let i = 0; i < actual.byteLength; i++) { + if (actual[i] !== expected[i]) { + throw new Error(`${message}: byte ${i} differs (${actual[i]} !== ${expected[i]})`); + } + } +} + +function assert(condition, message) { + if (!condition) { + throw new Error(message); + } +} + +function intEnv(name, fallback) { + const value = Number.parseInt(process.env[name] ?? "", 10); + return Number.isFinite(value) && value > 0 ? value : fallback; +} From aca383a71732b7489d77cf6f23e9556eb2417829 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 15:51:26 -0700 Subject: [PATCH 05/14] ci: run the patch reconstruction smoke locally and against PR preview (ADR 0087 stage 4) Stage 4's unit/integration tests use a fake reconstructor, so the only checks that exercise the real decrypt -> apply diff -> hash-verify -> re-encrypt -> serve path are the smoke. 
Wire it into both gates: - ci.yml: `pnpm smoke:local:patch` after the existing local smoke (in-memory MVP, every PR via Validate). - pr-preview.yml: `node scripts/smoke-local-patch.mjs pr` against the deployed PR preview Workers, reusing the per-PR deploy outputs + harness secret. smoke-local-patch.mjs now supports local/preview/pr targets with env resolution mirroring scripts/smoke-hosted.mjs. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 7 ++++ .github/workflows/pr-preview.yml | 11 +++++++ scripts/smoke-local-patch.mjs | 55 ++++++++++++++++++++++++-------- 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 77d4e3dd..3631befb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -199,6 +199,13 @@ jobs: if: ${{ env.RUN_CODE == 'true' }} run: pnpm smoke:local + # Real intra-file patch reconstruction through the in-memory harness (ADR 0087 + # stage 4): the unit tests use a fake reconstructor, so this is the only check + # that exercises real decrypt -> apply diff -> hash-verify -> re-encrypt -> serve. + - name: Local patch smoke + if: ${{ env.RUN_CODE == 'true' }} + run: pnpm smoke:local:patch + # The OpenAPI specs are validated by the release security attestation # (`pnpm security:attest`), which otherwise only runs at deploy/release # time. 
Gate the *deterministic* spec check here too so a contract change diff --git a/.github/workflows/pr-preview.yml b/.github/workflows/pr-preview.yml index 6738d9b5..8f5f6cae 100644 --- a/.github/workflows/pr-preview.yml +++ b/.github/workflows/pr-preview.yml @@ -228,6 +228,17 @@ jobs: AGENT_PASTE_EPHEMERAL_SMOKE_WORKOS_ACCESS_TOKEN: ${{ secrets.AGENT_PASTE_EPHEMERAL_SMOKE_WORKOS_ACCESS_TOKEN }} run: node scripts/smoke-hosted-ephemeral.mjs pr + # Real intra-file patch reconstruction against the deployed PR preview (ADR 0087 + # stage 4): exercises decrypt -> apply diff -> hash-verify -> re-encrypt -> serve + # byte-exact + the patch_conflict path through the live upload/api/content Workers. + - name: Hosted patch reconstruction smoke + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + AGENT_PASTE_PR_API_URL: ${{ steps.deploy.outputs.api_url }} + AGENT_PASTE_PR_UPLOAD_URL: ${{ steps.deploy.outputs.upload_url }} + AGENT_PASTE_PR_SMOKE_HARNESS_SECRET: ${{ steps.deploy.outputs.smoke_harness_secret }} + run: node scripts/smoke-local-patch.mjs pr + - name: Lighthouse dashboard accessibility gate env: # Override in workflow_dispatch or forked test runs if the style-guide threshold changes. diff --git a/scripts/smoke-local-patch.mjs b/scripts/smoke-local-patch.mjs index 2a4dac91..32735cd3 100644 --- a/scripts/smoke-local-patch.mjs +++ b/scripts/smoke-local-patch.mjs @@ -1,4 +1,5 @@ #!/usr/bin/env node +import { spawn } from "node:child_process"; // End-to-end smoke for ADR 0087 Stage 4 intra-file patch reconstruction. Unlike the // unit/integration tests (which use a fake reconstructor), this drives the REAL path: // boots the local MVP server (real encryption ring + in-memory R2 that round-trips @@ -9,7 +10,6 @@ // the served content asserting it is byte-identical to applying the patch locally. Also // asserts the conflict path: a diff whose result digest is wrong fails with patch_conflict. 
import { createHash } from "node:crypto"; -import { spawn } from "node:child_process"; import { once } from "node:events"; import { setTimeout as delay } from "node:timers/promises"; import { fileURLToPath } from "node:url"; @@ -17,23 +17,51 @@ import { waitForHarnessHealth } from "./lib/smoke-port.mjs"; import { provisionSmokeWorkspace, smokeHarnessSecretFromEnv, waitForHealthz } from "./smoke-harness.mjs"; const root = new URL("..", import.meta.url); -// Default target is the local MVP harness; pass `preview` to run against hosted preview. +// Targets: `local` (in-memory MVP harness, default), `preview` (persistent preview env), +// `pr` (per-PR ephemeral preview deploy). Hosted targets mirror scripts/smoke-hosted.mjs +// env resolution so CI can reuse the same secrets. const target = (process.argv[2] ?? "local").toLowerCase(); const isLocal = target === "local"; const apiPort = intEnv("AGENT_PASTE_LOCAL_API_PORT", 8787); const uploadPort = intEnv("AGENT_PASTE_LOCAL_UPLOAD_PORT", 8788); const contentPort = intEnv("AGENT_PASTE_LOCAL_CONTENT_PORT", 8789); const jobsPort = intEnv("AGENT_PASTE_LOCAL_JOBS_PORT", 8790); -const apiBaseUrl = isLocal - ? `http://127.0.0.1:${apiPort}` - : env("AGENT_PASTE_PREVIEW_API_URL", "https://agent-paste-api-preview.isaac-a46.workers.dev"); -const uploadBaseUrl = isLocal - ? `http://127.0.0.1:${uploadPort}` - : env("AGENT_PASTE_PREVIEW_UPLOAD_URL", "https://agent-paste-upload-preview.isaac-a46.workers.dev"); +const hosted = hostedConfig(target); +const apiBaseUrl = isLocal ? `http://127.0.0.1:${apiPort}` : hosted.apiBaseUrl; +const uploadBaseUrl = isLocal ? `http://127.0.0.1:${uploadPort}` : hosted.uploadBaseUrl; const jobsBaseUrl = isLocal ? 
`http://127.0.0.1:${jobsPort}` : ""; const harnessSecret = smokeHarnessSecretFromEnv(); const serverEntry = fileURLToPath(new URL("./local-mvp-server.mjs", import.meta.url)); +function hostedConfig(name) { + if (name === "local") { + return { apiBaseUrl: "", uploadBaseUrl: "", harnessSecret: "" }; + } + if (name === "preview") { + return { + apiBaseUrl: env("AGENT_PASTE_PREVIEW_API_URL", "https://agent-paste-api-preview.isaac-a46.workers.dev"), + uploadBaseUrl: env("AGENT_PASTE_PREVIEW_UPLOAD_URL", "https://agent-paste-upload-preview.isaac-a46.workers.dev"), + harnessSecret: env("AGENT_PASTE_PREVIEW_SMOKE_HARNESS_SECRET", env("AGENT_PASTE_SMOKE_HARNESS_SECRET", "")), + }; + } + if (name === "pr") { + return { + apiBaseUrl: requiredEnv("AGENT_PASTE_PR_API_URL"), + uploadBaseUrl: requiredEnv("AGENT_PASTE_PR_UPLOAD_URL"), + harnessSecret: env("AGENT_PASTE_PR_SMOKE_HARNESS_SECRET", env("AGENT_PASTE_PREVIEW_SMOKE_HARNESS_SECRET", "")), + }; + } + throw new Error(`unknown target "${name}" (expected local, preview, or pr)`); +} + +function requiredEnv(name) { + const value = process.env[name]; + if (!value) { + throw new Error(`missing required env ${name} for target ${target}`); + } + return value; +} + const sha256Hex = (bytes) => createHash("sha256").update(bytes).digest("hex"); const enc = new TextEncoder(); @@ -170,16 +198,17 @@ function env(name, fallback) { } // Mirror scripts/smoke-hosted.mjs credential resolution: a preprovisioned key wins; -// otherwise provision via the harness secret (the PREVIEW_-prefixed one against preview, -// the local default against the harness). +// otherwise provision via the target's harness secret (local default for the harness, +// the per-env harness secret for preview/pr). async function resolveApiKey() { const preprovisioned = env("AGENT_PASTE_SMOKE_API_KEY", ""); if (preprovisioned) { return preprovisioned; } - const secret = isLocal - ? 
harnessSecret - : env("AGENT_PASTE_PREVIEW_SMOKE_HARNESS_SECRET", env("AGENT_PASTE_SMOKE_HARNESS_SECRET", harnessSecret)); + const secret = isLocal ? harnessSecret : hosted.harnessSecret; + if (!secret) { + throw new Error(`no API key or harness secret available for target ${target}`); + } const provisioned = await provisionSmokeWorkspace(apiBaseUrl, { email: `patch-${Date.now()}@example.test`, name: "Patch Smoke", From fdc468729552aaf9dff7265bcd3d5f39af9fc9bf Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 16:01:15 -0700 Subject: [PATCH 06/14] docs: renumber Git-revision ADRs to 0087/0088 after merge collision Merged PR #525 claimed ADR 0086 for "publish is content-only, private-first". This branch's earlier work also created an 0086 (workspace-scoped blob dedup) and an 0087 (revision commit chain + reconstructed delta). Renumber this branch's pair to 0087 (blob dedup) and 0088 (revision delta) so 0086 stays the merged publish-privacy ADR, and sweep every cross-reference (ADR bodies, specs, migrations, code comments, CI smoke steps) to the new numbers. Reference-only: no schema, contract, or logic change. Full suite green (typecheck 39/39, test 39/39, openapi:check). 
Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 2 +- .github/workflows/pr-preview.yml | 2 +- apps/upload/src/env.ts | 2 +- apps/upload/src/finalize.ts | 2 +- docs/development.md | 2 +- docs/ops/git-like-revisions-todo.md | 2 +- docs/specs/api.md | 2 +- docs/specs/data-model.md | 8 ++++---- packages/contracts/src/mcp.test.ts | 2 +- packages/contracts/src/uploadSessions.ts | 4 ++-- .../0024_revisions_parent_revision_id.sql | 2 +- ...25_upload_session_base_revision_and_patch.sql | 2 +- packages/db/src/index.test.ts | 4 ++-- .../db/src/postgres/revision-reconstructor.ts | 2 +- packages/db/src/repository-error.ts | 2 +- .../src/repository/upload-session-lifecycle.ts | 16 ++++++++-------- packages/db/src/schema.ts | 4 ++-- packages/db/src/types.ts | 8 ++++---- packages/db/src/validation.ts | 2 +- packages/storage/src/unified-diff.ts | 2 +- packages/storage/src/workspace-blob-bytes.ts | 2 +- scripts/smoke-local-patch.mjs | 2 +- 22 files changed, 38 insertions(+), 38 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3631befb..854e1c98 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -199,7 +199,7 @@ jobs: if: ${{ env.RUN_CODE == 'true' }} run: pnpm smoke:local - # Real intra-file patch reconstruction through the in-memory harness (ADR 0087 + # Real intra-file patch reconstruction through the in-memory harness (ADR 0088 # stage 4): the unit tests use a fake reconstructor, so this is the only check # that exercises real decrypt -> apply diff -> hash-verify -> re-encrypt -> serve. 
- name: Local patch smoke diff --git a/.github/workflows/pr-preview.yml b/.github/workflows/pr-preview.yml index 8f5f6cae..375bd8af 100644 --- a/.github/workflows/pr-preview.yml +++ b/.github/workflows/pr-preview.yml @@ -228,7 +228,7 @@ jobs: AGENT_PASTE_EPHEMERAL_SMOKE_WORKOS_ACCESS_TOKEN: ${{ secrets.AGENT_PASTE_EPHEMERAL_SMOKE_WORKOS_ACCESS_TOKEN }} run: node scripts/smoke-hosted-ephemeral.mjs pr - # Real intra-file patch reconstruction against the deployed PR preview (ADR 0087 + # Real intra-file patch reconstruction against the deployed PR preview (ADR 0088 # stage 4): exercises decrypt -> apply diff -> hash-verify -> re-encrypt -> serve # byte-exact + the patch_conflict path through the live upload/api/content Workers. - name: Hosted patch reconstruction smoke diff --git a/apps/upload/src/env.ts b/apps/upload/src/env.ts index 59504c2e..d6231dd9 100644 --- a/apps/upload/src/env.ts +++ b/apps/upload/src/env.ts @@ -33,7 +33,7 @@ export type R2Bucket = { options?: { httpMetadata?: Record; customMetadata?: Record }, ): Promise; head(key: string): Promise; - // Reconstruction (ADR 0087) reads a base blob + the uploaded diff back at finalize to + // Reconstruction (ADR 0088) reads a base blob + the uploaded diff back at finalize to // apply the patch. This is the only read on upload's R2 binding; every other op writes. get(key: string): Promise; }; diff --git a/apps/upload/src/finalize.ts b/apps/upload/src/finalize.ts index 5c3417d4..8aebd4c5 100644 --- a/apps/upload/src/finalize.ts +++ b/apps/upload/src/finalize.ts @@ -59,7 +59,7 @@ export async function finalizeUploadSession( const repositoryCode = repositoryErrorToAppError(error); if (repositoryCode) { // A patch conflict carries the path + failure reason on the error cause so the - // agent learns which file to regenerate (ADR 0087). Other codes use their default + // agent learns which file to regenerate (ADR 0088). Other codes use their default // message. 
const detail = repositoryCode === "patch_conflict" && isRepositoryError(error) && error.cause instanceof Error diff --git a/docs/development.md b/docs/development.md index ac669806..1e206bb5 100644 --- a/docs/development.md +++ b/docs/development.md @@ -158,7 +158,7 @@ deploy production from a laptop. | Command | Purpose | | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | | `pnpm smoke:local` | Build and run the local publish/content/delete smoke path (also gated in CI `Validate`). | -| `pnpm smoke:local:patch` | Build and run the local ADR 0087 intra-file patch reconstruction smoke (real diff apply + serve byte-exact + conflict). | +| `pnpm smoke:local:patch` | Build and run the local ADR 0088 intra-file patch reconstruction smoke (real diff apply + serve byte-exact + conflict). | | `pnpm smoke:ci:postgres` | Build, migrate a job-local Postgres database, and run the local CLI smoke through the Postgres/RLS-backed harness. | | `pnpm smoke:web` | Build and run local web API auth/dashboard smoke assertions. | | `pnpm smoke:mcp` | Build and run local MCP transport + OAuth + publish/read/delete smoke. | diff --git a/docs/ops/git-like-revisions-todo.md b/docs/ops/git-like-revisions-todo.md index 08520385..40c047a2 100644 --- a/docs/ops/git-like-revisions-todo.md +++ b/docs/ops/git-like-revisions-todo.md @@ -139,7 +139,7 @@ result_sha256 }` plus the diff bytes uploaded like any file body. Absence = `sha256` omitted from the signed PUT. Stateful validation (published base, same workspace/artifact, blob-backed-only inheritance, deleted-path-in-base, patch base match) with six new repo error codes mapped to `invalid_request`. - See the ADR 0087 Stage 3 implementation notes for the decisions. + See the ADR 0088 Stage 3 implementation notes for the decisions. 
### Stage 4 - synchronous reconstruct-at-finalize (DONE) diff --git a/docs/specs/api.md b/docs/specs/api.md index 73e2ecf0..d9124ed0 100644 --- a/docs/specs/api.md +++ b/docs/specs/api.md @@ -145,7 +145,7 @@ Rules: hex SHA-256 for each file. Legacy clients that omit it keep the full-upload revision-object path and do not participate in deduplication. - `base_revision_id`, `deleted_paths`, and per-file `patch` are the optional - commit-chain / partial-manifest inputs ([ADR 0087](../adr/0087-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). + commit-chain / partial-manifest inputs ([ADR 0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). When `base_revision_id` is set, `files` lists only changed and added paths, `deleted_paths` drops paths, and every other path inherits from the base Revision by reference. A per-file `patch` (`{ base_sha256, format: "unified", diff --git a/docs/specs/data-model.md b/docs/specs/data-model.md index 42712d68..18430132 100644 --- a/docs/specs/data-model.md +++ b/docs/specs/data-model.md @@ -96,7 +96,7 @@ First-class revision rows for multi-revision Artifacts ([0009](../../packages/db | `id` | `TEXT PRIMARY KEY` | `rev_...`. | | `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id) ON DELETE RESTRICT` | Tenant scope. | | `artifact_id` | `TEXT NOT NULL REFERENCES artifacts(id) ON DELETE CASCADE` | Parent Artifact; deleting the Artifact deletes its revisions. | -| `parent_revision_id` | `TEXT NULL` | Commit-chain parent ([0087](../adr/0087-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)); deferrable self-FK on `(workspace_id, artifact_id, parent_revision_id)` → `revisions(workspace_id, artifact_id, id)`, `ON DELETE SET NULL (parent_revision_id)`. `NULL` for roots. 
| +| `parent_revision_id` | `TEXT NULL` | Commit-chain parent ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)); deferrable self-FK on `(workspace_id, artifact_id, parent_revision_id)` → `revisions(workspace_id, artifact_id, id)`, `ON DELETE SET NULL (parent_revision_id)`. `NULL` for roots. | | `revision_number` | `INTEGER NULL` | Assigned on publish; unique per Artifact when not null. Null while `status = 'draft'`. | | `status` | `TEXT NOT NULL` | `draft`, `published`, or `retained`. | | `entrypoint` | `TEXT NOT NULL` | Normalized file path. | @@ -135,7 +135,7 @@ For `storage_kind = 'revision'`, `r2_key` points at the legacy `storage_kind = 'blob'`, `r2_key` points at a workspace shared blob object under `workspaces/{workspaceId}/blobs/sha256/{prefix}/{sha256}`. -Under tree inheritance ([0087](../adr/0087-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)), +Under tree inheritance ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)), a Revision published against a base copies forward the base's unchanged `artifact_files` rows by reference (same `sha256` / `r2_key` / `storage_kind = 'blob'`), so a one-file change yields a full file tree but only one new blob. Only @@ -203,7 +203,7 @@ exposing scanner internals. | `expires_at` | `TIMESTAMPTZ NOT NULL` | Upload session TTL, typically 24 hours. | | `created_at` | `TIMESTAMPTZ NOT NULL` | | | `finalized_at` | `TIMESTAMPTZ NULL` | | -| `base_revision_id` | `TEXT NULL` | Base Revision this publish inherits from ([0087](../adr/0087-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). `NULL` is a full manifest. Copied to `revisions.parent_revision_id` when the tree merge runs at finalize. | +| `base_revision_id` | `TEXT NULL` | Base Revision this publish inherits from ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). `NULL` is a full manifest. 
Copied to `revisions.parent_revision_id` when the tree merge runs at finalize. | | `deleted_paths` | `JSONB NOT NULL DEFAULT '[]'` | Base paths this publish drops. Lets finalize tell a deleted path apart from an inherited one (both are base paths absent from the file manifest). | ### `upload_session_files` @@ -220,7 +220,7 @@ exposing scanner internals. | `storage_kind` | `TEXT NOT NULL DEFAULT 'revision'` | `revision` or `blob`. | | `uploaded_at` | `TIMESTAMPTZ NULL` | Set after successful PUT or existing blob reuse. | | `put_url_expires_at` | `TIMESTAMPTZ NOT NULL` | Session-level upper bound for PUT writes. Set to `upload_sessions.expires_at` at session creation. | -| `patch_base_sha256` | `TEXT NULL` | Intra-file delta ([0087](../adr/0087-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)): digest of the base Revision's file the uploaded unified diff applies to. | +| `patch_base_sha256` | `TEXT NULL` | Intra-file delta ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)): digest of the base Revision's file the uploaded unified diff applies to. | | `patch_result_sha256` | `TEXT NULL` | Digest of the whole file the server reconstructs from the diff (applied synchronously at finalize); the committed `artifact_files` row is an ordinary `storage_kind='blob'` row at this sha. Both patch columns are `NULL` (whole-file upload) or both set (a `CHECK` enforces it). | Primary key `(upload_session_id, path)`. 
diff --git a/packages/contracts/src/mcp.test.ts b/packages/contracts/src/mcp.test.ts index 4dd1f395..5131316a 100644 --- a/packages/contracts/src/mcp.test.ts +++ b/packages/contracts/src/mcp.test.ts @@ -477,7 +477,7 @@ describe("MCP error mapping", () => { }); it("declares patch_conflict on every tool that forwards a finalize call", () => { - // finalize can surface patch_conflict (ADR 0087); a tool that forwards it must + // finalize can surface patch_conflict (ADR 0088); a tool that forwards it must // declare it, or an agent sees an error its contract never advertised (it slipped // out of publishChain once). Scoped to patch_conflict + the finalize route rather // than a full superset assertion, which would relitigate the deliberate exclusion diff --git a/packages/contracts/src/uploadSessions.ts b/packages/contracts/src/uploadSessions.ts index bdcd3430..3bb94ecd 100644 --- a/packages/contracts/src/uploadSessions.ts +++ b/packages/contracts/src/uploadSessions.ts @@ -16,7 +16,7 @@ import { z } from "./zod.js"; export const Sha256Hex = z.string().regex(/^[a-f0-9]{64}$/); export type Sha256Hex = z.infer; -// A changed file may arrive as a patch against a base Revision's file (ADR 0087) +// A changed file may arrive as a patch against a base Revision's file (ADR 0088) // instead of whole bytes. When present, the bytes uploaded for this file entry are // the diff (so the entry's size_bytes/sha256 describe the diff), base_sha256 is the // digest of the file in the base Revision the diff applies to, and result_sha256 is @@ -41,7 +41,7 @@ export type UploadSessionFileInput = z.infer; // client input. Clients (CLI, MCP) cannot request or influence artifact lifetime. // render_mode is an explicit client override; when absent the server infers it // from the entrypoint extension at publish time. 
-// base_revision_id turns this into a partial-manifest publish (ADR 0087): files +// base_revision_id turns this into a partial-manifest publish (ADR 0088): files // lists only changed/added paths, deleted_paths drops paths, and every other path // inherits from the base Revision by reference. deleted_paths and per-file patches // are only meaningful against a base. Structural checks live here; the stateful diff --git a/packages/db/migrations/0024_revisions_parent_revision_id.sql b/packages/db/migrations/0024_revisions_parent_revision_id.sql index 55e4f733..72251485 100644 --- a/packages/db/migrations/0024_revisions_parent_revision_id.sql +++ b/packages/db/migrations/0024_revisions_parent_revision_id.sql @@ -1,6 +1,6 @@ begin; --- Revision commit chain (ADR 0087): a Revision may point at the Revision it was +-- Revision commit chain (ADR 0088): a Revision may point at the Revision it was -- published against. NULL means a root (every pre-existing row is a root; no -- backfill). The composite self-FK references (workspace_id, artifact_id, id) so -- a parent is structurally guaranteed to live in the same Workspace and Artifact. diff --git a/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql b/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql index 9f8579a8..86055bda 100644 --- a/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql +++ b/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql @@ -1,4 +1,4 @@ --- ADR 0087 Stage 3: partial-manifest publish carriers on the upload session. +-- ADR 0088 Stage 3: partial-manifest publish carriers on the upload session. 
-- -- upload_sessions.base_revision_id records the Revision this publish inherits -- from; the tree merge runs at finalize and copies it to diff --git a/packages/db/src/index.test.ts b/packages/db/src/index.test.ts index 6dc01271..8995d50a 100644 --- a/packages/db/src/index.test.ts +++ b/packages/db/src/index.test.ts @@ -2318,7 +2318,7 @@ function fakeReconstructor(options?: { } // Publish a base Revision whose files are blob-backed (sha256 set + uploaded), so -// they are eligible to inherit forward under ADR 0087 tree inheritance. +// they are eligible to inherit forward under ADR 0088 tree inheritance. async function publishBlobBackedBase( repo: LocalRepository, actor: ApiActor, @@ -2368,7 +2368,7 @@ async function publishBlobBackedBase( return { artifactId: published.artifact_id, revisionId: published.revision_id }; } -describe("ADR 0087 tree inheritance", () => { +describe("ADR 0088 tree inheritance", () => { it("inherits unchanged blob-backed files from the base and adds one new blob", async () => { const { repo, actor } = await localRepoWithApiActor(); const base = await publishBlobBackedBase( diff --git a/packages/db/src/postgres/revision-reconstructor.ts b/packages/db/src/postgres/revision-reconstructor.ts index 82b43b5d..c2406556 100644 --- a/packages/db/src/postgres/revision-reconstructor.ts +++ b/packages/db/src/postgres/revision-reconstructor.ts @@ -8,7 +8,7 @@ import { } from "@agent-paste/storage"; import { RevisionReconstructionConflict, type RevisionReconstructor } from "../types.js"; -// ADR 0087 Stage 4: builds the reconstructor that applies an agent-uploaded unified diff +// ADR 0088 Stage 4: builds the reconstructor that applies an agent-uploaded unified diff // to a base blob and stores the whole result as an ordinary content-addressed blob, // SYNCHRONOUSLY at finalize and BEFORE the new Revision commits. 
A clean patch yields a // blob the rest of the system treats like any other; a patch that cannot apply throws an diff --git a/packages/db/src/repository-error.ts b/packages/db/src/repository-error.ts index ca70ce5c..eae4a657 100644 --- a/packages/db/src/repository-error.ts +++ b/packages/db/src/repository-error.ts @@ -104,7 +104,7 @@ const repositoryErrorToAppErrorMap: Record Promise; - // Applies intra-file unified-diff patches before commit (ADR 0087 Stage 4). Only + // Applies intra-file unified-diff patches before commit (ADR 0088 Stage 4). Only // exercised when the session has patched files; absent on full-manifest finalizes. revisionReconstructor?: RevisionReconstructor; }, @@ -461,7 +461,7 @@ export async function finalizeUploadSessionInEntities( repositoryError("upload_incomplete"); } } - // Tree inheritance (ADR 0087): against a base Revision the committed tree is the + // Tree inheritance (ADR 0088): against a base Revision the committed tree is the // merged base + delta, so file_count/size_bytes and the artifact_files rows come // from the merge (the session row counts only the changed manifest). validateUpload // re-checks caps + entrypoint against the real published tree (an inherited path @@ -492,7 +492,7 @@ export async function finalizeUploadSessionInEntities( id: session.revision_id, workspace_id: session.workspace_id, artifact_id: session.artifact_id, - // Set when publishing against a base Revision (ADR 0087 tree inheritance). + // Set when publishing against a base Revision (ADR 0088 tree inheritance). 
parent_revision_id: parentRevisionId, revision_number: null, status: "draft", diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index 871ab7f6..d34fd937 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -197,7 +197,7 @@ export const uploadSessions = pgTable( expiresAt: timestamp("expires_at", { withTimezone: true }).notNull(), createdAt: timestamp("created_at", { withTimezone: true }).notNull(), finalizedAt: timestamp("finalized_at", { withTimezone: true }), - // Base Revision this publish inherits from (ADR 0087 tree inheritance). Null = full + // Base Revision this publish inherits from (ADR 0088 tree inheritance). Null = full // manifest. Copied to revisions.parent_revision_id when the merge runs at finalize. baseRevisionId: text("base_revision_id"), // Base paths this publish drops. Needed to tell a deleted path apart from an @@ -231,7 +231,7 @@ export const uploadSessionFiles = pgTable( storageKind: text("storage_kind").notNull().default("revision"), uploadedAt: timestamp("uploaded_at", { withTimezone: true }), putUrlExpiresAt: timestamp("put_url_expires_at", { withTimezone: true }).notNull(), - // Intra-file delta descriptor (ADR 0087). When set, the uploaded bytes are a + // Intra-file delta descriptor (ADR 0088). When set, the uploaded bytes are a // unified diff against the base file; jobs reconstructs the whole result blob // (Stage 4). base = digest of the base Revision's file, result = digest of the // reconstructed whole file. Both null (whole-file upload) or both set. diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts index e95d5781..49f4596c 100644 --- a/packages/db/src/types.ts +++ b/packages/db/src/types.ts @@ -209,7 +209,7 @@ export type UploadSession = { expires_at: string; created_at: string; finalized_at: string | null; - // Base Revision this publish inherits from (ADR 0087); null = full manifest. + // Base Revision this publish inherits from (ADR 0088); null = full manifest. 
base_revision_id: string | null; // Base paths this publish drops (distinguishes deleted from inherited at finalize). deleted_paths: string[]; @@ -239,7 +239,7 @@ export type StoredFile = { storage_kind?: StoredFileStorageKind; uploaded_at: string | null; put_url_expires_at?: string; - // ADR 0087 intra-file delta descriptor (recorded on upload_session_files only). + // ADR 0088 intra-file delta descriptor (recorded on upload_session_files only). // Both null (whole-file upload) or both set; jobs reconstructs the result blob. patch_base_sha256?: string | null; patch_result_sha256?: string | null; @@ -291,7 +291,7 @@ export type ReparentBlobMigrator = { migrate(input: { fromWorkspaceId: string; toWorkspaceId: string; blobs: readonly WorkspaceBlobRef[] }): Promise; }; -// ADR 0087 Stage 4: a patched file in a partial-manifest publish uploads only a unified +// ADR 0088 Stage 4: a patched file in a partial-manifest publish uploads only a unified // diff. Before the new Revision can commit, the diff is applied to the base blob and the // whole result stored as an ordinary content-addressed blob. The reconstructor takes // VALIDATED descriptors (base/result sha already checked against the base Revision's own @@ -347,6 +347,6 @@ export type RepositoryOptions = { billingEnabled?: boolean; /** Copies workspace blob bytes into the destination tenant before claim reparent commits. */ reparentBlobMigrator?: ReparentBlobMigrator; - /** Applies intra-file unified-diff patches to base blobs before a partial-manifest finalize commits (ADR 0087). */ + /** Applies intra-file unified-diff patches to base blobs before a partial-manifest finalize commits (ADR 0088). 
*/ revisionReconstructor?: RevisionReconstructor; }; diff --git a/packages/db/src/validation.ts b/packages/db/src/validation.ts index f797ead2..e64d051c 100644 --- a/packages/db/src/validation.ts +++ b/packages/db/src/validation.ts @@ -6,7 +6,7 @@ export function validateUpload( files: Array<{ path: string; size_bytes: number }>, usagePolicy: Pick, entrypoint = "index.html", - // A partial-manifest publish (ADR 0087) validates the uploaded delta here for + // A partial-manifest publish (ADR 0088) validates the uploaded delta here for // per-file/count caps only; the entrypoint and artifact-size cap are checked // against the merged tree at finalize, where the inherited paths are known. options: { wholeTree?: boolean } = { wholeTree: true }, diff --git a/packages/storage/src/unified-diff.ts b/packages/storage/src/unified-diff.ts index eea29519..773b254f 100644 --- a/packages/storage/src/unified-diff.ts +++ b/packages/storage/src/unified-diff.ts @@ -1,4 +1,4 @@ -// ADR 0087 Stage 4 intra-file delta: apply an agent-uploaded unified diff to a base +// ADR 0088 Stage 4 intra-file delta: apply an agent-uploaded unified diff to a base // blob and commit the whole reconstructed result. A patch that cannot be applied // cleanly is a first-class, agent-visible CONFLICT (the agent re-submits a corrected // diff), never a silent failure. Reconstruction is byte-exact: the result digest must diff --git a/packages/storage/src/workspace-blob-bytes.ts b/packages/storage/src/workspace-blob-bytes.ts index f70bec73..21a631be 100644 --- a/packages/storage/src/workspace-blob-bytes.ts +++ b/packages/storage/src/workspace-blob-bytes.ts @@ -1,5 +1,5 @@ // Read and write workspace content-addressed blobs through the ADR 0063 encryption -// ring. ADR 0087 Stage 4 reconstruction (decrypt a base blob, apply a patch, store +// ring. 
ADR 0088 Stage 4 reconstruction (decrypt a base blob, apply a patch, store // the result blob) and Bundle generation both perform exactly this encrypt-and-store // / decrypt-by-sha dance; these helpers are the shared ends so neither re-derives the // blob AAD context or the object key by hand. diff --git a/scripts/smoke-local-patch.mjs b/scripts/smoke-local-patch.mjs index 32735cd3..a89af0b0 100644 --- a/scripts/smoke-local-patch.mjs +++ b/scripts/smoke-local-patch.mjs @@ -1,6 +1,6 @@ #!/usr/bin/env node import { spawn } from "node:child_process"; -// End-to-end smoke for ADR 0087 Stage 4 intra-file patch reconstruction. Unlike the +// End-to-end smoke for ADR 0088 Stage 4 intra-file patch reconstruction. Unlike the // unit/integration tests (which use a fake reconstructor), this drives the REAL path: // boots the local MVP server (real encryption ring + in-memory R2 that round-trips // ciphertext), publishes a base Revision with known bytes, then create-session with a From c08ffd4588a51f23e9e17e41e472c443a5b393f6 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 17:51:28 -0700 Subject: [PATCH 07/14] feat(api,cli,mcp,contracts): agent file read-back + CLI incremental-revise diff client (ADR 0089) Stage 5 of the git-like revision model: give an agent without a working copy a way to read back exactly what's stored so it can produce a correct unified-diff revise, and make the CLI send only what changed. - sha256 on Agent View file entries (optional, non-breaking add) - new member-authed GET /v1/artifacts/{id}/file-content in the api worker: it decrypts the owning member's plaintext and returns { path, sha256, size_bytes, content_type, is_binary, body? }. Oversize text and binary files omit the body; oversize short-circuits before touching R2. Any decrypt/storage failure maps to storage_unavailable (503), never a 500. 
- MCP read_file tool forwarding to it; api-client artifacts.readFile() - CLI: per-artifact manifest cache (0600), a byte-exact unified-diff generator that self-checks (re-applies its own diff and verifies the digest before attaching it; a generator bug degrades to a whole-blob upload, never a finalize conflict), incremental revise wiring, a `pull` verb, and single-shot full-republish fallback when the cached base is unusable - finalize now carries the precise base-* repository kind as the error detail so the CLI self-heal fires for all base-unusable conditions, not just patch conflicts (the 5 base-* kinds collapse to invalid_request on the wire) ADR 0089 records the api-decrypts-member-plaintext trust-boundary decision. Co-Authored-By: Claude Opus 4.8 --- apps/api/package.json | 1 + apps/api/src/env.ts | 7 + apps/api/src/index.ts | 9 + .../src/routes/artifact-file-content.test.ts | 186 ++++++ apps/api/src/routes/artifact-file-content.ts | 105 ++++ apps/cli/src/help.ts | 42 ++ apps/cli/src/index.ts | 277 ++++---- apps/cli/src/local.ts | 17 + apps/cli/src/manifest-cache.test.ts | 51 ++ apps/cli/src/manifest-cache.ts | 96 +++ apps/cli/src/publish-format.ts | 106 ++++ apps/cli/src/revise.test.ts | 165 +++++ apps/cli/src/revise.ts | 151 +++++ apps/cli/src/unified-diff-gen.test.ts | 83 +++ apps/cli/src/unified-diff-gen.ts | 206 ++++++ apps/cli/test/index.test.ts | 211 ++++++- apps/mcp/src/tools.test.ts | 25 + apps/mcp/src/tools.ts | 15 + apps/mcp/src/transport.test.ts | 1 + apps/upload/src/finalize.test.ts | 33 + apps/upload/src/finalize.ts | 42 +- ...eritance-and-server-reconstructed-delta.md | 2 +- ...read-back-api-decrypts-member-plaintext.md | 137 ++++ docs/adr/README.md | 6 +- docs/ops/git-like-revisions-todo.md | 60 +- docs/specs/api.md | 3 + docs/specs/cli.md | 36 +- packages/api-client/src/index.ts | 18 + packages/api-client/src/publish.test.ts | 31 + packages/api-client/src/publish.ts | 39 +- packages/contracts/openapi/api.json | 592 ++++++++++++++++++ 
packages/contracts/src/agentView.ts | 6 + packages/contracts/src/artifacts.ts | 25 +- packages/contracts/src/mcp.test.ts | 1 + packages/contracts/src/mcp/registry.ts | 21 + packages/contracts/src/mcp/schemas.ts | 12 +- packages/contracts/src/mcp/tool-schemas.ts | 4 + packages/contracts/src/mvp-contracts.test.ts | 2 + .../contracts/src/openapi/api.artifacts.ts | 21 +- packages/contracts/src/openapi/api.helpers.ts | 17 + packages/contracts/src/openapi/shared.ts | 9 +- packages/contracts/src/primitives.ts | 3 + .../src/routes/registry.artifacts.ts | 20 + packages/contracts/src/uploadSessions.ts | 6 +- packages/db/src/agent-view.ts | 3 + packages/storage/src/index.ts | 7 +- packages/storage/src/unified-diff.ts | 2 +- .../src/route-repository-errors.ts | 8 + pnpm-lock.yaml | 3 + scripts/smoke-local-patch.mjs | 48 +- 50 files changed, 2762 insertions(+), 209 deletions(-) create mode 100644 apps/api/src/routes/artifact-file-content.test.ts create mode 100644 apps/api/src/routes/artifact-file-content.ts create mode 100644 apps/cli/src/help.ts create mode 100644 apps/cli/src/manifest-cache.test.ts create mode 100644 apps/cli/src/manifest-cache.ts create mode 100644 apps/cli/src/publish-format.ts create mode 100644 apps/cli/src/revise.test.ts create mode 100644 apps/cli/src/revise.ts create mode 100644 apps/cli/src/unified-diff-gen.test.ts create mode 100644 apps/cli/src/unified-diff-gen.ts create mode 100644 docs/adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md diff --git a/apps/api/package.json b/apps/api/package.json index 11e149ca..10755338 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -32,6 +32,7 @@ "@agent-paste/contracts": "workspace:*", "@agent-paste/db": "workspace:*", "@agent-paste/rotation": "workspace:*", + "@agent-paste/storage": "workspace:*", "@agent-paste/tokens": "workspace:*", "@agent-paste/worker-runtime": "workspace:*", "@agent-paste/write-allowance": "workspace:*", diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts 
index bab81637..bfeb426c 100644 --- a/apps/api/src/env.ts +++ b/apps/api/src/env.ts @@ -18,9 +18,16 @@ export type PaginationInput = { export type R2ListedObject = { key: string }; export type R2Objects = { objects: R2ListedObject[]; truncated: boolean; cursor?: string }; +export type R2GetObjectBody = { + body: ReadableStream | ArrayBuffer | Uint8Array | string | null | undefined; + customMetadata?: Record; +}; export type R2Bucket = { list(options: { prefix?: string; cursor?: string; limit?: number }): Promise; delete(keys: string | string[]): Promise; + // ADR 0089: the file-content read route decrypts a stored blob. This is + // the only read on api's R2 binding; every other api op lists or deletes. + get(key: string): Promise; }; export type KVNamespace = { diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index aa0419ab..5e2f633c 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -25,6 +25,7 @@ import { revokeAccessLinkRoute, } from "./routes/access-links.js"; import { getUsagePolicy, mcpWhoami, revokeCurrentApiKey, whoami } from "./routes/account.js"; +import { readArtifactFileContent } from "./routes/artifact-file-content.js"; import { billingCheckout, billingInvoices, @@ -202,6 +203,14 @@ apiDbRegistrar.mount(contractById("agentView.getRevision"), async (context, prin revisionId: context.req.param("revision_id") ?? "", }), ); +apiDbRegistrar.mount(contractById("artifacts.fileContent"), async (context, principal, db) => { + const revisionId = context.req.query("revision_id"); + return readArtifactFileContent(context as AppContext, principal, db, { + artifactId: context.req.param("artifact_id") ?? "", + path: context.req.query("path") ?? "", + ...(revisionId ? { revisionId } : {}), + }); +}); apiDbRegistrar.mount(contractById("revisions.list"), async (context, principal, db) => listRevisions(context as AppContext, principal, db, { artifactId: context.req.param("artifact_id") ?? 
"" }), ); diff --git a/apps/api/src/routes/artifact-file-content.test.ts b/apps/api/src/routes/artifact-file-content.test.ts new file mode 100644 index 00000000..ff6e1e75 --- /dev/null +++ b/apps/api/src/routes/artifact-file-content.test.ts @@ -0,0 +1,186 @@ +import { McpReadFileOutput } from "@agent-paste/contracts"; +import type { Repository } from "@agent-paste/db"; +import { + seedEncryptedWorkspaceBlob, + testArtifactBytesEncryptionEnv, +} from "@agent-paste/storage/test-helpers/encrypted-artifact-fixture"; +import { describe, expect, it } from "vitest"; +import { apiPrincipal, contextFor, nonePrincipal, responseJson, workspaceId } from "../../test/route-test-helpers.js"; +import type { Env, R2GetObjectBody } from "../env.js"; +import { readArtifactFileContent } from "./artifact-file-content.js"; + +// Real sha256 of the seeded plaintext so the route's row matches the blob key. +async function sha256Hex(text: string): Promise { + const digest = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(text)); + return Array.from(new Uint8Array(digest)) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); +} + +function fakeR2(seed?: { key: string; body: Uint8Array; customMetadata?: Record }): Env["ARTIFACTS"] { + const store = new Map }>(); + if (seed) { + store.set(seed.key, { body: seed.body, customMetadata: seed.customMetadata }); + } + return { + async get(key: string): Promise { + return store.get(key) ?? null; + }, + async list() { + return { objects: [], truncated: false }; + }, + async delete() {}, + }; +} + +function dbWithFile(file: Record | null): Repository { + return { + async getAgentView() { + return file ? 
{ workspace_id: workspaceId, files: [file] } : null; + }, + } as unknown as Repository; +} + +const ARTIFACT_ID = "art_00000000000000000000000001"; + +describe("artifacts.fileContent route", () => { + it("returns the decoded text body + sha256 for a text file", async () => { + const plaintext = "# Title\nhello\n"; + const sha = await sha256Hex(plaintext); + const seeded = await seedEncryptedWorkspaceBlob({ workspaceId, sha256: sha, plaintext }); + const env: Env = { + ...testArtifactBytesEncryptionEnv, + ARTIFACTS: fakeR2({ key: seeded.objectKey, body: seeded.body, customMetadata: seeded.customMetadata }), + }; + const file = { path: "index.md", sha256: sha, size_bytes: plaintext.length, content_type: "text/markdown" }; + + const response = await readArtifactFileContent(contextFor({ env }), apiPrincipal(), dbWithFile(file), { + artifactId: ARTIFACT_ID, + path: "index.md", + }); + + expect(response.status).toBe(200); + const json = await responseJson(response); + expect(json).toMatchObject({ path: "index.md", sha256: sha, is_binary: false, body: plaintext }); + // The strict MCP output contract must accept the real handler output unchanged + // (guards the strict-parse-500 class: no extra fields like object_key leak). 
+ expect(McpReadFileOutput.safeParse(json).success).toBe(true); + }); + + it("flags binary content with is_binary and no body", async () => { + const plaintext = new Uint8Array([0xff, 0xfe, 0x00, 0x01]); + const sha = Array.from(new Uint8Array(await crypto.subtle.digest("SHA-256", plaintext))) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + const seeded = await seedEncryptedWorkspaceBlob({ workspaceId, sha256: sha, plaintext }); + const env: Env = { + ...testArtifactBytesEncryptionEnv, + ARTIFACTS: fakeR2({ key: seeded.objectKey, body: seeded.body, customMetadata: seeded.customMetadata }), + }; + const file = { + path: "logo.bin", + sha256: sha, + size_bytes: plaintext.length, + content_type: "application/octet-stream", + }; + + const response = await readArtifactFileContent(contextFor({ env }), apiPrincipal(), dbWithFile(file), { + artifactId: ARTIFACT_ID, + path: "logo.bin", + }); + + const json = await responseJson<{ is_binary: boolean; body?: string }>(response); + expect(json.is_binary).toBe(true); + expect(json.body).toBeUndefined(); + expect(McpReadFileOutput.safeParse(json).success).toBe(true); + }); + + it("returns oversize text as metadata without reading R2", async () => { + let getCalled = false; + const env: Env = { + ...testArtifactBytesEncryptionEnv, + ARTIFACTS: { + async get() { + getCalled = true; + return null; + }, + async list() { + return { objects: [], truncated: false }; + }, + async delete() {}, + }, + }; + const sha = await sha256Hex("placeholder"); + const file = { path: "huge.txt", sha256: sha, size_bytes: 11 * 1024 * 1024, content_type: "text/plain" }; + + const response = await readArtifactFileContent(contextFor({ env }), apiPrincipal(), dbWithFile(file), { + artifactId: ARTIFACT_ID, + path: "huge.txt", + }); + + const json = await responseJson<{ is_binary: boolean; body?: string }>(response); + expect(getCalled).toBe(false); + expect(json.is_binary).toBe(false); + expect(json.body).toBeUndefined(); + }); + + it("404s 
when the path is not in the artifact or the row has no sha256", async () => { + const env: Env = { ...testArtifactBytesEncryptionEnv, ARTIFACTS: fakeR2() }; + const missing = await readArtifactFileContent(contextFor({ env }), apiPrincipal(), dbWithFile(null), { + artifactId: ARTIFACT_ID, + path: "index.md", + }); + expect(missing.status).toBe(404); + + const nullSha = await readArtifactFileContent( + contextFor({ env }), + apiPrincipal(), + dbWithFile({ path: "index.md", size_bytes: 1, content_type: "text/plain" }), + { artifactId: ARTIFACT_ID, path: "index.md" }, + ); + expect(nullSha.status).toBe(404); + }); + + it("401s without a workspace actor", async () => { + const response = await readArtifactFileContent( + contextFor({ env: testArtifactBytesEncryptionEnv }), + nonePrincipal(), + dbWithFile(null), + { artifactId: ARTIFACT_ID, path: "index.md" }, + ); + expect(response.status).toBe(401); + }); + + it("returns storage_unavailable when the blob is missing", async () => { + const sha = await sha256Hex("present-in-row-missing-in-r2"); + const env: Env = { ...testArtifactBytesEncryptionEnv, ARTIFACTS: fakeR2() }; + const file = { path: "index.md", sha256: sha, size_bytes: 10, content_type: "text/markdown" }; + + const response = await readArtifactFileContent(contextFor({ env }), apiPrincipal(), dbWithFile(file), { + artifactId: ARTIFACT_ID, + path: "index.md", + }); + expect(response.status).toBe(503); + }); + + it("returns storage_unavailable (not 500) when decryption fails on tampered ciphertext", async () => { + // A corrupt/auth-tag-rejected ciphertext throws a plain Error from the ring, not a + // WorkspaceBlob* error. It must still degrade to 503 (retryable), never a 500 (ADR 0089). 
+ const plaintext = "secret\n"; + const sha = await sha256Hex(plaintext); + const seeded = await seedEncryptedWorkspaceBlob({ workspaceId, sha256: sha, plaintext }); + const tampered = new Uint8Array(seeded.body); + tampered[tampered.length - 1] ^= 0xff; // flip a ciphertext byte → AES-GCM auth tag fails + const env: Env = { + ...testArtifactBytesEncryptionEnv, + ARTIFACTS: fakeR2({ key: seeded.objectKey, body: tampered, customMetadata: seeded.customMetadata }), + }; + const file = { path: "index.md", sha256: sha, size_bytes: plaintext.length, content_type: "text/markdown" }; + + const response = await readArtifactFileContent(contextFor({ env }), apiPrincipal(), dbWithFile(file), { + artifactId: ARTIFACT_ID, + path: "index.md", + }); + expect(response.status).toBe(503); + }); +}); diff --git a/apps/api/src/routes/artifact-file-content.ts b/apps/api/src/routes/artifact-file-content.ts new file mode 100644 index 00000000..d3613e07 --- /dev/null +++ b/apps/api/src/routes/artifact-file-content.ts @@ -0,0 +1,105 @@ +import { Mebibytes } from "@agent-paste/contracts"; +import type { ApiActor, Repository } from "@agent-paste/db"; +import { artifactBytesEncryptionRingFromEnv } from "@agent-paste/rotation"; +import { decodeUtf8Strict, readWorkspaceBlobBytes } from "@agent-paste/storage"; +import type { Principal } from "@agent-paste/worker-runtime"; +import { getBoundResponders } from "@agent-paste/worker-runtime"; +import type { AppContext } from "../env.js"; +import { workspaceApiActor } from "../principals.js"; +import { contentBaseUrl } from "../runtime.js"; + +type FileContentParams = { artifactId: string; path: string; revisionId?: string }; + +// Reads one stored file's decrypted plaintext for the owning Workspace Member so +// an agent can diff against it and revise with a unified-diff patch (ADR 0089). 
+// The agent already owns the artifact and can fetch the same bytes via +// the signed content url, so returning plaintext here adds no confidentiality +// exposure; it just gives an agent without the working dir a base to diff. +// +// `getAgentView` resolves the artifact + revision + file set under the actor's +// workspace scope (RLS), so a cross-tenant read returns not_found. The blob key is +// DERIVED from the validated row's plaintext sha256 + the actor's workspace id, +// never from client input, and the encryption AAD binds both — a substituted key +// cannot decrypt. is_binary is byte-derived (true binary only). A file over the +// inline cap is returned as metadata with no body WITHOUT reading R2, so a single +// request never buffers a multi-megabyte decrypt (ADR 0063 intent). +export async function readArtifactFileContent( + context: AppContext, + principal: Principal, + db: Repository, + params: FileContentParams, +): Promise { + const env = context.env; + const responders = getBoundResponders(context); + const actor = workspaceApiActor(principal); + if (!actor) { + return responders.respondError("not_authenticated"); + } + if (!params.path) { + return responders.respondError("not_found"); + } + + const view = await db.getAgentView(buildViewInput(actor, params, contentBaseUrl(env))); + const file = view?.files.find((entry) => entry.path === params.path); + if (!file?.sha256) { + return responders.respondError("not_found"); + } + + // Oversize files are not inlined: return metadata only and skip the R2 read so a + // large file never forces a full decrypt into memory. body absent + is_binary + // false tells the agent "text, too big to inline — fetch via url / whole-blob". 
+ if (file.size_bytes > Mebibytes.ten) { + return responders.respondJson({ + path: file.path, + sha256: file.sha256, + size_bytes: file.size_bytes, + content_type: file.content_type, + is_binary: false, + }); + } + + const ring = artifactBytesEncryptionRingFromEnv(env); + if (!ring || !env.ARTIFACTS) { + return responders.respondError("storage_unavailable"); + } + + let bytes: Uint8Array; + try { + bytes = await readWorkspaceBlobBytes({ + r2: env.ARTIFACTS, + workspaceId: actor.workspace_id, + sha256: file.sha256, + ring, + }); + } catch { + // readWorkspaceBlobBytes has exactly one success path (a clean decrypt of a + // present, well-formed blob). Every throw — missing object, bad/absent metadata, + // an unknown kid or AAD/auth-tag rejection from the ring — is an operational or + // crypto condition on a row we already validated, not a client error. All map to + // storage_unavailable (503, retryable), never a 500 (ADR 0089). + return responders.respondError("storage_unavailable"); + } + + const decoded = decodeUtf8Strict(bytes); + const isBinary = decoded === null; + return responders.respondJson({ + path: file.path, + sha256: file.sha256, + size_bytes: file.size_bytes, + content_type: file.content_type, + is_binary: isBinary, + ...(isBinary ? 
{} : { body: decoded }), + }); +} + +function buildViewInput(actor: ApiActor, params: FileContentParams, contentBase: string) { + const input: { actor: ApiActor; artifactId: string; revisionId?: string; contentBaseUrl: string } = { + actor, + artifactId: params.artifactId, + contentBaseUrl: contentBase, + }; + if (params.revisionId) { + input.revisionId = params.revisionId; + } + return input; +} diff --git a/apps/cli/src/help.ts b/apps/cli/src/help.ts new file mode 100644 index 00000000..ce5ee37f --- /dev/null +++ b/apps/cli/src/help.ts @@ -0,0 +1,42 @@ +export const HELP_TEXT = `agent-paste + +Usage: + agent-paste login + agent-paste logout + agent-paste whoami [--json] + agent-paste publish [--artifact-id ] [--title ] [--entrypoint ] [--render-mode ] [--ephemeral] [--json] + agent-paste pull [--revision-id ] [--json] + agent-paste make-public [--json] + agent-paste version [--json] + agent-paste upgrade [] + +Publish: + --artifact-id Revise an EXISTING Artifact: publishes a new Revision under it + instead of creating a new Artifact. The viewer link is stable and + live-updates pages already open — this is how you change published + work. On a revise the CLI sends only the files that changed (large + text files as a diff), inheriting the rest, so a one-line edit is a + small upload. Omit it to create a new Artifact on a new link. + Re-publishing an edit without --artifact-id strands the user's link. + --title Set the Artifact title. + --entrypoint Override the entrypoint file within . + --render-mode text | markdown | html (otherwise inferred from the entrypoint). + --ephemeral Accountless 24h publish with a one-time claim link (no login). + +Pull: + Read one file's stored content back (so you can edit it and revise). Prints the + text body to stdout (cat-like); --json adds sha256/size/is_binary. Binary or + oversize files have no inline body (fetch via the content URL). --revision-id + reads a specific Revision instead of the latest. 
+ +Make public: + Publish keeps an Artifact private (the link is a login-walled viewer). To make + it reachable without login, run make-public : it creates (or + reuses) the Artifact's revocable Share Link and prints the public URL. + +Output: + --json Machine-readable JSON on stdout (stable, carries schema_version). + --quiet Suppress the human summary; errors and exit code still apply. + --color Force colour/rich output; --no-color forces plain. + Default: rich on a TTY, plain when piped or NO_COLOR/CI is set. +`; diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index 4c3fdfb6..e35ab667 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -11,8 +11,9 @@ import { runPublish as runSharedPublish, } from "@agent-paste/api-client"; import type { EphemeralProvisionResponse } from "@agent-paste/contracts"; -import { ArtifactId, RenderMode } from "@agent-paste/contracts"; +import { ArtifactId, FilePath, RenderMode, RevisionId } from "@agent-paste/contracts"; import { type Credential, deleteCredential, isCredentialExpired, loadCredential } from "./credentials.js"; +import { HELP_TEXT } from "./help.js"; import { contentTypeForLocalPath, inferPublishOptions, @@ -21,7 +22,19 @@ import { walkLocalPath, } from "./local.js"; import { login } from "./login.js"; +import { loadManifestCache, type ManifestCacheFile, saveManifestCache } from "./manifest-cache.js"; +import { + ephemeralClaimUrl, + formatEphemeralPublishResult, + formatMakePublic, + formatPublishResult, + type PublishResultShape, +} from "./publish-format.js"; import { apiClientTransport } from "./publish-transport.js"; + +// Re-exported for tests that import it from the CLI entrypoint. 
+export { ephemeralClaimUrl } from "./publish-format.js"; + import { createProgress, exitCodeFor, @@ -32,6 +45,7 @@ import { paint, resolveMode, } from "./render.js"; +import { buildRevisePlan, isBaseUnusableError, type LocalFileWithDigest, type RevisePlan } from "./revise.js"; import { commandInvocation, detectChannel, runUpdateCheck, signedOutHint } from "./update-check.js"; import { runUpgrade } from "./upgrade.js"; import { CLI_VERSION } from "./version.js"; @@ -99,6 +113,8 @@ async function dispatch(command: string, parsed: Parsed, client: ApiClient) { return publish(parsed, client); case "make-public": return makePublic(parsed, client); + case "pull": + return pull(parsed, client); default: throw new Error(`Unknown command: ${command}`); } @@ -242,6 +258,16 @@ export function shellQuote(value: string) { return `'${value.replace(/'/g, "'\\''")}'`; } +function wholePublishFile(file: LocalFileWithDigest): PublishFile { + return { + path: file.path, + sizeBytes: file.sizeBytes, + sha256: file.sha256, + contentType: contentTypeForLocalPath(file.path), + read: () => fs.readFile(file.absolutePath), + }; +} + async function publish(parsed: Parsed, client: ApiClient) { const mode = outputModeFor(parsed.global); const result = await runPublish(parsed, client, mode); @@ -279,36 +305,75 @@ async function runPublish(parsed: Parsed, client: ApiClient, mode: OutputMode) { const digestByPath = new Map( await Promise.all(files.map(async (file) => [file.path, await sha256HexForFile(file.absolutePath)] as const)), ); - - const publishFiles: PublishFile[] = files.map((file) => { + const filesWithDigest: LocalFileWithDigest[] = files.map((file) => { const digest = digestByPath.get(file.path); if (!digest) { throw new Error(`Missing digest for ${file.path}`); } - return { - path: file.path, - sizeBytes: digest.sizeBytes, - sha256: digest.sha256, - contentType: contentTypeForLocalPath(file.path), - read: () => fs.readFile(file.absolutePath), - }; + return { ...file, sha256: 
digest.sha256, sizeBytes: digest.sizeBytes }; }); + const wholeManifest = (): PublishFile[] => filesWithDigest.map(wholePublishFile); + const fullTree = (): ManifestCacheFile[] => + filesWithDigest.map((file) => ({ path: file.path, sha256: file.sha256, size_bytes: file.sizeBytes })); + const artifactIdFlag = stringFlag(parsed, "artifact-id"); const artifactId = artifactIdFlag ? ArtifactId.parse(artifactIdFlag) : undefined; + + // On a revise with a matching local cache, send only changed/added files (some + // as verified unified diffs) against the base Revision; unchanged files inherit. + // No cache (first publish elsewhere / fresh machine) => a full whole-blob publish. + const cache = artifactId ? await loadManifestCache(artifactId) : null; + const plan = + artifactId && cache + ? await buildRevisePlan({ client, artifactId, cache, files: filesWithDigest, entrypoint: inferred.entrypoint }) + : null; + const progress = createProgress(mode); - const outcome = await runSharedPublish(apiClientTransport(client), { - files: publishFiles, - title: inferred.title, - entrypoint: inferred.entrypoint, - ...(explicitRenderMode ? { renderMode: explicitRenderMode } : {}), - ...(artifactId ? { artifactId } : {}), - idempotencyKey: createIdempotencyKey("cli_publish"), - onUploadProgress: ({ uploadedFiles, totalToUpload, uploadedBytes }) => - progress.update({ done: uploadedFiles, total: totalToUpload, bytes: uploadedBytes }), - }); + const runOnce = (revise: RevisePlan | null) => + runSharedPublish(apiClientTransport(client), { + files: revise ? revise.publishFiles : wholeManifest(), + title: inferred.title, + entrypoint: inferred.entrypoint, + ...(explicitRenderMode ? { renderMode: explicitRenderMode } : {}), + ...(artifactId ? { artifactId } : {}), + ...(revise + ? { + baseRevisionId: RevisionId.parse(revise.baseRevisionId), + ...(revise.deletedPaths.length > 0 + ? 
{ deletedPaths: revise.deletedPaths.map((p) => FilePath.parse(p)) } + : {}), + } + : {}), + idempotencyKey: createIdempotencyKey("cli_publish"), + onUploadProgress: ({ uploadedFiles, totalToUpload, uploadedBytes }) => + progress.update({ done: uploadedFiles, total: totalToUpload, bytes: uploadedBytes }), + }); + + let outcome: Awaited>; + try { + outcome = await runOnce(plan); + } catch (error) { + // A cached base that the server can no longer use (concurrent revise, retained + // base, non-inheritable file) is recoverable: drop the partial manifest and + // re-publish the whole working dir, which is always on disk. + if (plan && isBaseUnusableError(error)) { + progress.done(); + outcome = await runOnce(null); + } else { + throw error; + } + } progress.done(); + // Seed the cache with the full effective tree so the next revise diffs correctly. + if (outcome.result.artifact_id && outcome.result.revision_id) { + await saveManifestCache(outcome.result.artifact_id, { + revision_id: outcome.result.revision_id, + files: plan ? plan.effectiveTree : fullTree(), + }); + } + // Publish is content-only and private: one link to hand the user, the private // viewer URL (`/v/`), identical to what the MCP server returns. Going public // is a separate, explicit step (`agent-paste make-public `). @@ -341,16 +406,39 @@ async function makePublic(parsed: Parsed, client: ApiClient) { return output(payload, parsed.global, formatMakePublic(outputModeFor(parsed.global), payload)); } -function formatMakePublic(mode: OutputMode, payload: { public_url: string }) { - const label = (text: string) => paint(mode, "dim", text); - return [ - `${paint(mode, "green", "✓")} Public link created`, - "", - ` ${label("Public")} ${hyperlink(mode, payload.public_url)}`, - ` ${label("(anyone with this link can open it, no login; revoke to take it down)")}`, - "", - paint(mode, "cyan", ` → open ${payload.public_url}`), - ].join("\n"); +// Read one stored file's content for the owning member (ADR 0089). 
Default +// output is cat-like: the raw text body to stdout, so `agent-paste pull <artifact-id> <path> +// > file` works. --json emits structured metadata; binary and oversize files carry no +// inline body in either mode (plain output refuses them; fetch via the content URL). +async function pull(parsed: Parsed, client: ApiClient) { + const artifactId = ArtifactId.parse(requiredArg(parsed, 0, "artifact-id")); + const filePath = requiredArg(parsed, 1, "path"); + const revisionId = stringFlag(parsed, "revision-id"); + const file = await client.artifacts.readFile(artifactId, filePath, revisionId); + + if (parsed.global.json) { + return output( + { + path: file.path, + sha256: file.sha256, + size_bytes: file.size_bytes, + content_type: file.content_type, + is_binary: file.is_binary, + ...(file.body !== undefined ? { body: file.body } : {}), + }, + parsed.global, + ); + } + if (file.is_binary) { + throw new Error(`${file.path} is binary; use --json (base64) or the content URL`); + } + if (file.body === undefined) { + throw new Error(`${file.path} is ${file.size_bytes} bytes, too large to inline; fetch via the content URL`); + } + // The body IS pull's result (cat-like), not a human summary, so --quiet does not + // suppress it — like --quiet --json still emitting the object. Otherwise + // `pull --quiet > file` would silently write an empty file. 
+ await writeStdout(file.body); } export function parseArgs(argv: string[]): Parsed { @@ -406,7 +494,7 @@ function commandParts(positionals: string[]) { } function takesValue(name: string) { - return new Set(["artifact-id", "title", "entrypoint", "render-mode", "name"]).has(name); + return new Set(["artifact-id", "title", "entrypoint", "render-mode", "name", "revision-id"]).has(name); } function requiredArg(parsed: Parsed, index: number, label: string) { @@ -468,133 +556,8 @@ function writeStdout(value: string) { }); } -type PublishResultShape = { - artifact_id: string; - revision_id: string; - title: string; - private_url: string; - revision_content_url: string; - agent_view_url: string; - expires_at: string; - upload_stats?: { - total_files: number; - total_bytes: number; - uploaded_files: number; - uploaded_bytes: number; - reused_files: number; - reused_bytes: number; - }; -}; - -// Render expires_at as a plain calendar date when it parses as an ISO instant; -// otherwise pass the raw value through unchanged. Never fabricate a date. -function formatExpiry(expiresAt: string) { - const date = new Date(expiresAt); - return Number.isNaN(date.getTime()) ? expiresAt : date.toISOString().slice(0, 10); -} - -function uploadStatsLine(mode: OutputMode, stats: NonNullable) { - const uploaded = paint(mode, "green", `${stats.uploaded_files}/${stats.total_files} uploaded`); - return ` ${paint(mode, "dim", "Upload")} ${uploaded}, ${stats.reused_files} reused · ${formatBytes(stats.uploaded_bytes)} sent, ${formatBytes(stats.reused_bytes)} cached`; -} - -// Human-readable publish result. The handoff leads with the live viewer URL, -// then shows the one command to revise this Artifact in place so the agent -// edits via add-revision (stable link, live-updates the open page) instead of -// republishing a new Artifact. Snapshot URLs stay on the JSON surface. 
-function formatPublishResult(mode: OutputMode, result: PublishResultShape, updateCommand: string) { - const label = (text: string) => paint(mode, "dim", text); - const privateUrl = result.private_url; - return [ - `${paint(mode, "green", "✓")} Published ${paint(mode, "bold", `"${result.title}"`)}`, - "", - ` ${label("View")} ${hyperlink(mode, privateUrl)}`, - ` ${label("Expires")} ${formatExpiry(result.expires_at)}`, - ...(result.upload_stats ? [uploadStatsLine(mode, result.upload_stats)] : []), - "", - ` ${label("Update")} ${updateCommand}`, - ` ${label("(revises this Artifact; same link live-updates the open page)")}`, - ...(privateUrl ? ["", paint(mode, "cyan", ` → open ${privateUrl}`)] : []), - ].join("\n"); -} - -export function ephemeralClaimUrl(claimToken: string) { - const base = (process.env.AGENT_PASTE_WEB_URL ?? "https://app.agent-paste.sh").replace(/\/+$/, ""); - return `${base}/claim#${claimToken}`; -} - -function formatEphemeralPublishResult(mode: OutputMode, result: PublishResultShape, claimUrl: string) { - assertClaimTokenNotInPublicUrls(result, claimUrl); - const label = (text: string) => paint(mode, "dim", text); - const privateUrl = result.private_url; - return [ - `${paint(mode, "green", "✓")} Published ${paint(mode, "bold", `"${result.title}"`)}`, - "", - paint(mode, "dim", "Open this to view, keep, and unlock your artifact:"), - ` ${label("Claim")} ${hyperlink(mode, claimUrl)}`, - ` ${label("Expires")} ${formatExpiry(result.expires_at)}`, - ...(result.upload_stats ? [uploadStatsLine(mode, result.upload_stats)] : []), - "", - paint(mode, "dim", "The token lives in the URL hash only (never the query string)."), - ...(privateUrl - ? 
["", ` ${label("View")} ${hyperlink(mode, privateUrl)} ${paint(mode, "dim", "(works after claiming)")}`] - : []), - "", - paint(mode, "cyan", ` → open ${claimUrl}`), - ].join("\n"); -} - -function assertClaimTokenNotInPublicUrls(result: PublishResultShape, claimUrl: string) { - const claimToken = claimUrl.split("#")[1] ?? ""; - if (!claimToken || !claimUrl.includes("#")) { - throw new Error("Claim URL must carry the token in the URL hash"); - } - if (claimUrl.includes("?") && claimUrl.includes(claimToken)) { - throw new Error("Claim Token must not appear in the URL query string"); - } - if ( - result.private_url.includes(claimToken) || - result.revision_content_url.includes(claimToken) || - result.agent_view_url.includes(claimToken) - ) { - throw new Error("Claim Token must not appear in public Access Link Signed URLs"); - } -} - function printHelp() { - return writeStdout(`agent-paste - -Usage: - agent-paste login - agent-paste logout - agent-paste whoami [--json] - agent-paste publish [--artifact-id ] [--title ] [--entrypoint ] [--render-mode ] [--ephemeral] [--json] - agent-paste make-public [--json] - agent-paste version [--json] - agent-paste upgrade [] - -Publish: - --artifact-id Revise an EXISTING Artifact: publishes a new Revision under it - instead of creating a new Artifact. The viewer link is stable and - live-updates pages already open — this is how you change published - work. Omit it to create a new Artifact on a new link. Re-publishing - an edit without --artifact-id strands the link the user already has. - --title Set the Artifact title. - --entrypoint Override the entrypoint file within . - --render-mode text | markdown | html (otherwise inferred from the entrypoint). - --ephemeral Accountless 24h publish with a one-time claim link (no login). - -Make public: - Publish keeps an Artifact private (the link is a login-walled viewer). 
To make - it reachable without login, run make-public : it creates (or - reuses) the Artifact's revocable Share Link and prints the public URL. - -Output: - --json Machine-readable JSON on stdout (stable, carries schema_version). - --quiet Suppress the human summary; errors and exit code still apply. - --color Force colour/rich output; --no-color forces plain. - Default: rich on a TTY, plain when piped or NO_COLOR/CI is set. -`); + return writeStdout(HELP_TEXT); } export function isMainEntrypoint(metaUrl: string, argv1: string | undefined, platform = process.platform) { diff --git a/apps/cli/src/local.ts b/apps/cli/src/local.ts index 250adf60..4ee38db4 100644 --- a/apps/cli/src/local.ts +++ b/apps/cli/src/local.ts @@ -148,6 +148,23 @@ export async function sha256HexForFile(absolutePath: string): Promise { + tmp = await fs.mkdtemp(path.join(os.tmpdir(), "manifest-cache-test-")); + prevHome = process.env.XDG_CONFIG_HOME; + process.env.XDG_CONFIG_HOME = tmp; +}); +afterEach(async () => { + if (prevHome === undefined) { + delete process.env.XDG_CONFIG_HOME; + } else { + process.env.XDG_CONFIG_HOME = prevHome; + } + await fs.rm(tmp, { recursive: true, force: true }); +}); + +describe("manifest cache", () => { + it("round-trips a saved manifest", async () => { + const cache = { + revision_id: "rev_01HZY7Q8X9Y2S3T4V5W6X7Y8Z9", + files: [{ path: "a.txt", sha256: "a".repeat(64), size_bytes: 3 }], + }; + await saveManifestCache("art_1", cache); + expect(await loadManifestCache("art_1")).toEqual(cache); + }); + + it("returns null on a cache miss (no file)", async () => { + expect(await loadManifestCache("art_missing")).toBeNull(); + }); + + it("treats malformed JSON as a cache miss", async () => { + const filePath = manifestCachePath("art_bad"); + await fs.mkdir(path.dirname(filePath), { recursive: true }); + await fs.writeFile(filePath, "{ not json"); + expect(await loadManifestCache("art_bad")).toBeNull(); + }); + + it("treats a wrong-shape cache as a cache miss (schema 
drift)", async () => { + const filePath = manifestCachePath("art_drift"); + await fs.mkdir(path.dirname(filePath), { recursive: true }); + await fs.writeFile(filePath, JSON.stringify({ revision_id: 5, files: "nope" })); + expect(await loadManifestCache("art_drift")).toBeNull(); + }); +}); diff --git a/apps/cli/src/manifest-cache.ts b/apps/cli/src/manifest-cache.ts new file mode 100644 index 00000000..a1775bf9 --- /dev/null +++ b/apps/cli/src/manifest-cache.ts @@ -0,0 +1,96 @@ +import { promises as fs } from "node:fs"; +import path from "node:path"; +import { RevisionId } from "@agent-paste/contracts"; +import { configDir } from "./credentials.js"; + +// Per-artifact record of what the CLI last published, so a revise can diff the +// working dir against it and send only changed/added files + deleted_paths against +// base_revision_id (ADR 0089). Holds only paths, plaintext sha256, sizes, +// and the base revision id — no bytes, no secrets. A stale or corrupt cache can +// never cause a bad publish: the server re-validates base_revision_id and every +// patch base at finalize, and the revise path drops the cache and re-publishes +// whole on any base-unusable error. + +export type ManifestCacheFile = { path: string; sha256: string; size_bytes: number }; +export type ManifestCache = { revision_id: string; files: ManifestCacheFile[] }; + +function manifestsDir(): string { + return path.join(configDir(), "manifests"); +} + +export function manifestCachePath(artifactId: string): string { + return path.join(manifestsDir(), `${encodeURIComponent(artifactId)}.json`); +} + +// Validate the on-disk shape ourselves: any drift (older/newer CLI, hand-edit, +// truncation) is treated as a cache miss so the next publish is a correct full +// publish rather than a stale-base patch attempt. 
+function parseManifestCache(raw: string): ManifestCache | null { + let value: unknown; + try { + value = JSON.parse(raw); + } catch { + return null; + } + if (typeof value !== "object" || value === null) { + return null; + } + const candidate = value as { revision_id?: unknown; files?: unknown }; + // revision_id must be a well-formed id; a bad one (drift/corruption) is a cache + // miss so the next publish is a clean full publish, not a guaranteed bad base. + const revision = RevisionId.safeParse(candidate.revision_id); + if (!revision.success || !Array.isArray(candidate.files)) { + return null; + } + const files: ManifestCacheFile[] = []; + for (const file of candidate.files) { + if ( + typeof file !== "object" || + file === null || + typeof (file as ManifestCacheFile).path !== "string" || + typeof (file as ManifestCacheFile).sha256 !== "string" || + typeof (file as ManifestCacheFile).size_bytes !== "number" + ) { + return null; + } + const f = file as ManifestCacheFile; + files.push({ path: f.path, sha256: f.sha256, size_bytes: f.size_bytes }); + } + return { revision_id: revision.data, files }; +} + +export async function loadManifestCache(artifactId: string): Promise { + try { + return parseManifestCache(await fs.readFile(manifestCachePath(artifactId), "utf8")); + } catch (error) { + if (isNotFound(error)) { + return null; + } + throw error; + } +} + +export async function saveManifestCache(artifactId: string, cache: ManifestCache): Promise { + await fs.mkdir(manifestsDir(), { recursive: true, mode: 0o700 }); + const filePath = manifestCachePath(artifactId); + await rejectSymlink(filePath); + await fs.writeFile(filePath, JSON.stringify(cache), { mode: 0o600 }); + await fs.chmod(filePath, 0o600); +} + +function isNotFound(error: unknown): boolean { + return typeof error === "object" && error !== null && (error as { code?: string }).code === "ENOENT"; +} + +async function rejectSymlink(filePath: string): Promise { + try { + const stat = await 
fs.lstat(filePath); + if (stat.isSymbolicLink()) { + throw new Error(`Refusing to write manifest cache through symlink: ${filePath}`); + } + } catch (error) { + if (!isNotFound(error)) { + throw error; + } + } +} diff --git a/apps/cli/src/publish-format.ts b/apps/cli/src/publish-format.ts new file mode 100644 index 00000000..f975b16d --- /dev/null +++ b/apps/cli/src/publish-format.ts @@ -0,0 +1,106 @@ +import { formatBytes, hyperlink, type OutputMode, paint } from "./render.js"; + +export type PublishResultShape = { + artifact_id: string; + revision_id: string; + title: string; + private_url: string; + revision_content_url: string; + agent_view_url: string; + expires_at: string; + upload_stats?: { + total_files: number; + total_bytes: number; + uploaded_files: number; + uploaded_bytes: number; + reused_files: number; + reused_bytes: number; + }; +}; + +// Render expires_at as a plain calendar date when it parses as an ISO instant; +// otherwise pass the raw value through unchanged. Never fabricate a date. +export function formatExpiry(expiresAt: string): string { + const date = new Date(expiresAt); + return Number.isNaN(date.getTime()) ? expiresAt : date.toISOString().slice(0, 10); +} + +function uploadStatsLine(mode: OutputMode, stats: NonNullable): string { + const uploaded = paint(mode, "green", `${stats.uploaded_files}/${stats.total_files} uploaded`); + return ` ${paint(mode, "dim", "Upload")} ${uploaded}, ${stats.reused_files} reused · ${formatBytes(stats.uploaded_bytes)} sent, ${formatBytes(stats.reused_bytes)} cached`; +} + +// Human-readable publish result. The handoff leads with the live viewer URL, +// then shows the one command to revise this Artifact in place so the agent +// edits via add-revision (stable link, live-updates the open page) instead of +// republishing a new Artifact. Snapshot URLs stay on the JSON surface. 
+export function formatPublishResult(mode: OutputMode, result: PublishResultShape, updateCommand: string): string { + const label = (text: string) => paint(mode, "dim", text); + const privateUrl = result.private_url; + return [ + `${paint(mode, "green", "✓")} Published ${paint(mode, "bold", `"${result.title}"`)}`, + "", + ` ${label("View")} ${hyperlink(mode, privateUrl)}`, + ` ${label("Expires")} ${formatExpiry(result.expires_at)}`, + ...(result.upload_stats ? [uploadStatsLine(mode, result.upload_stats)] : []), + "", + ` ${label("Update")} ${updateCommand}`, + ` ${label("(revises this Artifact; same link live-updates the open page)")}`, + ...(privateUrl ? ["", paint(mode, "cyan", ` → open ${privateUrl}`)] : []), + ].join("\n"); +} + +export function ephemeralClaimUrl(claimToken: string): string { + const base = (process.env.AGENT_PASTE_WEB_URL ?? "https://app.agent-paste.sh").replace(/\/+$/, ""); + return `${base}/claim#${claimToken}`; +} + +export function formatEphemeralPublishResult(mode: OutputMode, result: PublishResultShape, claimUrl: string): string { + assertClaimTokenNotInPublicUrls(result, claimUrl); + const label = (text: string) => paint(mode, "dim", text); + const privateUrl = result.private_url; + return [ + `${paint(mode, "green", "✓")} Published ${paint(mode, "bold", `"${result.title}"`)}`, + "", + paint(mode, "dim", "Open this to view, keep, and unlock your artifact:"), + ` ${label("Claim")} ${hyperlink(mode, claimUrl)}`, + ` ${label("Expires")} ${formatExpiry(result.expires_at)}`, + ...(result.upload_stats ? [uploadStatsLine(mode, result.upload_stats)] : []), + "", + paint(mode, "dim", "The token lives in the URL hash only (never the query string)."), + ...(privateUrl + ? 
["", ` ${label("View")} ${hyperlink(mode, privateUrl)} ${paint(mode, "dim", "(works after claiming)")}`] + : []), + "", + paint(mode, "cyan", ` → open ${claimUrl}`), + ].join("\n"); +} + +function assertClaimTokenNotInPublicUrls(result: PublishResultShape, claimUrl: string): void { + const claimToken = claimUrl.split("#")[1] ?? ""; + if (!claimToken || !claimUrl.includes("#")) { + throw new Error("Claim URL must carry the token in the URL hash"); + } + if (claimUrl.includes("?") && claimUrl.includes(claimToken)) { + throw new Error("Claim Token must not appear in the URL query string"); + } + if ( + result.private_url.includes(claimToken) || + result.revision_content_url.includes(claimToken) || + result.agent_view_url.includes(claimToken) + ) { + throw new Error("Claim Token must not appear in public Access Link Signed URLs"); + } +} + +export function formatMakePublic(mode: OutputMode, payload: { public_url: string }): string { + const label = (text: string) => paint(mode, "dim", text); + return [ + `${paint(mode, "green", "✓")} Public link created`, + "", + ` ${label("Public")} ${hyperlink(mode, payload.public_url)}`, + ` ${label("(anyone with this link can open it, no login; revoke to take it down)")}`, + "", + paint(mode, "cyan", ` → open ${payload.public_url}`), + ].join("\n"); +} diff --git a/apps/cli/src/revise.test.ts b/apps/cli/src/revise.test.ts new file mode 100644 index 00000000..8d35689c --- /dev/null +++ b/apps/cli/src/revise.test.ts @@ -0,0 +1,165 @@ +import { promises as fs } from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import type { ApiClient } from "@agent-paste/api-client"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import type { ManifestCache } from "./manifest-cache.js"; +import { buildRevisePlan, isBaseUnusableError, type LocalFileWithDigest } from "./revise.js"; + +let tmp: string; + +beforeEach(async () => { + tmp = await fs.mkdtemp(path.join(os.tmpdir(), "revise-test-")); +}); 
+afterEach(async () => { + await fs.rm(tmp, { recursive: true, force: true }); +}); + +async function writeFile(rel: string, content: string): Promise { + const abs = path.join(tmp, rel); + await fs.mkdir(path.dirname(abs), { recursive: true }); + await fs.writeFile(abs, content); + const { createHash } = await import("node:crypto"); + const bytes = new TextEncoder().encode(content); + return { + absolutePath: abs, + path: rel, + sizeBytes: bytes.byteLength, + sha256: createHash("sha256").update(bytes).digest("hex"), + }; +} + +function clientReturning(body: string, sha: string): ApiClient { + return { + artifacts: { + readFile: async () => ({ + path: "x", + sha256: sha, + size_bytes: body.length, + content_type: "text/plain", + is_binary: false, + body, + }), + }, + } as unknown as ApiClient; +} + +const ARTIFACT_ID = "art_1"; + +describe("buildRevisePlan", () => { + it("omits unchanged files (inherit) and uploads added files whole", async () => { + const unchanged = await writeFile("keep.txt", "same\n"); + const added = await writeFile("new.txt", "brand new\n"); + const cache: ManifestCache = { + revision_id: "rev_1", + files: [{ path: "keep.txt", sha256: unchanged.sha256, size_bytes: unchanged.sizeBytes }], + }; + + const plan = await buildRevisePlan({ + client: clientReturning("", ""), + artifactId: ARTIFACT_ID, + cache, + files: [unchanged, added], + entrypoint: "keep.txt", + }); + + expect(plan.publishFiles.map((f) => f.path)).toEqual(["new.txt"]); + expect(plan.baseRevisionId).toBe("rev_1"); + expect(plan.effectiveTree.map((f) => f.path).sort()).toEqual(["keep.txt", "new.txt"]); + }); + + it("sends a changed text file as a patch against the cached base", async () => { + // Large enough that a one-line diff is smaller than the whole file, so the + // patch path wins over whole-blob. 
+ const lines = Array.from({ length: 200 }, (_, i) => `line number ${i} padded out a bit`); + const base = `${lines.join("\n")}\n`; + const nextLines = [...lines]; + nextLines[100] = "line number 100 EDITED"; + const next = `${nextLines.join("\n")}\n`; + const changed = await writeFile("doc.txt", next); + const { createHash } = await import("node:crypto"); + const baseSha = createHash("sha256").update(new TextEncoder().encode(base)).digest("hex"); + const cache: ManifestCache = { + revision_id: "rev_1", + files: [{ path: "doc.txt", sha256: baseSha, size_bytes: base.length }], + }; + + const plan = await buildRevisePlan({ + client: clientReturning(base, baseSha), + artifactId: ARTIFACT_ID, + cache, + files: [changed], + entrypoint: "doc.txt", + }); + + const entry = plan.publishFiles[0]; + expect(entry?.path).toBe("doc.txt"); + expect(entry?.patch?.baseSha256).toBe(baseSha); + expect(entry?.patch?.resultSha256).toBe(changed.sha256); + }); + + it("records deleted_paths for cached files absent locally, never the entrypoint", async () => { + const keep = await writeFile("index.html", "

hi

\n"); + const cache: ManifestCache = { + revision_id: "rev_1", + files: [ + { path: "index.html", sha256: keep.sha256, size_bytes: keep.sizeBytes }, + { path: "old.css", sha256: "a".repeat(64), size_bytes: 10 }, + { path: "stale.js", sha256: "b".repeat(64), size_bytes: 10 }, + ], + }; + + const plan = await buildRevisePlan({ + client: clientReturning("", ""), + artifactId: ARTIFACT_ID, + cache, + files: [keep], + entrypoint: "index.html", + }); + + expect(plan.deletedPaths.sort()).toEqual(["old.css", "stale.js"]); + }); + + it("falls back to a whole-blob upload when the read route reports binary base", async () => { + const changed = await writeFile("doc.txt", "new text\n"); + const cache: ManifestCache = { + revision_id: "rev_1", + files: [{ path: "doc.txt", sha256: "c".repeat(64), size_bytes: 5 }], + }; + const binaryClient = { + artifacts: { + readFile: async () => ({ + path: "doc.txt", + sha256: "c".repeat(64), + size_bytes: 5, + content_type: "text/plain", + is_binary: true, + }), + }, + } as unknown as ApiClient; + + const plan = await buildRevisePlan({ + client: binaryClient, + artifactId: ARTIFACT_ID, + cache, + files: [changed], + entrypoint: "doc.txt", + }); + + expect(plan.publishFiles[0]?.patch).toBeUndefined(); + }); +}); + +describe("isBaseUnusableError", () => { + it("matches base-unusable error codes", () => { + expect(isBaseUnusableError({ code: "patch_conflict" })).toBe(true); + expect(isBaseUnusableError({ code: "base_revision_not_found" })).toBe(true); + expect(isBaseUnusableError({ code: "inherited_path_not_blob_backed" })).toBe(true); + expect(isBaseUnusableError({ message: "patch_conflict: index.html: apply_failed" })).toBe(true); + }); + it("does not match unrelated errors", () => { + expect(isBaseUnusableError({ code: "not_authenticated" })).toBe(false); + expect(isBaseUnusableError(new Error("network down"))).toBe(false); + expect(isBaseUnusableError(null)).toBe(false); + }); +}); diff --git a/apps/cli/src/revise.ts 
b/apps/cli/src/revise.ts new file mode 100644 index 00000000..56c1c721 --- /dev/null +++ b/apps/cli/src/revise.ts @@ -0,0 +1,151 @@ +import { promises as fs } from "node:fs"; +import type { ApiClient, PublishFile } from "@agent-paste/api-client"; +import { contentTypeForLocalPath, isUtf8Text, type LocalFile } from "./local.js"; +import type { ManifestCache, ManifestCacheFile } from "./manifest-cache.js"; +import { diffWithSelfCheck } from "./unified-diff-gen.js"; + +export type LocalFileWithDigest = LocalFile & { sha256: string }; + +export type RevisePlan = { + // The files to send: changed + added only (some as patches). Unchanged files are + // omitted so they inherit from the base Revision by reference. + publishFiles: PublishFile[]; + baseRevisionId: string; + deletedPaths: string[]; + // The full effective tree of the new Revision (= the current working dir), to + // seed the manifest cache after a successful publish. + effectiveTree: ManifestCacheFile[]; +}; + +function wholeBlobFile(file: LocalFileWithDigest): PublishFile { + return { + path: file.path, + sizeBytes: file.sizeBytes, + sha256: file.sha256, + contentType: contentTypeForLocalPath(file.path), + read: () => fs.readFile(file.absolutePath), + }; +} + +function patchFile(file: LocalFileWithDigest, diffBytes: Uint8Array, baseSha256: string): PublishFile { + return { + path: file.path, + sizeBytes: diffBytes.byteLength, + // sha256 is omitted on the wire for a patched entry; this value is unused but + // keeps the PublishFile shape uniform. + sha256: file.sha256, + contentType: contentTypeForLocalPath(file.path), + read: () => diffBytes, + patch: { baseSha256, resultSha256: file.sha256 }, + }; +} + +// Decide how to send one changed text file: a unified diff when one can be +// generated and verified smaller, else the whole file. Any failure to read the +// base (oversize/binary base, 404, network) degrades to a whole-blob upload. 
+async function buildChangedFile( + client: ApiClient, + artifactId: string, + baseRevisionId: string, + file: LocalFileWithDigest, + baseSha256: string, +): Promise { + let nextBytes: Uint8Array; + try { + nextBytes = await fs.readFile(file.absolutePath); + } catch { + return wholeBlobFile(file); + } + if (!isUtf8Text(nextBytes)) { + return wholeBlobFile(file); + } + let base: Awaited>; + try { + base = await client.artifacts.readFile(artifactId, file.path, baseRevisionId); + } catch { + return wholeBlobFile(file); + } + if (base.is_binary || base.body === undefined) { + return wholeBlobFile(file); + } + const diffBytes = await diffWithSelfCheck({ + baseText: base.body, + baseSha256, + nextText: new TextDecoder().decode(nextBytes), + nextBytes, + expectedResultSha256: file.sha256, + }); + return diffBytes ? patchFile(file, diffBytes, baseSha256) : wholeBlobFile(file); +} + +/** + * Build a partial-manifest revise plan by diffing the working dir against the + * cached manifest (ADR 0089): unchanged files inherit by omission, changed + * text files are sent as verified unified diffs (whole-blob otherwise), added files + * upload whole, and removed files become deleted_paths. `entrypoint` is never + * deleted. Falls back to whole-blob for any file whose base cannot be diffed. + */ +export async function buildRevisePlan(input: { + client: ApiClient; + artifactId: string; + cache: ManifestCache; + files: LocalFileWithDigest[]; + entrypoint: string; +}): Promise { + const { client, artifactId, cache, files, entrypoint } = input; + const cachedBySha = new Map(cache.files.map((f) => [f.path, f])); + const localPaths = new Set(files.map((f) => f.path)); + + const publishFiles: PublishFile[] = []; + for (const file of files) { + const cached = cachedBySha.get(file.path); + if (cached && cached.sha256 === file.sha256) { + continue; // Unchanged: inherit from the base Revision by reference. 
+ } + if (cached) { + publishFiles.push(await buildChangedFile(client, artifactId, cache.revision_id, file, cached.sha256)); + } else { + publishFiles.push(wholeBlobFile(file)); // Added file. + } + } + + const deletedPaths = cache.files.map((f) => f.path).filter((p) => !localPaths.has(p) && p !== entrypoint); + + const effectiveTree: ManifestCacheFile[] = files.map((f) => ({ + path: f.path, + sha256: f.sha256, + size_bytes: f.sizeBytes, + })); + + return { publishFiles, baseRevisionId: cache.revision_id, deletedPaths, effectiveTree }; +} + +// The repository kinds the server reports when a cached base is no longer usable +// (concurrent revise elsewhere, retained/deleted base, a non-inheritable base +// file). Any of these means "abandon the partial manifest and re-publish whole." +// These do not all arrive as distinct wire codes: a patch failure surfaces as wire +// code `patch_conflict`, but the five base-* kinds collapse to `invalid_request` +// with the kind attached as the error message detail (ADR 0089, finalize handler). +// So the message-substring match below is load-bearing, not just defensive — it is +// the only signal for the base-* kinds. +const BASE_UNUSABLE_CODES = new Set([ + "patch_conflict", + "patch_base_mismatch", + "base_revision_not_found", + "base_revision_not_publishable", + "base_revision_artifact_mismatch", + "deleted_path_not_in_base", + "inherited_path_not_blob_backed", +]); + +export function isBaseUnusableError(error: unknown): boolean { + const code = (error as { code?: unknown })?.code; + if (typeof code === "string" && BASE_UNUSABLE_CODES.has(code)) { + return true; + } + // Messages: `patch_conflict: <path>: <reason>` for a patch failure, or the bare + // base-* kind name for a collapsed-to-invalid_request base error. Both contain a + // BASE_UNUSABLE_CODES member as a substring. 
+ const message = (error as { message?: unknown })?.message; + return typeof message === "string" && [...BASE_UNUSABLE_CODES].some((c) => message.includes(c)); +} diff --git a/apps/cli/src/unified-diff-gen.test.ts b/apps/cli/src/unified-diff-gen.test.ts new file mode 100644 index 00000000..9412b8dd --- /dev/null +++ b/apps/cli/src/unified-diff-gen.test.ts @@ -0,0 +1,83 @@ +import { createHash } from "node:crypto"; +import { applyUnifiedDiff } from "@agent-paste/storage"; +import { describe, expect, it } from "vitest"; +import { diffWithSelfCheck } from "./unified-diff-gen.js"; + +function sha(text: string): string { + return createHash("sha256").update(new TextEncoder().encode(text)).digest("hex"); +} + +async function roundTrip(baseText: string, nextText: string) { + const nextBytes = new TextEncoder().encode(nextText); + const diffBytes = await diffWithSelfCheck({ + baseText, + baseSha256: sha(baseText), + nextText, + nextBytes, + expectedResultSha256: sha(nextText), + }); + return { diffBytes, nextBytes }; +} + +// Every case: generate a diff, then independently apply it and assert the result is +// byte-identical to nextText (the same check the server runs at finalize). +const cases: Array<[name: string, base: string, next: string]> = [ + ["single line change", "hello world\n", "hello there\n"], + ["insert a line", "a\nb\nc\n", "a\nb\nB2\nc\n"], + ["delete a line", "a\nb\nc\n", "a\nc\n"], + [ + "replace middle of many", + Array.from({ length: 50 }, (_, i) => `line ${i}`).join("\n") + "\n", + Array.from({ length: 50 }, (_, i) => (i === 25 ? 
"CHANGED" : `line ${i}`)).join("\n") + "\n", + ], + ["CRLF preserved", "a\r\nb\r\nc\r\n", "a\r\nB\r\nc\r\n"], + ["BOM + non-ascii", "# Tïtle\ncafé\n", "# Tïtle\ncafé au lait\n"], + ["no trailing newline (base) -> newline", "a\nb", "a\nb\n"], + ["trailing newline -> no trailing newline", "a\nb\n", "a\nb"], + ["both no trailing newline", "a\nb", "a\nB"], + ["append without trailing newline", "a\n", "a\nb"], + ["empty base -> content", "", "new line\n"], + ["multibyte astral", "a\n😀\nb\n", "a\n😀🎉\nb\n"], +]; + +describe("diffWithSelfCheck", () => { + for (const [name, base, next] of cases) { + it(`round-trips: ${name}`, async () => { + const { diffBytes } = await roundTrip(base, next); + // Some cases produce a diff not smaller than the file (tiny files): then null + // is correct (caller whole-blobs). When a diff IS produced it must reconstruct. + if (diffBytes === null) { + return; + } + const applied = await applyUnifiedDiff({ + baseBytes: new TextEncoder().encode(base), + diffBytes, + expectedBaseSha256: sha(base), + expectedResultSha256: sha(next), + }); + expect(applied.ok).toBe(true); + if (applied.ok) { + expect(new TextDecoder().decode(applied.result)).toBe(next); + } + }); + } + + it("returns null for an unchanged file (no empty diff is ever emitted)", async () => { + const { diffBytes } = await roundTrip("same\n", "same\n"); + expect(diffBytes).toBeNull(); + }); + + it("returns null when the diff is not smaller than the new file", async () => { + // A total rewrite of a tiny file: the diff carries both sides, so it is larger. 
+ const { diffBytes } = await roundTrip("x\n", "completely different content here\n"); + expect(diffBytes).toBeNull(); + }); + + it("produces a real saving on a large file with a one-line edit", async () => { + const base = Array.from({ length: 2000 }, (_, i) => `line number ${i} with some padding text`).join("\n") + "\n"; + const next = base.replace("line number 1000 with some padding text", "line number 1000 EDITED"); + const { diffBytes, nextBytes } = await roundTrip(base, next); + expect(diffBytes).not.toBeNull(); + expect(diffBytes!.byteLength).toBeLessThan(nextBytes.byteLength / 2); + }); +}); diff --git a/apps/cli/src/unified-diff-gen.ts b/apps/cli/src/unified-diff-gen.ts new file mode 100644 index 00000000..59f75d78 --- /dev/null +++ b/apps/cli/src/unified-diff-gen.ts @@ -0,0 +1,206 @@ +import { applyUnifiedDiff } from "@agent-paste/storage"; + +// Byte-exact unified-diff generator for the CLI patch-revise path (ADR 0089). +// It must produce output the storage applier (`applyUnifiedDiff`) reconstructs +// to the exact result bytes, so it NEVER normalizes line endings, BOM, or trailing +// newlines: lines split on raw LF (0x0a) only, a trailing CR stays in the line +// content, and a final line without a newline emits the "\ No newline" marker. +// +// The generator is best-effort, not minimal — `diffWithSelfCheck` applies the +// generated diff locally and verifies the result digest before any caller trusts +// it, so a suboptimal (but correct) diff is fine and a buggy one degrades to a +// whole-blob upload rather than a finalize conflict. + +const NO_NEWLINE_MARKER = "\\ No newline at end of file"; + +type Line = { text: string; hasTerminator: boolean }; + +// Split into lines on raw LF. The text excludes the terminating LF (matching the +// applier's content comparison) but keeps any CR, so CRLF round-trips. A trailing +// segment with no LF is a line without a terminator. 
+function splitLines(text: string): Line[] { + const lines: Line[] = []; + let start = 0; + for (let i = 0; i < text.length; i++) { + if (text[i] === "\n") { + lines.push({ text: text.slice(start, i), hasTerminator: true }); + start = i + 1; + } + } + if (start < text.length) { + lines.push({ text: text.slice(start), hasTerminator: false }); + } + return lines; +} + +// Longest-common-subsequence table over line text, walked back into an edit script. +type Op = { kind: "equal" | "delete" | "add"; oldIndex?: number; newIndex?: number }; + +function lcsOps(base: Line[], next: Line[]): Op[] { + const n = base.length; + const m = next.length; + const baseText = base.map((line) => line.text); + const nextText = next.map((line) => line.text); + // Flat (n+1)*(m+1) table; typed-array indexing is always a number (no undefined). + const width = m + 1; + const table = new Int32Array((n + 1) * width); + const at = (i: number, j: number) => table[i * width + j] ?? 0; + for (let i = n - 1; i >= 0; i--) { + for (let j = m - 1; j >= 0; j--) { + table[i * width + j] = baseText[i] === nextText[j] ? at(i + 1, j + 1) + 1 : Math.max(at(i + 1, j), at(i, j + 1)); + } + } + const ops: Op[] = []; + let i = 0; + let j = 0; + while (i < n && j < m) { + if (baseText[i] === nextText[j]) { + ops.push({ kind: "equal", oldIndex: i, newIndex: j }); + i++; + j++; + } else if (at(i + 1, j) >= at(i, j + 1)) { + ops.push({ kind: "delete", oldIndex: i }); + i++; + } else { + ops.push({ kind: "add", newIndex: j }); + j++; + } + } + while (i < n) ops.push({ kind: "delete", oldIndex: i++ }); + while (j < m) ops.push({ kind: "add", newIndex: j++ }); + return ops; +} + +const CONTEXT_LINES = 3; + +// A "\ No newline" marker is emitted immediately after the last line of a side when +// that line has no terminator. The applier reads it as "the preceding emitted line +// carries no trailing newline". 
+function lineBody(line: Line, prefix: string): string[] { + if (line.hasTerminator) { + return [`${prefix}${line.text}`]; + } + return [`${prefix}${line.text}`, NO_NEWLINE_MARKER]; +} + +type Hunk = { oldStart: number; oldLines: number; newStart: number; newLines: number; body: string[] }; + +// Group the LCS edit script into hunks, each carrying up to CONTEXT_LINES of +// unchanged context around its changes. Runs of >2*CONTEXT equal lines split the +// hunk so the diff stays small on large files. Line numbers are 1-based; oldLines +// counts context+deletes, newLines counts context+adds. +function buildHunks(base: Line[], next: Line[], ops: Op[]): Hunk[] { + const changeIndexes = ops.map((op, i) => (op.kind === "equal" ? -1 : i)).filter((i) => i >= 0); + if (changeIndexes.length === 0) { + return []; + } + const hunks: Hunk[] = []; + let i = 0; + while (i < changeIndexes.length) { + const startOp = changeIndexes[i] ?? 0; + const hunkStart = Math.max(0, startOp - CONTEXT_LINES); + // Extend through changes that are within 2*CONTEXT equal lines of each other. + let endOp = startOp; + let j = i; + while ( + j + 1 < changeIndexes.length && + (changeIndexes[j + 1] ?? 0) - (changeIndexes[j] ?? 0) <= 2 * CONTEXT_LINES + 1 + ) { + endOp = changeIndexes[j + 1] ?? 
endOp; + j++; + } + const hunkEnd = Math.min(ops.length - 1, endOp + CONTEXT_LINES); + hunks.push(materializeHunk(base, next, ops, hunkStart, hunkEnd)); + i = j + 1; + } + return hunks; +} + +type HunkAccumulator = { body: string[]; oldLines: number; newLines: number; oldStart: number; newStart: number }; + +function emitEqual(acc: HunkAccumulator, line: Line, oldIndex: number, newIndex: number): void { + if (acc.oldLines === 0) acc.oldStart = oldIndex; + if (acc.newLines === 0) acc.newStart = newIndex; + acc.body.push(...lineBody(line, " ")); + acc.oldLines++; + acc.newLines++; +} + +function emitDelete(acc: HunkAccumulator, line: Line, oldIndex: number): void { + if (acc.oldLines === 0) acc.oldStart = oldIndex; + acc.body.push(...lineBody(line, "-")); + acc.oldLines++; +} + +function emitAdd(acc: HunkAccumulator, line: Line, newIndex: number): void { + if (acc.newLines === 0) acc.newStart = newIndex; + acc.body.push(...lineBody(line, "+")); + acc.newLines++; +} + +function materializeHunk(base: Line[], next: Line[], ops: Op[], from: number, to: number): Hunk { + const acc: HunkAccumulator = { body: [], oldLines: 0, newLines: 0, oldStart: 0, newStart: 0 }; + for (let k = from; k <= to; k++) { + const op = ops[k]; + if (op?.kind === "equal" && op.oldIndex !== undefined && op.newIndex !== undefined) { + const line = base[op.oldIndex]; + if (line) emitEqual(acc, line, op.oldIndex, op.newIndex); + } else if (op?.kind === "delete" && op.oldIndex !== undefined) { + const line = base[op.oldIndex]; + if (line) emitDelete(acc, line, op.oldIndex); + } else if (op?.kind === "add" && op.newIndex !== undefined) { + const line = next[op.newIndex]; + if (line) emitAdd(acc, line, op.newIndex); + } + } + // 1-based; an empty side anchors at 0 so the applier's oldLines===0 rule applies. + return { + oldStart: acc.oldLines === 0 ? acc.oldStart : acc.oldStart + 1, + oldLines: acc.oldLines, + newStart: acc.newLines === 0 ? 
acc.newStart : acc.newStart + 1, + newLines: acc.newLines, + body: acc.body, + }; +} + +function buildDiff(base: Line[], next: Line[]): string { + const ops = lcsOps(base, next); + const hunks = buildHunks(base, next, ops); + const blocks = hunks.map( + (h) => `@@ -${h.oldStart},${h.oldLines} +${h.newStart},${h.newLines} @@\n${h.body.join("\n")}`, + ); + return `${blocks.join("\n")}\n`; +} + +/** + * Generate a unified diff from `baseText` to `nextText`, verify it reconstructs to + * the exact `nextBytes`, and return the diff bytes — or null when the file is + * unchanged, the diff is not smaller than the new file, or the self-check fails + * (caller should upload the whole file instead). `expectedResultSha256` is the + * plaintext digest the server will verify the reconstruction against. + */ +export async function diffWithSelfCheck(input: { + baseText: string; + baseSha256: string; + nextText: string; + nextBytes: Uint8Array; + expectedResultSha256: string; +}): Promise { + if (input.baseText === input.nextText) { + return null; + } + const base = splitLines(input.baseText); + const next = splitLines(input.nextText); + const diffText = buildDiff(base, next); + const diffBytes = new TextEncoder().encode(diffText); + if (diffBytes.byteLength >= input.nextBytes.byteLength) { + return null; + } + const applied = await applyUnifiedDiff({ + baseBytes: new TextEncoder().encode(input.baseText), + diffBytes, + expectedBaseSha256: input.baseSha256, + expectedResultSha256: input.expectedResultSha256, + }); + return applied.ok ? 
diffBytes : null; +} diff --git a/apps/cli/test/index.test.ts b/apps/cli/test/index.test.ts index 663071bb..ffa1c79a 100644 --- a/apps/cli/test/index.test.ts +++ b/apps/cli/test/index.test.ts @@ -2,7 +2,7 @@ import { promises as fs } from "node:fs"; import os from "node:os"; import path from "node:path"; import { pathToFileURL } from "node:url"; -import { afterEach, describe, expect, it, vi } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { Credential } from "../src/credentials.js"; import * as credentials from "../src/credentials.js"; import { isMainEntrypoint, logout, main, parseArgs, SCHEMA_VERSION, shellQuote } from "../src/index.js"; @@ -31,8 +31,27 @@ const revisionId = "rev_01HZY7Q8X9Y2S3T4V5W6X7Y8Z9"; const uploadSessionId = "upl_01HZY7Q8X9Y2S3T4V5W6X7Y8Z9"; const accessLinkId = "al_01HZY7Q8X9Y2S3T4V5W6X7Y8Z9"; -afterEach(() => { +// Sandbox the per-artifact manifest cache (ADR 0089) so publish tests do +// not write to the developer's real ~/.config/agent-paste. +let configHome: string | undefined; +let previousConfigHome: string | undefined; + +beforeEach(async () => { + previousConfigHome = process.env.XDG_CONFIG_HOME; + configHome = await fs.mkdtemp(path.join(os.tmpdir(), "agent-paste-cfg-")); + process.env.XDG_CONFIG_HOME = configHome; +}); + +afterEach(async () => { vi.restoreAllMocks(); + if (previousConfigHome === undefined) { + delete process.env.XDG_CONFIG_HOME; + } else { + process.env.XDG_CONFIG_HOME = previousConfigHome; + } + if (configHome) { + await fs.rm(configHome, { recursive: true, force: true }); + } }); function mockStdout() { @@ -510,6 +529,194 @@ describe("cli command dispatch", () => { } }); + it("retries a revise as a full whole-blob publish when the cached base is unusable", async () => { + mockStdout(); + const root = await fs.mkdtemp(path.join(os.tmpdir(), "agent-paste-cli-")); + try { + await fs.writeFile(path.join(root, "index.html"), "

Hello

"); + // Seed a manifest cache for this artifact whose base the server will reject. + const manifests = path.join(configHome ?? "", "agent-paste", "manifests"); + await fs.mkdir(manifests, { recursive: true }); + const staleRevisionId = "rev_01HZY7Q8X9Y2S3T4V5W6X7Y8Z0"; + await fs.writeFile( + path.join(manifests, `${encodeURIComponent(artifactId)}.json`), + JSON.stringify({ + revision_id: staleRevisionId, + files: [{ path: "gone.html", sha256: "a".repeat(64), size_bytes: 5 }], + }), + ); + const sessionResponse = { + upload_session_id: uploadSessionId, + artifact_id: artifactId, + revision_id: revisionId, + status: "pending", + expires_at: "2026-01-01T00:00:00.000Z", + files: [ + { + status: "upload_required", + path: "index.html", + put_url: "https://upload.test/index", + required_headers: {}, + expires_at: "2026-01-01T00:00:00.000Z", + }, + ], + }; + const create = vi + .fn() + .mockRejectedValueOnce(Object.assign(new Error("patch_conflict"), { code: "patch_conflict" })) + .mockResolvedValueOnce(sessionResponse); + const finalize = vi.fn().mockResolvedValue({ + upload_session_id: uploadSessionId, + artifact_id: artifactId, + revision_id: revisionId, + status: "draft", + title: "Published", + entrypoint: "index.html", + file_count: 1, + size_bytes: 14, + }); + const publish = vi.fn().mockResolvedValue({ + artifact_id: artifactId, + revision_id: revisionId, + title: "Published", + private_url: "https://app.test/v/art_1", + revision_content_url: "https://content.test/v/token/index.html", + agent_view_url: "https://api.test/agent-view", + expires_at: "2026-02-01T00:00:00.000Z", + }); + const client = fakeClient({ + usagePolicy: vi.fn().mockResolvedValue(usagePolicy), + uploadSessions: { create, finalize }, + revisions: { publish }, + putFile: vi.fn().mockResolvedValue(undefined), + }); + + await main(["publish", root, "--artifact-id", artifactId], client); + + expect(create).toHaveBeenCalledTimes(2); + // First attempt used the cached base; the retry dropped it and 
sent a full manifest. + expect(create.mock.calls[0]?.[0]).toMatchObject({ base_revision_id: staleRevisionId }); + expect(create.mock.calls[1]?.[0]).not.toHaveProperty("base_revision_id"); + } finally { + await removePublishFixture(root); + } + }); + + it("self-heals when finalize collapses a base-unusable error to invalid_request", async () => { + // The base-* repository kinds reach the wire as code `invalid_request` with the kind + // attached as the message detail (ADR 0089). This proves the CLI keys on that detail — + // rejecting on `finalize` (where base errors realistically fire), not `create`, and with + // a bare `invalid_request` code, so it fails if the detail signal regresses. + mockStdout(); + const root = await fs.mkdtemp(path.join(os.tmpdir(), "agent-paste-cli-")); + try { + await fs.writeFile(path.join(root, "index.html"), "

Hello

"); + const manifests = path.join(configHome ?? "", "agent-paste", "manifests"); + await fs.mkdir(manifests, { recursive: true }); + const staleRevisionId = "rev_01HZY7Q8X9Y2S3T4V5W6X7Y8Z0"; + await fs.writeFile( + path.join(manifests, `${encodeURIComponent(artifactId)}.json`), + JSON.stringify({ + revision_id: staleRevisionId, + files: [{ path: "gone.html", sha256: "a".repeat(64), size_bytes: 5 }], + }), + ); + const sessionResponse = { + upload_session_id: uploadSessionId, + artifact_id: artifactId, + revision_id: revisionId, + status: "pending", + expires_at: "2026-01-01T00:00:00.000Z", + files: [ + { + status: "upload_required", + path: "index.html", + put_url: "https://upload.test/index", + required_headers: {}, + expires_at: "2026-01-01T00:00:00.000Z", + }, + ], + }; + const create = vi.fn().mockResolvedValue(sessionResponse); + const finalize = vi + .fn() + // Collapsed wire shape: code is the generic invalid_request; the precise kind is the message. + .mockRejectedValueOnce(Object.assign(new Error("base_revision_not_found"), { code: "invalid_request" })) + .mockResolvedValueOnce({ + upload_session_id: uploadSessionId, + artifact_id: artifactId, + revision_id: revisionId, + status: "draft", + title: "Published", + entrypoint: "index.html", + file_count: 1, + size_bytes: 14, + }); + const publish = vi.fn().mockResolvedValue({ + artifact_id: artifactId, + revision_id: revisionId, + title: "Published", + private_url: "https://app.test/v/art_1", + revision_content_url: "https://content.test/v/token/index.html", + agent_view_url: "https://api.test/agent-view", + expires_at: "2026-02-01T00:00:00.000Z", + }); + const client = fakeClient({ + usagePolicy: vi.fn().mockResolvedValue(usagePolicy), + uploadSessions: { create, finalize }, + revisions: { publish }, + putFile: vi.fn().mockResolvedValue(undefined), + }); + + await main(["publish", root, "--artifact-id", artifactId], client); + + expect(create).toHaveBeenCalledTimes(2); + 
expect(finalize).toHaveBeenCalledTimes(2); + expect(create.mock.calls[0]?.[0]).toMatchObject({ base_revision_id: staleRevisionId }); + expect(create.mock.calls[1]?.[0]).not.toHaveProperty("base_revision_id"); + } finally { + await removePublishFixture(root); + } + }); + + it("pull writes the file body to stdout, and --quiet does not suppress it", async () => { + const body = "line one\nline two\n"; + const readFile = vi.fn().mockResolvedValue({ + path: "notes.md", + sha256: "b".repeat(64), + size_bytes: body.length, + content_type: "text/markdown", + is_binary: false, + body, + }); + const client = fakeClient({ artifacts: { readFile } }); + + const stdout = mockStdout(); + await main(["pull", artifactId, "notes.md"], client); + expect(stdoutValues(stdout).join("")).toBe(body); + stdout.mockRestore(); + + // The body IS the result (cat-like), so --quiet must not suppress it — otherwise + // `pull … --quiet > file` writes an empty file. + const quietStdout = mockStdout(); + await main(["pull", artifactId, "notes.md", "--quiet"], client); + expect(stdoutValues(quietStdout).join("")).toBe(body); + quietStdout.mockRestore(); + }); + + it("pull refuses a binary file in plain mode", async () => { + const readFile = vi.fn().mockResolvedValue({ + path: "logo.bin", + sha256: "c".repeat(64), + size_bytes: 4, + content_type: "application/octet-stream", + is_binary: true, + }); + const client = fakeClient({ artifacts: { readFile } }); + mockStdout(); + await expect(main(["pull", artifactId, "logo.bin"], client)).rejects.toThrow(/binary/); + }); + it("throws on unknown commands", async () => { await expect(main(["unknown"], fakeClient())).rejects.toThrow("Unknown command: unknown"); }); diff --git a/apps/mcp/src/tools.test.ts b/apps/mcp/src/tools.test.ts index d0e48f66..cbf1e8e3 100644 --- a/apps/mcp/src/tools.test.ts +++ b/apps/mcp/src/tools.test.ts @@ -211,6 +211,31 @@ describe("callMcpTool", () => { expect(result).toEqual({ ok: true, result: agentView }); }); + it("read_file 
forwards path + revision_id and returns the file content", async () => { + const artifactId = "art_01HZY7Q8X9Y2S3T4V5W6X7Y8Z9"; + const revisionId = "rev_01HZY7Q8X9Y2S3T4V5W6X7Y8Z9"; + const fileContent = { + path: "index.md", + sha256: "a".repeat(64), + size_bytes: 6, + content_type: "text/markdown", + is_binary: false, + body: "hello\n", + }; + const api = apiMock(["read"], Response.json(fileContent)); + const result = await callMcpTool( + "read_file", + { artifact_id: artifactId, path: "index.md", revision_id: revisionId }, + auth, + { api, upload, bearerToken: auth.bearerToken }, + ); + expect(result).toEqual({ ok: true, result: fileContent }); + const url = new URL(routeCall(api, 0).url); + expect(url.pathname.endsWith(`/artifacts/${artifactId}/file-content`)).toBe(true); + expect(url.searchParams.get("path")).toBe("index.md"); + expect(url.searchParams.get("revision_id")).toBe(revisionId); + }); + it("publish_artifact returns the private viewer link (content-only, private)", async () => { vi.stubGlobal( "fetch", diff --git a/apps/mcp/src/tools.ts b/apps/mcp/src/tools.ts index 90b978fb..f0bcd152 100644 --- a/apps/mcp/src/tools.ts +++ b/apps/mcp/src/tools.ts @@ -20,6 +20,8 @@ import { McpPublishArtifactOutput, type McpPublishRenderMode, type McpReadArtifactInput, + type McpReadFileInput, + McpReadFileOutput, type McpRevokeAccessLinkInput, McpRevokeAccessLinkOutput, type McpScope, @@ -93,6 +95,8 @@ export async function callMcpTool( return callListArtifacts(inputParsed.data as McpListArtifactsInput, deps); case "read_artifact": return callReadArtifact(inputParsed.data as McpReadArtifactInput, deps); + case "read_file": + return callReadFile(inputParsed.data as McpReadFileInput, deps); case "list_revisions": return callListRevisions(inputParsed.data as McpListRevisionsInput, deps); case "delete_artifact": @@ -287,6 +291,17 @@ async function callReadArtifact(input: McpReadArtifactInput, deps: McpToolDeps): return parseForwardResult(forwarded, AgentView, 
"agentView.getLatest"); } +async function callReadFile(input: McpReadFileInput, deps: McpToolDeps): Promise { + const forwarded = await forwardToApiRoute({ + api: deps.api, + routeId: "artifacts.fileContent", + params: { artifact_id: input.artifact_id }, + query: { path: input.path, revision_id: input.revision_id }, + bearerToken: deps.bearerToken, + }); + return parseForwardResult(forwarded, McpReadFileOutput, "artifacts.fileContent"); +} + async function callListRevisions(input: McpListRevisionsInput, deps: McpToolDeps): Promise { const forwarded = await forwardToApiRoute({ api: deps.api, diff --git a/apps/mcp/src/transport.test.ts b/apps/mcp/src/transport.test.ts index 0c655922..bcc53dbc 100644 --- a/apps/mcp/src/transport.test.ts +++ b/apps/mcp/src/transport.test.ts @@ -472,6 +472,7 @@ describe("MCP streamable HTTP transport", () => { "add_revision", "list_artifacts", "read_artifact", + "read_file", "list_revisions", "delete_artifact", "update_display_metadata", diff --git a/apps/upload/src/finalize.test.ts b/apps/upload/src/finalize.test.ts index 3a7cde21..329a06ec 100644 --- a/apps/upload/src/finalize.test.ts +++ b/apps/upload/src/finalize.test.ts @@ -261,6 +261,39 @@ describe("finalizeUploadSession", () => { await expectError(response, status, appCode); }); + // The five base-unusable kinds collapse to wire code invalid_request, so the precise + // kind must ride along as the message detail or the CLI cannot tell a stale base from + // a malformed request and never self-heals (ADR 0089). This is the server side of that + // contract: without the detail, message would be the bare "invalid_request". 
+ it.each([ + "base_revision_not_found", + "base_revision_not_publishable", + "base_revision_artifact_mismatch", + "deleted_path_not_in_base", + "inherited_path_not_blob_backed", + ] as const)("surfaces base-unusable kind %s as invalid_request with the kind as detail", async (repositoryKind) => { + const db: Repository = { + async getUploadSession() { + return sessionRecord(); + }, + async finalizeUploadSession() { + throw new RepositoryError(repositoryKind); + }, + } as Repository; + + const response = await finalizeUploadSession( + await contextFor(SESSION_ID, { ARTIFACTS: completeArtifacts() }), + apiKeyPrincipal, + db, + guard, + ); + + expect(response.status).toBe(400); + const body = (await response.json()) as { error: { code: string; message: string } }; + expect(body.error.code).toBe("invalid_request"); + expect(body.error.message).toBe(repositoryKind); + }); + it("rethrows unmapped repository failures", async () => { const db: Repository = { async getUploadSession() { diff --git a/apps/upload/src/finalize.ts b/apps/upload/src/finalize.ts index 8aebd4c5..e0e93273 100644 --- a/apps/upload/src/finalize.ts +++ b/apps/upload/src/finalize.ts @@ -5,6 +5,7 @@ import { isRepositoryError, observeUploadSessionForFinalize, type Repository, + RepositoryErrorCode, repositoryErrorToAppError, } from "@agent-paste/db"; import { type GuardState, getBoundResponders, type Principal } from "@agent-paste/worker-runtime"; @@ -14,6 +15,20 @@ import { uploadSessionActor } from "./upload-actor.js"; type RouteId = (typeof routeContracts)[number]["id"]; type GuardFor = GuardState>; +// A cached partial-manifest base can become unusable between publish and finalize +// (a concurrent revise, a retained/GC'd base Revision, a non-blob inherited file). +// These all collapse to the wire code `invalid_request`, so we surface the precise +// repository kind as the error detail; the CLI keys on it to drop its manifest cache +// and re-publish the whole tree (ADR 0089). 
Without this, the agent's self-heal is +// indistinguishable from a genuinely malformed request and never fires. +const BASE_UNUSABLE_KINDS = new Set([ + RepositoryErrorCode.base_revision_not_found, + RepositoryErrorCode.base_revision_not_publishable, + RepositoryErrorCode.base_revision_artifact_mismatch, + RepositoryErrorCode.deleted_path_not_in_base, + RepositoryErrorCode.inherited_path_not_blob_backed, +]); + export async function finalizeUploadSession( context: AppContext, principal: Principal, @@ -58,17 +73,28 @@ export async function finalizeUploadSession( } const repositoryCode = repositoryErrorToAppError(error); if (repositoryCode) { - // A patch conflict carries the path + failure reason on the error cause so the - // agent learns which file to regenerate (ADR 0088). Other codes use their default - // message. - const detail = - repositoryCode === "patch_conflict" && isRepositoryError(error) && error.cause instanceof Error - ? error.cause.message - : undefined; - return getBoundResponders(context).respondError(repositoryCode, detail); + return getBoundResponders(context).respondError(repositoryCode, finalizeErrorDetail(repositoryCode, error)); } throw error; } return getBoundResponders(context).respondJson(FinalizeUploadSessionResponse.parse(result)); } + +// The error detail attached to a finalize failure so the agent can act on it. +// A patch conflict carries the path + failure reason on the error cause so the agent +// learns which file to regenerate (its message is already `patch_conflict: <path>: <reason>`). +// A base-unusable kind is surfaced by name so the CLI can self-heal (see BASE_UNUSABLE_KINDS). +// Anything else falls through to the wire code's default message. 
+function finalizeErrorDetail(repositoryCode: string, error: unknown): string | undefined { + if (!isRepositoryError(error)) { + return undefined; + } + if (repositoryCode === "patch_conflict" && error.cause instanceof Error) { + return error.cause.message; + } + if (BASE_UNUSABLE_KINDS.has(error.kind)) { + return error.kind; + } + return undefined; +} diff --git a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md index d845bb01..284bc783 100644 --- a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md +++ b/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md @@ -7,7 +7,7 @@ gaps stand in the way today, both recorded as the missing half of 1. **No tree inheritance.** A new **Revision** must re-enumerate every path with its `sha256`. Workspace blob dedup ([ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md)) - skips the unchanged *bytes*, but the client still walks and hashes the whole + skips the unchanged _bytes_, but the client still walks and hashes the whole directory and sends the full manifest. The smallest change an agent can express is "here is the entire new tree." 2. **No intra-file delta.** A blob is a whole file. 
One line changed in a 5 MB diff --git a/docs/adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md b/docs/adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md new file mode 100644 index 00000000..950fa7d8 --- /dev/null +++ b/docs/adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md @@ -0,0 +1,137 @@ +# Agent File Read-Back: `api` Decrypts and Returns Member Plaintext + +[ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) +gave the server everything it needs to accept a **partial-manifest publish with +per-file unified-diff patches**, and Stage 4 made reconstruction fail loud at +finalize (`patch_conflict`). But an agent can only _produce_ a correct unified +diff if it can read the file it is changing — with the exact bytes and the exact +plaintext `sha256` the server will validate the patch base against. An agent that +already has the working directory (the common CLI case) reads from disk; an agent +without it (MCP, a fresh session, another machine) had **no way to read a stored +file back**. `read_artifact` returns metadata only — paths, sizes, content types, +and a signed browser `url` — never the bytes and (until now) not even the `sha256`. + +Without a read-back, the patch loop cannot close for the no-working-dir case: the +agent would guess a base, the diff would fail to apply, and the Stage 4 conflict +flag-back would tell it "your base was wrong" with no way to fix it. + +## Decision + +Add a **member-authed file read-back** so an agent can fetch one stored file's +plaintext and its `sha256`, then diff against it. + +1. **`sha256` on the Agent View file entries.** `AgentViewFile` gains an optional + `sha256` (the plaintext content address). An agent compares it against its local + copy to decide what changed before reading anything back. Optional because + diff-only / draft rows have no materialized blob. + +2. 
**A new read route in `api`, not `content`.** `GET +/v1/artifacts/{artifact_id}/file-content?path=&revision_id=` (member-authed, + `read` scope, `actor` rate limit) returns + `{ path, sha256, size_bytes, content_type, is_binary, body? }`. The file path + travels as a query param because it may contain `/`, which route-path building + encodes and Hono `:param` will not match. `revision_id` pins the read to a + Revision (the CLI pins it to its cached base so the diff base and the inherit + base are the same Revision); absent means latest. + + - **`is_binary` is byte-derived, true binary only** (`decodeUtf8Strict(bytes) +=== null`, the same helper the diff applier uses). It is NOT "too big to + inline." A text file over the inline cap is `is_binary: false` with `body` + ABSENT; the agent reads that as "text, fetch via the content url or upload + whole, never a patch." + - **Oversize files skip the R2 read entirely.** If `size_bytes > 10 MiB` the + route returns metadata with no body WITHOUT reading or decrypting R2, so a + single request never buffers a multi-megabyte decrypt (honoring the ADR 0063 + no-buffering intent on the decrypt path). The inline cap reuses the existing + 10 MiB MCP text-body limit — no new magic number. + - Infra failures (missing ring/R2, decrypt/metadata error) map to + `storage_unavailable` (503), never `not_found` — a transient blob miss must + not look like a deleted file. + +3. **`api` now decrypts artifact bytes and returns plaintext.** This is the + boundary-relevant decision: until now only the **`content`** Worker decrypted + stored bytes on read ([ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md), + behind a content-gateway token, streamed). `api` already holds + `ARTIFACT_BYTES_ENCRYPTION_KEY` in its env but never exercised it on a read. + The new route reuses `readWorkspaceBlobBytes` (the Stage 4 helper) to decrypt a + single blob and return its plaintext over a member-authed JSON API. 
+ + This does **not** widen the confidentiality boundary: the caller is the owning + **Workspace Member**, who already owns the artifact and can fetch the same bytes + through the signed `AgentViewFile.url`. Encryption defends the **platform tier** + (Cloudflare-side R2 misconfiguration / object-store insider), not the owning + member, so returning plaintext to that member is not a leak. `api` stays in the + [ADR 0045](./0045-secret-rotation-cadence-and-on-demand-tooling.md) key-rotation + set for `ARTIFACT_BYTES_*`. `content` is untouched — its no-DB isolation and + streamed whole-object decrypt are unchanged. + +4. **The blob key is never client-controlled.** The client supplies + `(artifact_id, path)`; the route resolves the file row under the actor's + **workspace scope** (RLS, via `getAgentView`), derives the object key from + `(workspace_id-from-actor, validated-row-sha256)`, and the AES-GCM AAD binds + both — a substituted key or sha cannot decrypt. A cross-tenant artifact returns + `not_found` with no existence oracle. This mirrors the Stage 4 reconstructor + seam exactly. + +5. **An MCP `read_file` tool** forwards to the same route (read-only parity for the + no-shell surface). MCP `add_revision` stays text-body-only: the patch-producing + path lives in the CLI, which has the working directory to diff. This is the + minimal change consistent with + [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md) (one shared publish + path, no duplicate implementation). + +6. **The CLI diff client (the ergonomics payoff).** The CLI caches the last + published manifest per artifact (`paths + sha256 + revision_id`) under + `configDir()`. On a revise (`publish --artifact-id`) it diffs the working dir + against the cache and sends only changed/added files + `deleted_paths` against + `base_revision_id`; unchanged files inherit by omission. 
A changed text file is + sent as a verified unified diff (whole-blob for binary or when the diff is not + smaller); the generator self-checks by applying its own diff and comparing the + result digest before attaching the patch, so a generator bug degrades to a + correct whole-blob upload, never a finalize conflict. **No size threshold** — + always diff changed text (KISS); the server byte-verifies regardless. + + A stale or unusable cached base (concurrent revise elsewhere, retained/deleted + base, a non-inheritable base file) makes finalize return one of a known set of + base-unusable errors; the CLI catches them, drops the cache, and re-publishes + the whole working directory once (the bytes are on disk). A corrupt or + schema-drifted cache is treated as a cache miss → full publish. The cache holds + no bytes and no secrets and is written `0o600`. + +## Considered Options + +- **Read-back in `content`.** Rejected. `content` has no DB (it cannot resolve + `(artifact_id, path)` → blob `sha256`) and serves via signed Access Link, not + member auth. Giving it a DB binding or a side channel breaks the isolation the + **Content Origin** exists to provide + ([ADR 0001](./0001-private-artifact-storage-behind-controlled-origin.md), + [ADR 0028](./0028-signed-url-tokens-for-content-gateway-authorization.md)). +- **Return raw base64 for every file.** Rejected. It bloats text reads ~33% and + pushes binary detection onto the agent; text body + an `is_binary` flag is the + ergonomic match for a diff-producing agent. +- **A patch byte-size threshold (only diff when much smaller).** Deferred. A + speculative magic number; the diff/self-check + the "not smaller → whole-blob" + guard already cap the worst case. Add a threshold only if a real workload proves + it pays off. +- **Extend MCP `add_revision` to carry a partial/patched file set.** Deferred. 
An + MCP agent sends a single inline body over JSON-RPC with no base bytes to diff; + the patch path belongs where the working directory is (the CLI). `read_file` + gives MCP the read half; the diff half stays CLI-only. + +## Consequences + +- An agent without the working directory can now read the true base and produce a + correct patch, closing the Stage 4 loop for the MCP / fresh-session case. +- `api` is a second byte-decryption surface. It is member-authed, per-actor rate + limited, size-capped before the read, and returns plaintext only to the owning + member. Future readers seeing `api` decrypt should not assume a leak — the + boundary is unchanged from ADR 0063. +- The `ArtifactFileContent` response is `.strict()` and is the MCP `read_file` + output contract; the route builds it from a fixed field set so no extra field + can leak and silently 500 the MCP parse (the class of bug behind earlier strict + envelope / null-revision incidents). A test asserts the real handler output + parses under the strict contract. +- Builds on [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md); + amends [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md)'s + "decrypt-on-read is `content`-only" note (now `content` + the member read route + in `api`). Defers Range serving, a patch threshold, and an MCP patch path. diff --git a/docs/adr/README.md b/docs/adr/README.md index 1df77fd0..df382772 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -39,8 +39,10 @@ This directory is the decision log for agent-paste: it records _why_ choices wer - [ADR 0083](./0083-local-repository-backend-enforces-run-scope.md) records that the local in-memory repository backend now enforces the **Run Scope** ([ADR 0070](./0070-repository-core-ports-and-adapters.md)) through a **Scoped View**, as a deliberate test-surface bug detector rather than a faithful RLS emulator. 
Under a workspace Run Scope a foreign read returns nothing (RLS-faithful), a foreign `insert` throws (loud and self-labeling, the one `set()` path), and every other foreign mutation no-ops; the platform Run Scope is unfiltered. It only closes a gap in the local backend's enforcement and does not change the production isolation model ([ADR 0044](./0044-workspace-isolation-via-postgres-rls.md) Postgres RLS is unchanged). - [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md) records that the **CLI** and **MCP** are two transports over one publish path: both call `runPublish` in `@agent-paste/api-client`, differing only behind a four-method `PublishTransport` seam (CLI over the HTTPS `ApiClient`, MCP over Worker service bindings). It forbids reintroducing a surface-specific publish implementation — the divergence that shipped the no-link-on-MCP and draft-`list_artifacts`-500 bugs. The shared module is exposed on the Worker-safe `@agent-paste/api-client/publish` subpath so the MCP bundle never pulls the Node-only `ApiClient`. The publish output is `{title, private_url, expires_at, upload_stats?}` with no `shared` field, per [ADR 0086](./0086-publish-is-content-only-private-first.md). It does not merge the two binaries; login/logout/upgrade, ephemeral provisioning, idempotency-key derivation, and output rendering stay caller-specific. - [ADR 0085](./0085-publish-returns-one-viewer-url.md) — **Status: Superseded by [ADR 0086](./0086-publish-is-content-only-private-first.md).** It recorded that publish (both surfaces, through [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)) returns one `viewer_url` plus a `shared` boolean, private by default, where `viewer_url` switched between the authenticated **Private Link** and the public **Share Link**'s signed URL. 
ADR 0086 retired that switching field and the `share`/`shared` convention: the switching link lied on revise (it reported `shared:false` while a live Share Link still served the page) and the `share` flag put public-by-flag on the content-publish call. **Viewer URL** is removed from [`CONTEXT.md`](../../CONTEXT.md). -- [ADR 0086](./0086-publish-is-content-only-private-first.md) supersedes [ADR 0085](./0085-publish-returns-one-viewer-url.md): publish is content-only and private-first. `publish_artifact`, `add_revision`, and `agent-paste publish` accept no visibility input and return exactly one link — the **Private Link**, surfaced as `private_url`, a login-walled clean viewer at `/v/` for the owning **Workspace Member** (never the **Artifact Console** at `/artifacts/`). The `share`/`--share` inputs and the `shared` output bit are removed from every surface (CLI, MCP, the REST `PublishRevisionRequest` body, and `runPublish`); the server `PublishResult` renames `viewer_url`/`artifact_url` to `private_url` and drops `access_link_url`. Creating unauthenticated Share Link access is a separate explicit verb: `make_public` (MCP) and `agent-paste make-public` (CLI), replacing `create_share_link`, mint or reuse the one revocable **Share Link** and return its no-login **Access Link Signed URL**. `revoke_access_link`, `list_access_links`, and `create_revision_link` are unchanged; the [ADR 0047](./0047-access-link-signed-url-with-fragment-encoded-payload.md) Access Link grant model is untouched. [`CONTEXT.md`](../../CONTEXT.md) deletes **Viewer URL**, renames **Artifact URL** to **Artifact Console**, and retargets **Private Link** at the `/v` viewer. Amends [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)'s output-shape note. -- [ADR 0087](./0087-public-artifacts-and-unlisted-share-links.md) records the planned split between unlisted Share Links and true Public Artifacts. 
Current shipped behavior is still ADR 0086: `make_public` / `agent-paste make-public` mint or reuse a Share Link. The future Public Artifact model gets a stable ID-only `/p/{publicId}` Public URL, frozen Public Version, soft Public Offline control, cacheable Public Version Assets, and operator-only Platform Lockdown for hard takedown. +- [ADR 0086](./0086-publish-is-content-only-private-first.md) supersedes [ADR 0085](./0085-publish-returns-one-viewer-url.md): publish is content-only and private-first. `publish_artifact`, `add_revision`, and `agent-paste publish` accept no visibility input and return exactly one link — the **Private Link**, surfaced as `private_url`, a login-walled clean viewer at `/v/` for the owning **Workspace Member** (never the **Artifact Console** at `/artifacts/`). The `share`/`--share` inputs and the `shared` output bit are removed from every surface (CLI, MCP, the REST `PublishRevisionRequest` body, and `runPublish`); the server `PublishResult` renames `viewer_url`/`artifact_url` to `private_url` and drops `access_link_url`. Going public is a separate explicit verb: `make_public` (MCP) and `agent-paste make-public` (CLI), replacing `create_share_link`, mint or reuse the one revocable **Share Link** and return its no-login **Access Link Signed URL**. `revoke_access_link`, `list_access_links`, and `create_revision_link` are unchanged; the [ADR 0047](./0047-access-link-signed-url-with-fragment-encoded-payload.md) Access Link grant model is untouched. [`CONTEXT.md`](../../CONTEXT.md) deletes **Viewer URL**, renames **Artifact URL** to **Artifact Console**, and retargets **Private Link** at the `/v` viewer. Amends [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)'s output-shape note. 
+- [ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md) records (retroactively) the shipped workspace-scoped content-addressed blob model: a client-supplied plaintext `sha256` lets `createUploadSession` return `reused` for files whose `(workspace_id, sha256, size_bytes)` blob already exists, so unchanged bytes skip the PUT. Blobs live at `workspaces/{wid}/blobs/sha256/{prefix}/{sha256}`, are encrypted under the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) per-Workspace DEK with a path/revision-independent AAD `v2`, and are GC'd reference-counted without deleting the deterministic R2 object. Dedup is workspace-scoped (not global) to preserve the tenant boundary; the digest is verified on PUT. Whole-file only — the client must still re-enumerate the full manifest, which [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) addresses. +- [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) makes **Revisions** behave like Git commits so an agent can express "change this file" instead of the whole tree. Adds `revisions.parent_revision_id` and a `base_revision_id` + `deleted_paths` + partial-manifest publish contract where unlisted paths inherit the parent tree by reference (`api`-side merge, full `artifact_files` tree still materialized). Layers server-reconstructed intra-file delta on top: a changed file may be sent as a unified diff (text; whole-blob fallback for binary), reconstructed and re-hashed to a whole blob **synchronously at finalize in `upload`** so a patch that cannot apply fails the same publish call with an agent-visible `patch_conflict` (a broken patch never becomes a servable draft) and `content` plus the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) encryption boundary are untouched. 
Builds on [ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md); defers chunk stores, per-block AEAD, Range serving, global dedup, and dropping encryption. +- [ADR 0089](./0089-agent-file-read-back-api-decrypts-member-plaintext.md) closes the [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) patch loop for agents that lack the working directory. Adds an optional `sha256` to `AgentViewFile`, a member-authed `GET /v1/artifacts/{id}/file-content?path=&revision_id=` read route in **`api`** returning `{ path, sha256, size_bytes, content_type, is_binary, body? }` (text body when UTF-8 and ≤10 MiB; oversize/binary return metadata only, oversize skips the R2 read), and an MCP `read_file` tool. The boundary-relevant decision: **`api` now decrypts artifact bytes and returns plaintext** to the owning Member — it does not widen the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) confidentiality boundary (the Member already owns the artifact and can fetch the same bytes via the signed `url`; encryption defends the platform tier, not the owner), and `content` is untouched. The blob key is derived from the RLS-scoped row's `sha256` + the actor's workspace, never client input. Adds the **CLI diff client** (per-artifact manifest cache + working-dir diff → partial manifest with verified unified diffs; stale-base → full-publish fallback) and an `agent-paste pull` verb; MCP `add_revision` stays text-body-only ([ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)). Amends ADR 0063's "decrypt-on-read is `content`-only" note. - [ADR 0021](./0021-id-based-r2-object-key-layout.md) is amended for revision file keys. The ADR originally described env-scoped file keys; shipped revision files and upload PUT targets use the legacy `artifacts/{artifactId}/revisions/{revisionId}/files/{path}` prefix. Derived bundles and env-scoped purge prefixes remain env-scoped. 
Current shapes are in [`docs/specs/data-model.md`](../specs/data-model.md#r2-object-key-layout). - [ADR 0062](./0062-two-layer-cache-for-hot-path-auth-lookups.md) is amended for the L2 synthetic cache URL. The ADR originally used `https://cache.agent-paste.internal/{namespace}/{key}`; the shipped helper uses `https://agent-paste.internal/cache/{namespace}/{key}`. Current behavior is in [`docs/specs/architecture.md`](../specs/architecture.md#auth-lookup-cache). - [`packages/contracts`](../../packages/contracts) and [`docs/specs/contracts.md`](../specs/contracts.md) are the canonical MVP implementation contract for Zod schemas, ID formats, and the route registry. ADRs provide rationale; contracts provide field-level implementation shape. diff --git a/docs/ops/git-like-revisions-todo.md b/docs/ops/git-like-revisions-todo.md index 40c047a2..abdd01a6 100644 --- a/docs/ops/git-like-revisions-todo.md +++ b/docs/ops/git-like-revisions-todo.md @@ -180,19 +180,35 @@ able to FAIL the finalize call. Finalize is also where the patch gate, the only - Done: big-file-small-edit uploads only the diff bytes; served file is byte-identical to applying the patch locally; `content` unchanged. -### Stage 5 - cli/mcp: the ergonomics payoff - -- CLI caches the last published manifest per artifact (paths + sha256 + revision - id) locally. On revise: diff the working dir against the cache; send only - changed/added files + `deleted_paths` against `base_revision_id`. **Unchanged - files are not re-hashed and not re-uploaded.** -- For a changed _text_ file above a size threshold, generate a unified diff - against the cached base and send the patch instead of the whole file. Below - threshold or binary: whole blob (cheaper than diff overhead). -- MCP `add_revision`: accept a partial file set + optional per-file patch, same - contract. This is the no-shell parity surface. -- Done: agent expresses "change one file" and the wire carries one diff; demo on - a multi-MB asset with a one-line edit. 
+### Stage 5 - cli/mcp: the ergonomics payoff + agent read-back (DONE) + +See [ADR 0089](../adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md) +for the decision record. The headline gap Stage 5 surfaced: an agent could not +**read a stored file back** to diff against, which is the prerequisite for +producing a correct patch when it lacks the working dir. So Stage 5 shipped both +the read-back and the CLI diff client. + +- **Read-back.** `AgentViewFile` gains optional `sha256`. New member-authed `api` + route `GET /v1/artifacts/{id}/file-content?path=&revision_id=` returns + `{ path, sha256, size_bytes, content_type, is_binary, body? }` (text body when + UTF-8 and ≤10 MiB; oversize skips the R2 read and returns metadata; binary sets + `is_binary:true`, no body). `api` decrypts via `readWorkspaceBlobBytes` (the + Stage 4 helper) — the first `api` byte-decrypt surface, member-only, boundary + unchanged (ADR 0089). MCP gains a `read_file` tool forwarding to it. +- **CLI diff client.** The CLI caches the last published manifest per artifact + (`paths + sha256 + revision_id`) under `configDir()`. On revise + (`publish --artifact-id`): diff the working dir against the cache, send only + changed/added files + `deleted_paths` against `base_revision_id`; unchanged + files inherit by omission (not re-hashed, not re-uploaded). A changed text file + is sent as a unified diff (`apps/cli/src/unified-diff-gen.ts`) only when the + generator self-check (apply locally, verify result sha) passes AND the diff is + smaller; otherwise whole-blob. **No size threshold** (KISS). A stale/unusable + cached base → drop cache, re-publish whole once. New `agent-paste pull` verb + reads a file back cat-like. +- **MCP `add_revision` stays text-body-only** (ADR 0084): the patch path needs a + working dir, so it lives in the CLI; MCP gets read parity via `read_file`. 
+- Done: `pnpm smoke:local:patch` proves the partial+patch path end to end; a + large file with a one-line edit uploads only the diff and serves byte-identical. ## Non-goals / deferred @@ -205,10 +221,14 @@ able to FAIL the finalize call. Finalize is also where the patch gate, the only ## Open questions -- Patch byte threshold for choosing diff vs whole-blob upload (measure; start - conservative, e.g. only diff when `diff_size < 0.5 * file_size` AND file - > a few hundred KB). -- RESOLVED: reconstruction runs in `jobs` (seam-honest; `upload` is write-only - today, `jobs` already does the read-decrypt-transform-reencrypt-write shape). - Remaining sub-question: exact pending-state model for a Revision whose Publish - waits on reconstruction (reuse Bundle `pending` machinery vs a new state). +- RESOLVED: no patch byte threshold. The CLI always attempts a unified diff for a + changed text file (sending a whole blob instead when the diff is not smaller or the generator self-check fails) and always sends a whole blob for binary (KISS; no magic numbers). The + server byte-verifies and flags conflicts regardless of diff size, so a + marginally useful diff costs a few bytes of overhead, not correctness. Add a + threshold only if a real large-file-frequent-edit workload proves it pays off. +- RESOLVED: reconstruction runs SYNCHRONOUSLY at finalize in the `upload` worker, + not async in `jobs`. The conflict flag-back is the feature: a patch that cannot + apply must FAIL the same finalize call with an agent-visible `patch_conflict`, + so a broken patch never becomes a servable draft. There is therefore no + pending-state model and no `reconstruction_status`. See the ADR 0088 Stage 4 + implementation notes.
diff --git a/docs/specs/api.md b/docs/specs/api.md index d9124ed0..0c9ed917 100644 --- a/docs/specs/api.md +++ b/docs/specs/api.md @@ -76,6 +76,7 @@ Authenticated `api` and `upload` routes enforce guards in a fixed order | `GET` | `/v1/whoami` | `cli_credential` | none | - | `WhoamiResponse` | | `GET` | `/v1/mcp/whoami` | `mcp_oauth` | none | - | `McpWhoamiResponse` | | `GET` | `/v1/artifacts/{artifact_id}/revisions` | `cli_or_mcp` | none | - | `RevisionListResponse` | +| `GET` | `/v1/artifacts/{artifact_id}/file-content` | `cli_or_mcp` | none | - | `ArtifactFileContent` | | `POST` | `/v1/artifacts/{artifact_id}/revisions/{revision_id}/publish` | `cli_or_mcp` | required | - | `PublishResult` | | `GET` | `/v1/public/agent-view/{token}` | `signed_agent_view_token` | none | - | `PublicAgentView` | @@ -85,6 +86,8 @@ Authenticated `api` and `upload` routes enforce guards in a fixed order `PublicAgentView` is public to anyone with the signed token. It returns full per-file signed content URLs, not `content_prefix`, and does not include lockdown metadata. Authenticated owner/member Agent View routes may include explicit lockdown metadata for dashboard-visible locked Artifacts. +`file-content` reads one stored file's decrypted plaintext for the owning Workspace Member so an agent can diff against it and revise with a unified-diff patch ([ADR 0089](../adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md)). Inputs: `?path=` (required; query, not a path segment, since a file path may contain `/`) and `?revision_id=` (optional; defaults to latest). The response `ArtifactFileContent` is `{ path, sha256, size_bytes, content_type, is_binary, body? }`: `body` is the decoded UTF-8 text and is present only when the file is text and `≤ 10 MiB`. 
`is_binary` is byte-derived (true binary only); a text file over the inline cap returns `is_binary: false` with `body` absent (the agent fetches it via the content URL or uploads a whole blob), and an oversize file is returned as metadata **without reading R2**. This is the only `api` route that decrypts artifact bytes; the blob key is derived from the RLS-scoped row's plaintext `sha256` plus the actor's workspace, never from client input, and a missing/undecryptable blob is `storage_unavailable` (503), never `not_found`. `AgentView` file entries also carry an optional plaintext `sha256` so an agent can detect what changed before reading a file back. + ## Upload Routes | Method | Path | Auth | Idempotency | Request | Response | diff --git a/docs/specs/cli.md b/docs/specs/cli.md index 3b6cc0b4..77946e2d 100644 --- a/docs/specs/cli.md +++ b/docs/specs/cli.md @@ -32,7 +32,9 @@ automatic; flags override detection. `... --json | jq` and `... > out.json` clean. - `--quiet` suppresses the human summary on stdout. Errors and the exit code still apply. `--quiet --json` still prints the JSON object (the object is the - point of `--json`); `--quiet` without `--json` prints nothing on success. + point of `--json`); `--quiet` without `--json` prints nothing on success. The + exception is `pull`, whose file body _is_ the result (cat-like), so `--quiet` + never suppresses it — otherwise `pull … --quiet > file` would write an empty file. ## JSON contract @@ -43,9 +45,35 @@ automatic; flags override detection. - `publish` is content-only and private: it emits one handoff link, `private_url` (the login-walled clean viewer at `/v/` for a Workspace Member) — the same field the MCP server returns. There is no `--share` input and no - `shared` output bit. Creating an unlisted no-login handoff is the separate - `make-public` command, which currently mints or reuses the one Share Link and - prints its no-login Access Link Signed URL. + `shared` output bit. 
Making an Artifact public is the separate `make-public` + command, which mints or reuses the one Share Link and prints its no-login + Access Link Signed URL. +- `pull <artifact-id> <path> [--revision-id <revision-id>]` reads one stored file back + ([ADR 0089](../adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md)). + Default output is cat-like (the raw text body to stdout, so `pull … > file` + works); `--json` emits `{ schema_version, path, sha256, size_bytes, is_binary, +body? }`. A binary file has no inline body: `--json` reports `is_binary: true` + with no `body`, and plain mode errors (raw bytes would corrupt the stream). An + oversize text file likewise has no `body`; fetch it via the content URL. + +## Incremental revise (manifest cache + diffs) + +On a revise (`publish --artifact-id <artifact-id>`), the CLI sends only what +changed instead of the whole tree ([ADR 0089](../adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md)). +It caches the last published manifest per artifact (`paths + sha256 + revision_id`) +under the CLI config dir and, on the next revise, diffs the working dir against +that cache: unchanged files inherit by reference (not re-hashed, not re-uploaded), +removed files become `deleted_paths`, and changed text files are sent as a +verified unified diff against `base_revision_id` (whole blob for binary or when the +diff is not smaller). The diff generator self-checks (applies its own diff and +verifies the result digest) before attaching a patch, so a generator bug degrades +to a correct whole-blob upload, never a finalize conflict. There is no diff size +threshold. If the cached base is no longer usable on the server (a concurrent +revise elsewhere, a retained/deleted base, or a non-inheritable base file), the +CLI drops the cache and re-publishes the whole working directory once; a corrupt +or schema-drifted cache is treated as a cache miss. The cache holds no bytes and +no secrets and is written `0600`.
+ - Errors in `json` mode are emitted on **stderr** as `{ "error": { "code", "message", "docs?" } }` (no `schema_version` — it is an error envelope, not a result). diff --git a/packages/api-client/src/index.ts b/packages/api-client/src/index.ts index 2ad935f3..a6b03f82 100644 --- a/packages/api-client/src/index.ts +++ b/packages/api-client/src/index.ts @@ -1,6 +1,7 @@ import { type AccessLinkId, AccessLinkSignedUrl, + ArtifactFileContent, type ArtifactId, type CreateAccessLinkRequest, CreateAccessLinkResponse, @@ -157,6 +158,23 @@ export class ApiClient { this.request(RevisionListResponse, this.apiBaseUrl, `/v1/artifacts/${encodeURIComponent(artifactId)}/revisions`), }; + artifacts = { + // Read one stored file's decrypted plaintext + sha256 so the caller can diff + // against it for a patch revise (ADR 0089). revisionId pins the read + // to a specific Revision; omit for the latest. + readFile: (artifactId: ArtifactId | string, path: string, revisionId?: RevisionId | string) => { + const query = new URLSearchParams({ path }); + if (revisionId) { + query.set("revision_id", String(revisionId)); + } + return this.request( + ArtifactFileContent, + this.apiBaseUrl, + `/v1/artifacts/${encodeURIComponent(artifactId)}/file-content?${query.toString()}`, + ); + }, + }; + ephemeral = { provision: (options: EphemeralProvisionOptions = {}) => this.provisionEphemeralWorkspace(options), }; diff --git a/packages/api-client/src/publish.test.ts b/packages/api-client/src/publish.test.ts index dfcfeb8d..9cdb6e92 100644 --- a/packages/api-client/src/publish.test.ts +++ b/packages/api-client/src/publish.test.ts @@ -145,4 +145,35 @@ describe("runPublish", () => { await runPublish(transport, input({ onUploadProgress })); expect(onUploadProgress).toHaveBeenCalledWith({ uploadedFiles: 1, totalToUpload: 1, uploadedBytes: 11 }); }); + + it("sends base_revision_id + deleted_paths for a partial-manifest revise", async () => { + const createUploadSession = 
vi.fn(fakeTransport().transport.createUploadSession); + const { transport } = fakeTransport({ createUploadSession }); + await runPublish(transport, input({ baseRevisionId: REVISION_ID as never, deletedPaths: ["old.md" as never] })); + const body = createUploadSession.mock.calls[0]?.[0]; + expect(body).toMatchObject({ base_revision_id: REVISION_ID, deleted_paths: ["old.md"] }); + }); + + it("encodes a patched file as a diff descriptor and omits its sha256", async () => { + const createUploadSession = vi.fn(fakeTransport().transport.createUploadSession); + const { transport } = fakeTransport({ createUploadSession }); + const patched = textFile({ + patch: { baseSha256: "b".repeat(64) as never, resultSha256: "c".repeat(64) as never }, + }); + await runPublish(transport, input({ files: [patched], baseRevisionId: REVISION_ID as never })); + const entry = (createUploadSession.mock.calls[0]?.[0] as { files: Record[] }).files[0]; + expect(entry).toEqual({ + path: "index.md", + size_bytes: 11, + patch: { base_sha256: "b".repeat(64), format: "unified", result_sha256: "c".repeat(64) }, + }); + expect(entry).not.toHaveProperty("sha256"); + }); + + it("omits deleted_paths when empty", async () => { + const createUploadSession = vi.fn(fakeTransport().transport.createUploadSession); + const { transport } = fakeTransport({ createUploadSession }); + await runPublish(transport, input({ deletedPaths: [] })); + expect(createUploadSession.mock.calls[0]?.[0]).not.toHaveProperty("deleted_paths"); + }); }); diff --git a/packages/api-client/src/publish.ts b/packages/api-client/src/publish.ts index b85c2287..36b3f493 100644 --- a/packages/api-client/src/publish.ts +++ b/packages/api-client/src/publish.ts @@ -2,6 +2,7 @@ import type { ArtifactId, CreateUploadSessionRequest, CreateUploadSessionResponse, + FilePath, FinalizeUploadSessionResponse, IdempotencyKey, PlainTextTitle, @@ -10,13 +11,24 @@ import type { RenderMode, RevisionId, Sha256Hex, + UploadSessionFileInput, UploadSessionId, } from 
"@agent-paste/contracts"; +/** A unified-diff patch a changed file is sent as instead of whole bytes (ADR 0089). */ +export type PublishFilePatch = { + baseSha256: Sha256Hex; + resultSha256: Sha256Hex; +}; + /** * One file to publish, with its bytes available on demand. The caller computes * the digest (CLI from disk, MCP from the in-memory body); `read` is only * invoked for targets the server reports as `upload_required`. + * + * When `patch` is set, `read` returns the unified-diff bytes (not the whole file) + * and `sizeBytes`/`sha256` describe that diff; the server reconstructs and + * re-hashes the whole file to `patch.resultSha256` at finalize. */ export type PublishFile = { path: string; @@ -24,6 +36,7 @@ export type PublishFile = { sha256: Sha256Hex; contentType: string; read: () => Promise | Uint8Array; + patch?: PublishFilePatch; }; export type PublishInput = { @@ -34,6 +47,13 @@ export type PublishInput = { renderMode?: RenderMode; /** Present => publish a new Revision on an existing Artifact. */ artifactId?: ArtifactId; + /** + * Present => a partial-manifest publish: `files` lists only changed/added paths + * (some possibly as patches), `deletedPaths` drops paths, and every other path + * inherits from this base Revision by reference (ADR 0089). + */ + baseRevisionId?: RevisionId; + deletedPaths?: FilePath[]; /** Opaque, caller-supplied (CLI nonce, MCP deterministic). The module never derives its own. */ idempotencyKey: IdempotencyKey; /** Optional per-file upload progress (CLI rich-mode spinner). Called after each upload. */ @@ -136,13 +156,30 @@ export async function runPublish(transport: PublishTransport, input: PublishInpu function buildCreateSessionRequest(input: PublishInput): CreateUploadSessionRequest { return { ...(input.artifactId ? { artifact_id: input.artifactId } : {}), + ...(input.baseRevisionId ? { base_revision_id: input.baseRevisionId } : {}), title: input.title, entrypoint: input.entrypoint, ...(input.renderMode ? 
{ render_mode: input.renderMode } : {}), - files: input.files.map((file) => ({ path: file.path, size_bytes: file.sizeBytes, sha256: file.sha256 })), + ...(input.deletedPaths && input.deletedPaths.length > 0 ? { deleted_paths: input.deletedPaths } : {}), + // A patched entry omits sha256 (the contract forbids both) and carries the + // diff descriptor; the uploaded bytes are the diff and size_bytes is its size. + files: input.files.map((file) => buildFileEntry(file)), } as CreateUploadSessionRequest; } +function buildFileEntry(file: PublishFile): UploadSessionFileInput { + const entry = file.patch + ? { + path: file.path, + size_bytes: file.sizeBytes, + patch: { base_sha256: file.patch.baseSha256, format: "unified", result_sha256: file.patch.resultSha256 }, + } + : { path: file.path, size_bytes: file.sizeBytes, sha256: file.sha256 }; + // path/sha256 are branded contract types; the runtime values are plain strings + // the server validates. The brand is erased at the wire boundary. + return entry as unknown as UploadSessionFileInput; +} + async function asBytes(value: Promise | Uint8Array): Promise { return value instanceof Uint8Array ? 
value : await value; } diff --git a/packages/contracts/openapi/api.json b/packages/contracts/openapi/api.json index e0d1499d..637c170f 100644 --- a/packages/contracts/openapi/api.json +++ b/packages/contracts/openapi/api.json @@ -702,6 +702,10 @@ "url": { "type": "string", "format": "uri" + }, + "sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" } }, "required": [ @@ -922,6 +926,10 @@ "url": { "type": "string", "format": "uri" + }, + "sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" } }, "required": [ @@ -1160,6 +1168,44 @@ "bundle" ] }, + "ArtifactFileContent": { + "type": "object", + "properties": { + "path": { + "type": "string", + "minLength": 1, + "maxLength": 4096 + }, + "sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" + }, + "size_bytes": { + "type": "integer", + "minimum": 0 + }, + "content_type": { + "type": "string", + "minLength": 1, + "maxLength": 200 + }, + "is_binary": { + "type": "boolean" + }, + "body": { + "type": "string", + "maxLength": 10485760 + } + }, + "required": [ + "path", + "sha256", + "size_bytes", + "content_type", + "is_binary" + ], + "additionalProperties": false + }, "AccessLinkResolveRequest": { "type": "object", "properties": { @@ -21811,6 +21857,552 @@ } } }, + "/v1/artifacts/{artifact_id}/file-content": { + "get": { + "operationId": "artifacts.fileContent", + "summary": "Read one stored file's decrypted plaintext for the owning member.", + "security": [ + { + "ApiKeyBearer": [] + } + ], + "parameters": [ + { + "schema": { + "type": "string" + }, + "required": true, + "name": "artifact_id", + "in": "path", + "description": "Artifact id." + }, + { + "schema": { + "type": "string" + }, + "required": true, + "name": "path", + "in": "query", + "description": "File path within the artifact tree." + }, + { + "schema": { + "type": "string" + }, + "required": false, + "name": "revision_id", + "in": "query", + "description": "Revision to read; defaults to the latest." 
+ }, + { + "schema": { + "type": "string", + "minLength": 8, + "maxLength": 128, + "description": "Caller-supplied request id. Worker echoes it back; non-matching values are replaced with a UUID." + }, + "required": false, + "description": "Caller-supplied request id. Worker echoes it back; non-matching values are replaced with a UUID.", + "name": "X-Request-Id", + "in": "header" + } + ], + "responses": { + "200": { + "description": "Success (200)", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArtifactFileContent" + } + } + } + }, + "400": { + "description": "Error envelope", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "code": { + "type": "string", + "enum": [ + "api_key_not_found", + "api_key_revoked", + "artifact_not_found", + "database_unavailable", + "draft_revision_conflict", + "entrypoint_not_in_revision", + "file_count_cap_exceeded", + "file_size_cap_exceeded", + "forbidden", + "idempotency_in_flight", + "invalid_auth", + "invalid_content_length", + "invalid_cursor", + "invalid_idempotency_key", + "invalid_request", + "not_authenticated", + "not_found", + "patch_conflict", + "pinned_artifact_cap_exceeded", + "rate_limited_actor", + "rate_limited_artifact", + "rate_limited_workspace", + "ephemeral_provision_rate_limited", + "ephemeral_provision_unavailable", + "pow_required", + "pow_invalid", + "revision_retained", + "revision_size_cap_exceeded", + "revision_unpublished", + "storage_unavailable", + "unexpected_upload_object", + "upload_incomplete", + "upload_session_expired", + "upload_session_not_found", + "usage_policy_exceeded", + "write_allowance_exceeded", + "revision_ceiling_exceeded" + ] + }, + "message": { + "type": "string" + }, + "docs": { + "type": "string", + "format": "uri" + }, + "request_id": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "code", + "message" + ] + } + }, + "required": [ + 
"error" + ] + } + } + } + }, + "401": { + "description": "Error envelope", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "code": { + "type": "string", + "enum": [ + "api_key_not_found", + "api_key_revoked", + "artifact_not_found", + "database_unavailable", + "draft_revision_conflict", + "entrypoint_not_in_revision", + "file_count_cap_exceeded", + "file_size_cap_exceeded", + "forbidden", + "idempotency_in_flight", + "invalid_auth", + "invalid_content_length", + "invalid_cursor", + "invalid_idempotency_key", + "invalid_request", + "not_authenticated", + "not_found", + "patch_conflict", + "pinned_artifact_cap_exceeded", + "rate_limited_actor", + "rate_limited_artifact", + "rate_limited_workspace", + "ephemeral_provision_rate_limited", + "ephemeral_provision_unavailable", + "pow_required", + "pow_invalid", + "revision_retained", + "revision_size_cap_exceeded", + "revision_unpublished", + "storage_unavailable", + "unexpected_upload_object", + "upload_incomplete", + "upload_session_expired", + "upload_session_not_found", + "usage_policy_exceeded", + "write_allowance_exceeded", + "revision_ceiling_exceeded" + ] + }, + "message": { + "type": "string" + }, + "docs": { + "type": "string", + "format": "uri" + }, + "request_id": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "code", + "message" + ] + } + }, + "required": [ + "error" + ] + } + } + } + }, + "404": { + "description": "Error envelope", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "code": { + "type": "string", + "enum": [ + "api_key_not_found", + "api_key_revoked", + "artifact_not_found", + "database_unavailable", + "draft_revision_conflict", + "entrypoint_not_in_revision", + "file_count_cap_exceeded", + "file_size_cap_exceeded", + "forbidden", + "idempotency_in_flight", + "invalid_auth", + 
"invalid_content_length", + "invalid_cursor", + "invalid_idempotency_key", + "invalid_request", + "not_authenticated", + "not_found", + "patch_conflict", + "pinned_artifact_cap_exceeded", + "rate_limited_actor", + "rate_limited_artifact", + "rate_limited_workspace", + "ephemeral_provision_rate_limited", + "ephemeral_provision_unavailable", + "pow_required", + "pow_invalid", + "revision_retained", + "revision_size_cap_exceeded", + "revision_unpublished", + "storage_unavailable", + "unexpected_upload_object", + "upload_incomplete", + "upload_session_expired", + "upload_session_not_found", + "usage_policy_exceeded", + "write_allowance_exceeded", + "revision_ceiling_exceeded" + ] + }, + "message": { + "type": "string" + }, + "docs": { + "type": "string", + "format": "uri" + }, + "request_id": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "code", + "message" + ] + } + }, + "required": [ + "error" + ] + } + } + } + }, + "409": { + "description": "Error envelope", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "code": { + "type": "string", + "enum": [ + "api_key_not_found", + "api_key_revoked", + "artifact_not_found", + "database_unavailable", + "draft_revision_conflict", + "entrypoint_not_in_revision", + "file_count_cap_exceeded", + "file_size_cap_exceeded", + "forbidden", + "idempotency_in_flight", + "invalid_auth", + "invalid_content_length", + "invalid_cursor", + "invalid_idempotency_key", + "invalid_request", + "not_authenticated", + "not_found", + "patch_conflict", + "pinned_artifact_cap_exceeded", + "rate_limited_actor", + "rate_limited_artifact", + "rate_limited_workspace", + "ephemeral_provision_rate_limited", + "ephemeral_provision_unavailable", + "pow_required", + "pow_invalid", + "revision_retained", + "revision_size_cap_exceeded", + "revision_unpublished", + "storage_unavailable", + "unexpected_upload_object", + "upload_incomplete", + 
"upload_session_expired", + "upload_session_not_found", + "usage_policy_exceeded", + "write_allowance_exceeded", + "revision_ceiling_exceeded" + ] + }, + "message": { + "type": "string" + }, + "docs": { + "type": "string", + "format": "uri" + }, + "request_id": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "code", + "message" + ] + } + }, + "required": [ + "error" + ] + } + } + } + }, + "429": { + "description": "Actor or workspace rate limit exceeded. Error code is rate_limited_actor or rate_limited_workspace.", + "headers": { + "Retry-After": { + "schema": { + "type": "string", + "description": "Seconds to wait before retrying." + }, + "required": true, + "description": "Seconds to wait before retrying." + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RateLimitErrorEnvelope" + } + } + } + }, + "500": { + "description": "Error envelope", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "code": { + "type": "string", + "enum": [ + "api_key_not_found", + "api_key_revoked", + "artifact_not_found", + "database_unavailable", + "draft_revision_conflict", + "entrypoint_not_in_revision", + "file_count_cap_exceeded", + "file_size_cap_exceeded", + "forbidden", + "idempotency_in_flight", + "invalid_auth", + "invalid_content_length", + "invalid_cursor", + "invalid_idempotency_key", + "invalid_request", + "not_authenticated", + "not_found", + "patch_conflict", + "pinned_artifact_cap_exceeded", + "rate_limited_actor", + "rate_limited_artifact", + "rate_limited_workspace", + "ephemeral_provision_rate_limited", + "ephemeral_provision_unavailable", + "pow_required", + "pow_invalid", + "revision_retained", + "revision_size_cap_exceeded", + "revision_unpublished", + "storage_unavailable", + "unexpected_upload_object", + "upload_incomplete", + "upload_session_expired", + "upload_session_not_found", + "usage_policy_exceeded", + 
"write_allowance_exceeded", + "revision_ceiling_exceeded" + ] + }, + "message": { + "type": "string" + }, + "docs": { + "type": "string", + "format": "uri" + }, + "request_id": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "code", + "message" + ] + } + }, + "required": [ + "error" + ] + } + } + } + }, + "503": { + "description": "Error envelope", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "code": { + "type": "string", + "enum": [ + "api_key_not_found", + "api_key_revoked", + "artifact_not_found", + "database_unavailable", + "draft_revision_conflict", + "entrypoint_not_in_revision", + "file_count_cap_exceeded", + "file_size_cap_exceeded", + "forbidden", + "idempotency_in_flight", + "invalid_auth", + "invalid_content_length", + "invalid_cursor", + "invalid_idempotency_key", + "invalid_request", + "not_authenticated", + "not_found", + "patch_conflict", + "pinned_artifact_cap_exceeded", + "rate_limited_actor", + "rate_limited_artifact", + "rate_limited_workspace", + "ephemeral_provision_rate_limited", + "ephemeral_provision_unavailable", + "pow_required", + "pow_invalid", + "revision_retained", + "revision_size_cap_exceeded", + "revision_unpublished", + "storage_unavailable", + "unexpected_upload_object", + "upload_incomplete", + "upload_session_expired", + "upload_session_not_found", + "usage_policy_exceeded", + "write_allowance_exceeded", + "revision_ceiling_exceeded" + ] + }, + "message": { + "type": "string" + }, + "docs": { + "type": "string", + "format": "uri" + }, + "request_id": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "code", + "message" + ] + } + }, + "required": [ + "error" + ] + } + } + } + } + } + } + }, "/v1/artifacts/{artifact_id}/revisions": { "get": { "operationId": "revisions.list", diff --git a/packages/contracts/src/agentView.ts b/packages/contracts/src/agentView.ts index 66217b26..72096c10 100644 --- 
a/packages/contracts/src/agentView.ts +++ b/packages/contracts/src/agentView.ts @@ -6,6 +6,7 @@ import { PlainTextDescription, PlainTextTitle, RevisionId, + Sha256Hex, UrlString, } from "./primitives.js"; import { z } from "./zod.js"; @@ -41,6 +42,11 @@ export const AgentViewFile = z.object({ size_bytes: z.number().int().nonnegative(), content_type: z.string().min(1).max(200), url: UrlString, + // Plaintext content address. Optional because diff-only/draft rows have no + // materialized blob; an agent uses it to detect what changed before reading a + // file back to diff against (ADR 0089). Object is not strict, so this + // is a non-breaking add the MCP read_artifact safeParse already accepts. + sha256: Sha256Hex.optional(), }); export type AgentViewFile = z.infer; diff --git a/packages/contracts/src/artifacts.ts b/packages/contracts/src/artifacts.ts index 76f6c5a7..629f3876 100644 --- a/packages/contracts/src/artifacts.ts +++ b/packages/contracts/src/artifacts.ts @@ -1,6 +1,6 @@ -import { PageInfo } from "./common.js"; +import { Mebibytes, PageInfo } from "./common.js"; import { ArtifactStatus } from "./enums.js"; -import { ArtifactId, FilePath, IsoDateTime, PlainTextTitle, RevisionId } from "./primitives.js"; +import { ArtifactId, FilePath, IsoDateTime, PlainTextTitle, RevisionId, Sha256Hex } from "./primitives.js"; import { z } from "./zod.js"; export const ArtifactSummary = z.object({ @@ -47,3 +47,24 @@ export const DeleteArtifactResponse = z.object({ deleted_at: IsoDateTime, }); export type DeleteArtifactResponse = z.infer; + +// A member reading one stored file's decrypted plaintext so an agent can diff +// against it to produce a unified-diff patch revise (ADR 0089). +// `is_binary` is byte-derived (true binary only); `content_type` is path-derived, +// so they may disagree (e.g. binary saved as .txt) — `is_binary` is authoritative +// for deciding whether `body` is patchable text. 
`body` is the decoded UTF-8 text +// and is present iff the file is text AND <= 10 MiB. When `body` is absent and +// `is_binary` is false, the file is text but too large to inline: fetch it via the +// signed content url or upload a whole blob (never a patch). `sha256` is the +// plaintext content address an agent declares as a patch's `base_sha256`. +export const ArtifactFileContent = z + .object({ + path: FilePath, + sha256: Sha256Hex, + size_bytes: z.number().int().nonnegative(), + content_type: z.string().min(1).max(200), + is_binary: z.boolean(), + body: z.string().max(Mebibytes.ten).optional(), + }) + .strict(); +export type ArtifactFileContent = z.infer; diff --git a/packages/contracts/src/mcp.test.ts b/packages/contracts/src/mcp.test.ts index 5131316a..1cad3db8 100644 --- a/packages/contracts/src/mcp.test.ts +++ b/packages/contracts/src/mcp.test.ts @@ -32,6 +32,7 @@ describe("MCP tool registry", () => { "add_revision", "list_artifacts", "read_artifact", + "read_file", "list_revisions", "delete_artifact", "update_display_metadata", diff --git a/packages/contracts/src/mcp/registry.ts b/packages/contracts/src/mcp/registry.ts index 312d251c..dd85a7b4 100644 --- a/packages/contracts/src/mcp/registry.ts +++ b/packages/contracts/src/mcp/registry.ts @@ -82,6 +82,27 @@ export const mcpToolContracts = [ ], errors: readErrors, }, + { + name: "read_file", + description: + "Read one file's stored content from an Artifact so you can edit it and revise. Returns the decoded text body plus its sha256 for text files up to 10 MiB; for binary or larger files it returns sha256/size/is_binary with no body (fetch those via the file url or re-upload whole). 
Use the returned body as the base when producing an edited Revision; the sha256 is the exact base the server validates a diff against.", + auth: "mcp_oauth", + requiredScopes: ["read"], + idempotency: "none", + inputSchema: "read_file", + outputSchema: "read_file", + forwardedCalls: [ + { + routeId: "artifacts.fileContent", + auth: "mcp_bearer", + }, + ], + // read group + storage_unavailable: reading a file decrypts a blob, which the + // base read tools never do, so this tool can surface a transient blob-read + // failure the others cannot. Declared so the MCP forward maps it to 503 + // instead of the 500 fallback. + errors: [...readErrors, "storage_unavailable"] as const, + }, { name: "list_revisions", description: "List revisions for an artifact.", diff --git a/packages/contracts/src/mcp/schemas.ts b/packages/contracts/src/mcp/schemas.ts index 021a8dbb..47e7b8dc 100644 --- a/packages/contracts/src/mcp/schemas.ts +++ b/packages/contracts/src/mcp/schemas.ts @@ -1,11 +1,12 @@ import { AccessLinkSignedUrl, AccessLinkType } from "../accessLinks.js"; import { AgentView, DisplayMetadata } from "../agentView.js"; -import { ArtifactListResponse, DeleteArtifactResponse } from "../artifacts.js"; +import { ArtifactFileContent, ArtifactListResponse, DeleteArtifactResponse } from "../artifacts.js"; import { Mebibytes, PaginationRequest } from "../common.js"; import { AccessLinkId, ArtifactId, Cursor, + FilePath, IdempotencyKey, IsoDateTime, PlainTextTitle, @@ -70,6 +71,11 @@ export type McpListArtifactsInput = z.infer; export const McpReadArtifactInput = z.object({ artifact_id: ArtifactId }).strict(); export type McpReadArtifactInput = z.infer; +export const McpReadFileInput = z + .object({ artifact_id: ArtifactId, path: FilePath, revision_id: RevisionId.optional() }) + .strict(); +export type McpReadFileInput = z.infer; + export const McpListRevisionsInput = z .object({ artifact_id: ArtifactId, @@ -143,6 +149,9 @@ export type McpListArtifactsOutput = z.infer; export const 
McpReadArtifactOutput = AgentView; export type McpReadArtifactOutput = z.infer; +export const McpReadFileOutput = ArtifactFileContent; +export type McpReadFileOutput = z.infer; + export const McpListRevisionsOutput = RevisionListResponse; export type McpListRevisionsOutput = z.infer; @@ -204,6 +213,7 @@ export const McpToolName = z.enum([ "add_revision", "list_artifacts", "read_artifact", + "read_file", "list_revisions", "delete_artifact", "update_display_metadata", diff --git a/packages/contracts/src/mcp/tool-schemas.ts b/packages/contracts/src/mcp/tool-schemas.ts index 5e0efb11..ffcc9ce8 100644 --- a/packages/contracts/src/mcp/tool-schemas.ts +++ b/packages/contracts/src/mcp/tool-schemas.ts @@ -17,6 +17,8 @@ import { McpPublishArtifactOutput, McpReadArtifactInput, McpReadArtifactOutput, + McpReadFileInput, + McpReadFileOutput, McpRevokeAccessLinkInput, McpRevokeAccessLinkOutput, type McpToolName, @@ -31,6 +33,7 @@ export const mcpToolInputSchemas = { add_revision: McpAddRevisionInput, list_artifacts: McpListArtifactsInput, read_artifact: McpReadArtifactInput, + read_file: McpReadFileInput, list_revisions: McpListRevisionsInput, delete_artifact: McpDeleteArtifactInput, update_display_metadata: McpUpdateDisplayMetadataInput, @@ -46,6 +49,7 @@ export const mcpToolOutputSchemas = { add_revision: McpPublishArtifactOutput, list_artifacts: McpListArtifactsOutput, read_artifact: McpReadArtifactOutput, + read_file: McpReadFileOutput, list_revisions: McpListRevisionsOutput, delete_artifact: McpDeleteArtifactOutput, update_display_metadata: McpUpdateDisplayMetadataOutput, diff --git a/packages/contracts/src/mvp-contracts.test.ts b/packages/contracts/src/mvp-contracts.test.ts index 7432927e..ba170ba0 100644 --- a/packages/contracts/src/mvp-contracts.test.ts +++ b/packages/contracts/src/mvp-contracts.test.ts @@ -48,6 +48,7 @@ describe("MVP route registry", () => { "accessLinks.revoke", "agentView.getLatest", "agentView.getRevision", + "artifacts.fileContent", 
"revisions.list", "revisions.publish", "web.auth.callback", @@ -105,6 +106,7 @@ describe("MVP route registry", () => { "agentView.getLatest", "agentView.getRevision", "apiKeys.revokeCurrent", + "artifacts.fileContent", "revisions.list", "revisions.publish", "uploadSessions.create", diff --git a/packages/contracts/src/openapi/api.artifacts.ts b/packages/contracts/src/openapi/api.artifacts.ts index 7e446f16..3818bdda 100644 --- a/packages/contracts/src/openapi/api.artifacts.ts +++ b/packages/contracts/src/openapi/api.artifacts.ts @@ -1,4 +1,5 @@ import type { OpenAPIRegistry } from "@asteasolutions/zod-to-openapi"; +import { z } from "../zod.js"; import type { ApiPathHelpers } from "./api.helpers.js"; import { schemaRef, standardJsonResponses } from "./responses.js"; @@ -7,7 +8,8 @@ import { schemaRef, standardJsonResponses } from "./responses.js"; * each file under the `noExcessiveLinesPerFile` limit. */ export function registerArtifactPaths(registry: OpenAPIRegistry, helpers: ApiPathHelpers): void { - const { params, pathStringParam, idempotencyKeyHeader, requestIdHeader } = helpers; + const { params, pathStringParam, queryStringParam, queryOptionalStringParam, idempotencyKeyHeader, requestIdHeader } = + helpers; registry.registerPath({ method: "get", @@ -38,6 +40,23 @@ export function registerArtifactPaths(registry: OpenAPIRegistry, helpers: ApiPat responses: standardJsonResponses(schemaRef("AgentView")), }); + registry.registerPath({ + method: "get", + path: "/v1/artifacts/{artifact_id}/file-content", + operationId: "artifacts.fileContent", + summary: "Read one stored file's decrypted plaintext for the owning member.", + security: [{ ApiKeyBearer: [] }], + request: { + params: params({ artifact_id: pathStringParam("artifact_id", "Artifact id.") }), + query: z.object({ + path: queryStringParam("path", "File path within the artifact tree."), + revision_id: queryOptionalStringParam("revision_id", "Revision to read; defaults to the latest."), + }), + headers: 
[requestIdHeader], + }, + responses: standardJsonResponses(schemaRef("ArtifactFileContent")), + }); + registry.registerPath({ method: "get", path: "/v1/artifacts/{artifact_id}/revisions", diff --git a/packages/contracts/src/openapi/api.helpers.ts b/packages/contracts/src/openapi/api.helpers.ts index 4b221358..375eed4e 100644 --- a/packages/contracts/src/openapi/api.helpers.ts +++ b/packages/contracts/src/openapi/api.helpers.ts @@ -12,6 +12,19 @@ export const pathEnumParam = (name: string, values: readonly [string, ...string[ param: { name, in: "path", required: true, description }, }); +export const queryStringParam = (name: string, description: string) => + z.string().openapi({ + param: { name, in: "query", required: true, description }, + }); + +export const queryOptionalStringParam = (name: string, description: string) => + z + .string() + .optional() + .openapi({ + param: { name, in: "query", required: false, description }, + }); + export const queryCursorParam = (name: string, description: string) => Cursor.openapi({ param: { name, in: "query", required: false, description }, @@ -34,6 +47,8 @@ export type ApiPathHelpers = { params: typeof params; pathStringParam: typeof pathStringParam; pathEnumParam: typeof pathEnumParam; + queryStringParam: typeof queryStringParam; + queryOptionalStringParam: typeof queryOptionalStringParam; queryCursorParam: typeof queryCursorParam; queryPageSizeParam: typeof queryPageSizeParam; idempotencyKeyHeader: typeof idempotencyKeyHeader; @@ -45,6 +60,8 @@ export function createApiPathHelpers(): ApiPathHelpers { params, pathStringParam, pathEnumParam, + queryStringParam, + queryOptionalStringParam, queryCursorParam, queryPageSizeParam, idempotencyKeyHeader, diff --git a/packages/contracts/src/openapi/shared.ts b/packages/contracts/src/openapi/shared.ts index 850e0e6b..68317511 100644 --- a/packages/contracts/src/openapi/shared.ts +++ b/packages/contracts/src/openapi/shared.ts @@ -8,7 +8,13 @@ import { import { RevokeApiKeyResponse } 
from "../admin.js"; import { AgentView, PublicAgentView } from "../agentView.js"; import { ApiKeySummary, CreateApiKeyRequest, CreateApiKeyResponse } from "../apiKeys.js"; -import { ArtifactDetail, ArtifactListResponse, ArtifactSummary, DeleteArtifactResponse } from "../artifacts.js"; +import { + ArtifactDetail, + ArtifactFileContent, + ArtifactListResponse, + ArtifactSummary, + DeleteArtifactResponse, +} from "../artifacts.js"; import { BillingInvoiceListResponse, BillingStatusResponse, @@ -92,6 +98,7 @@ export function registerApiSchemas(registry: OpenAPIRegistry, options: RegisterA registry.register("CliVersionResponse", CliVersionResponse); const registeredPublicAgentView = registry.register("PublicAgentView", PublicAgentView); registry.register("AgentView", AgentView); + registry.register("ArtifactFileContent", ArtifactFileContent); registry.register("AccessLinkResolveRequest", AccessLinkResolveRequest); registry.register("PowChallenge", PowChallenge); registry.register("EphemeralProvisionRequest", EphemeralProvisionRequest); diff --git a/packages/contracts/src/primitives.ts b/packages/contracts/src/primitives.ts index 52ed297e..987aff48 100644 --- a/packages/contracts/src/primitives.ts +++ b/packages/contracts/src/primitives.ts @@ -81,6 +81,9 @@ export const FilePath = z .brand<"FilePath">(); export type FilePath = z.infer; +export const Sha256Hex = z.string().regex(/^[a-f0-9]{64}$/); +export type Sha256Hex = z.infer; + export const PlainTextTitle = z.string().trim().min(1).max(160); export type PlainTextTitle = z.infer; diff --git a/packages/contracts/src/routes/registry.artifacts.ts b/packages/contracts/src/routes/registry.artifacts.ts index 4b46a826..7217591f 100644 --- a/packages/contracts/src/routes/registry.artifacts.ts +++ b/packages/contracts/src/routes/registry.artifacts.ts @@ -118,6 +118,26 @@ export const artifactRouteContracts = [ responseSchema: "AgentView", errors: [...apiKeyActorReadErrors, "forbidden", "not_found", "revision_retained"], }, + { 
+ id: "artifacts.fileContent", + app: "api", + method: "GET", + // The file path travels as ?path= (not a path segment): FilePath may contain + // '/', which route-path building encodes and Hono ':param' will not match. + // ?revision_id= pins the read to a specific Revision so a CLI diff base and + // its inherit base are the same Revision; absent => latest (ADR 0089). + path: "/v1/artifacts/{artifact_id}/file-content", + auth: "api_key_or_mcp_oauth", + scopes: ["read"], + idempotency: "none", + rateLimit: "actor", + responseSchema: "ArtifactFileContent", + // `forbidden` is required by the registrar guard-error invariant for any + // api_key_or_mcp_oauth read route (mirrors agentView.getLatest), even though + // this handler resolves access via getAgentView and only ever returns + // not_found / storage_unavailable. + errors: [...apiKeyActorReadErrors, "forbidden", "not_found", "storage_unavailable"], + }, { id: "revisions.list", app: "api", diff --git a/packages/contracts/src/uploadSessions.ts b/packages/contracts/src/uploadSessions.ts index 3bb94ecd..b5b28619 100644 --- a/packages/contracts/src/uploadSessions.ts +++ b/packages/contracts/src/uploadSessions.ts @@ -7,14 +7,16 @@ import { IsoDateTime, PlainTextTitle, RevisionId, + Sha256Hex, UploadSessionId, UrlString, } from "./primitives.js"; import { RenderMode } from "./revisions.js"; import { z } from "./zod.js"; -export const Sha256Hex = z.string().regex(/^[a-f0-9]{64}$/); -export type Sha256Hex = z.infer; +// Re-exported from primitives so existing importers (@agent-paste/contracts +// Sha256Hex) keep working; agentView.ts and artifacts.ts also need it. +export { Sha256Hex } from "./primitives.js"; // A changed file may arrive as a patch against a base Revision's file (ADR 0088) // instead of whole bytes. 
When present, the bytes uploaded for this file entry are diff --git a/packages/db/src/agent-view.ts b/packages/db/src/agent-view.ts index 1a109c39..44560253 100644 --- a/packages/db/src/agent-view.ts +++ b/packages/db/src/agent-view.ts @@ -65,6 +65,9 @@ export function buildAgentView( content_type: file.content_type, object_key: file.r2_key, url: `${prefix}/${encodePath(file.path)}`, + // Plaintext content address so an agent can detect changes and declare a + // patch base (ADR 0089). Omitted for non-blob/diff-only rows. + ...(file.sha256 ? { sha256: file.sha256 } : {}), })), safety_warnings: warnings.slice(0, 100).map(toAgentViewSafetyWarning), bundle: buildBundleAvailability(revision), diff --git a/packages/storage/src/index.ts b/packages/storage/src/index.ts index b8bcea15..7318f882 100644 --- a/packages/storage/src/index.ts +++ b/packages/storage/src/index.ts @@ -36,7 +36,12 @@ export { migrateWorkspaceBlobsForReparent, type WorkspaceBlobRef, } from "./reparent-workspace-blobs.js"; -export { type ApplyConflictReason, type ApplyUnifiedDiffResult, applyUnifiedDiff } from "./unified-diff.js"; +export { + type ApplyConflictReason, + type ApplyUnifiedDiffResult, + applyUnifiedDiff, + decodeUtf8Strict, +} from "./unified-diff.js"; export { type ArtifactBytesSigningRing, type R2GetObjectBody, diff --git a/packages/storage/src/unified-diff.ts b/packages/storage/src/unified-diff.ts index 773b254f..9c6762ed 100644 --- a/packages/storage/src/unified-diff.ts +++ b/packages/storage/src/unified-diff.ts @@ -56,7 +56,7 @@ async function sha256Hex(bytes: Uint8Array): Promise { // is the obvious tool but its option type is not in every Worker TS lib config, so we // decode lossily then verify the decode round-trips to the same bytes — a replacement // character inserted for an invalid sequence re-encodes to different bytes. 
-function decodeUtf8Strict(bytes: Uint8Array): string | null { +export function decodeUtf8Strict(bytes: Uint8Array): string | null { const text = new TextDecoder().decode(asBufferSource(bytes)); if (!bytesEqual(new TextEncoder().encode(text), bytes)) { return null; diff --git a/packages/worker-runtime/src/route-repository-errors.ts b/packages/worker-runtime/src/route-repository-errors.ts index 3a99ded0..2595b95d 100644 --- a/packages/worker-runtime/src/route-repository-errors.ts +++ b/packages/worker-runtime/src/route-repository-errors.ts @@ -36,6 +36,7 @@ export const routeRepositorySurfaces = { "accessLinks.revoke": ["not_found"], "agentView.getLatest": [], "agentView.getRevision": [], + "artifacts.fileContent": [], "revisions.list": [], "revisions.publish": [ "artifact_not_found", @@ -96,7 +97,14 @@ export const routeRepositorySurfaces = { ], "uploadSessions.putFile": [], "uploadSessions.finalize": [ + // The five base-* kinds collapse to invalid_request on the wire; declared here so + // this surface reflects what finalizeUploadSession can actually throw (ADR 0089). 
+ "base_revision_artifact_mismatch", + "base_revision_not_found", + "base_revision_not_publishable", + "deleted_path_not_in_base", "draft_revision_conflict", + "inherited_path_not_blob_backed", "patch_base_mismatch", "patch_conflict", "upload_incomplete", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9554d31f..3a5c9de9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -204,6 +204,9 @@ importers: '@agent-paste/rotation': specifier: workspace:* version: link:../../packages/rotation + '@agent-paste/storage': + specifier: workspace:* + version: link:../../packages/storage '@agent-paste/tokens': specifier: workspace:* version: link:../../packages/tokens diff --git a/scripts/smoke-local-patch.mjs b/scripts/smoke-local-patch.mjs index a89af0b0..903ceac3 100644 --- a/scripts/smoke-local-patch.mjs +++ b/scripts/smoke-local-patch.mjs @@ -1,14 +1,19 @@ #!/usr/bin/env node import { spawn } from "node:child_process"; -// End-to-end smoke for ADR 0088 Stage 4 intra-file patch reconstruction. Unlike the +// End-to-end smoke for the Git-like revision model: Stage 4 intra-file patch +// reconstruction (ADR 0088) plus Stage 5 agent read-back (ADR 0089). Unlike the // unit/integration tests (which use a fake reconstructor), this drives the REAL path: // boots the local MVP server (real encryption ring + in-memory R2 that round-trips // ciphertext), publishes a base Revision with known bytes, then create-session with a // real `base_revision_id` + unified-diff `patch`, PUTs the diff bytes (encrypted under // revision AAD), finalizes (the real RevisionReconstructor decrypts the base blob, // applies the diff, hash-verifies, re-encrypts under blob AAD), publishes, and fetches -// the served content asserting it is byte-identical to applying the patch locally. Also -// asserts the conflict path: a diff whose result digest is wrong fails with patch_conflict. +// the served content asserting it is byte-identical to applying the patch locally. 
+// Stage 5: reads the stored file back through the api worker (api decrypts the member's +// plaintext, returns body + sha256), builds a fresh diff from those served bytes (the +// path an agent without a working copy takes), publishes it, and asserts the server +// reconstructs that diff byte-exactly too. Also asserts the conflict path: a diff whose +// declared result digest is wrong fails loud with patch_conflict. import { createHash } from "node:crypto"; import { once } from "node:events"; import { setTimeout as delay } from "node:timers/promises"; @@ -142,6 +147,36 @@ try { const servedIndex = await fetchArtifactFile(revised, "index.html"); assertBytesEqual(servedIndex, indexBytes, "inherited index.html still serves byte-identically"); + // --- Stage 5 read-back: an agent reads the stored file + sha256 to diff against. --- + const readBack = await client.artifacts.readFile(revised.artifact_id, "big.txt"); + assert(readBack.is_binary === false, "read-back of a text file reports is_binary:false"); + assert(readBack.sha256 === sha256Hex(resultBig), "read-back sha256 is the plaintext content address"); + assertBytesEqual(enc.encode(readBack.body), resultBig, "read-back body is byte-identical to the stored file"); + + // --- Stage 5 diff-from-read-back: the agent path. Build the next edit and its diff + // from the bytes the server handed back (not a local working copy), publish the patch + // against the read-back digest, and assert the server reconstructs it byte-exactly. + // (The CLI's own diff generator is byte-exactness-tested in apps/cli/src/unified-diff-gen.test.ts; + // this proves the read-back → diff → patch → content loop end to end against the live ring.) 
--- + const readBackBytes = enc.encode(readBack.body); + assertBytesEqual(readBackBytes, resultBig, "read-back body re-encodes to the stored bytes"); + const nextBytes = enc.encode(readBack.body.replace("LINE FIVE HUNDRED\n", "edited via read-back\n")); + const readBackDiff = unifiedDiffLineSwap(500, "LINE FIVE HUNDRED", "edited via read-back", readBackBytes); + const cliRevised = await publishPatch(client, { + artifactId: revised.artifact_id, + baseRevisionId: revised.revision_id, + path: "big.txt", + diffBytes: enc.encode(readBackDiff), + baseSha256: readBack.sha256, + resultSha256: sha256Hex(nextBytes), + }); + const servedCli = await fetchArtifactFile(cliRevised, "big.txt"); + assertBytesEqual( + servedCli, + nextBytes, + "diff built from read-back bytes reconstructs byte-identically through content", + ); + // --- Conflict path: a diff whose declared result digest is wrong must fail loud. --- let conflict; try { @@ -169,9 +204,12 @@ try { process.stdout.write(`Patch smoke passed (${target}). - Base revision: ${base.revision_id} - Patched revision: ${revised.revision_id} + Base revision: ${base.revision_id} + Patched revision: ${revised.revision_id} + Read-back revision: ${cliRevised.revision_id} Reconstructed big.txt served byte-exact (${resultBig.byteLength} bytes from a ${diff.length}-byte diff). + Read-back: big.txt body + sha256 matched the stored file. + Diff-from-read-back: ${readBackDiff.length}-byte diff built from served bytes reconstructed byte-exact. Conflict path: ${conflict.code} (${conflict.status}) — "${conflict.message}" `); From 085533354f69be29d5b8fda5fe41bb7f604ca084 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 18:27:47 -0700 Subject: [PATCH 08/14] docs(adr): renumber revision stack to 0088/0089/0090 (0087 taken by public-artifacts) Main merged ADR 0087 (public-artifacts-and-unlisted-share-links, #528) while this revision stack was off-branch, so the prior renumber onto 0087/0088 collided. 
Shift the stack up one and restore main's 0087 index row: workspace-scoped blob dedup 0087 -> 0088 revision commit chain + delta 0088 -> 0089 agent file read-back 0089 -> 0090 Renames the three ADR files and rewrites every in-tree reference (filename tokens + bare "ADR 00NN" comments across code, migrations, specs, CI, and CONTEXT.md). Bare "ADR 0087" references that mean main's public-artifacts ADR (CONTEXT.md, project-status.md, 0086) are preserved untouched. README index re-adds the 0087 public-artifacts row dropped during the rebase. Also drops four dead imports in apps/cli/src/index.ts (PublishResultShape, formatBytes, hyperlink, paint) left unused by the Stage 5 work. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 2 +- .github/workflows/pr-preview.yml | 2 +- apps/api/src/env.ts | 2 +- .../api/src/routes/artifact-file-content.test.ts | 2 +- apps/api/src/routes/artifact-file-content.ts | 4 ++-- apps/cli/src/index.ts | 14 ++------------ apps/cli/src/local.ts | 2 +- apps/cli/src/manifest-cache.ts | 2 +- apps/cli/src/revise.ts | 4 ++-- apps/cli/src/unified-diff-gen.ts | 2 +- apps/cli/test/index.test.ts | 4 ++-- apps/upload/src/env.ts | 2 +- apps/upload/src/finalize.test.ts | 2 +- apps/upload/src/finalize.ts | 2 +- ...oped-content-addressed-blob-deduplication.md} | 8 ++++---- ...nheritance-and-server-reconstructed-delta.md} | 6 +++--- ...e-read-back-api-decrypts-member-plaintext.md} | 4 ++-- docs/adr/README.md | 7 ++++--- docs/development.md | 2 +- docs/ops/git-like-revisions-todo.md | 8 ++++---- docs/specs/api.md | 4 ++-- docs/specs/cli.md | 4 ++-- docs/specs/data-model.md | 8 ++++---- packages/api-client/src/index.ts | 2 +- packages/api-client/src/publish.ts | 4 ++-- packages/contracts/src/agentView.ts | 2 +- packages/contracts/src/artifacts.ts | 2 +- packages/contracts/src/mcp.test.ts | 2 +- .../contracts/src/routes/registry.artifacts.ts | 2 +- packages/contracts/src/uploadSessions.ts | 4 ++-- .../0024_revisions_parent_revision_id.sql | 2 +- 
...25_upload_session_base_revision_and_patch.sql | 2 +- packages/db/src/agent-view.ts | 2 +- packages/db/src/index.test.ts | 4 ++-- .../db/src/postgres/revision-reconstructor.ts | 2 +- packages/db/src/repository-error.ts | 2 +- .../src/repository/upload-session-lifecycle.ts | 16 ++++++++-------- packages/db/src/schema.ts | 4 ++-- packages/db/src/types.ts | 8 ++++---- packages/db/src/validation.ts | 2 +- packages/storage/src/unified-diff.ts | 2 +- packages/storage/src/workspace-blob-bytes.ts | 2 +- .../src/route-repository-errors.ts | 2 +- scripts/smoke-local-patch.mjs | 2 +- 44 files changed, 79 insertions(+), 88 deletions(-) rename docs/adr/{0087-workspace-scoped-content-addressed-blob-deduplication.md => 0088-workspace-scoped-content-addressed-blob-deduplication.md} (95%) rename docs/adr/{0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md => 0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md} (98%) rename docs/adr/{0089-agent-file-read-back-api-decrypts-member-plaintext.md => 0090-agent-file-read-back-api-decrypts-member-plaintext.md} (98%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 854e1c98..81e29d67 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -199,7 +199,7 @@ jobs: if: ${{ env.RUN_CODE == 'true' }} run: pnpm smoke:local - # Real intra-file patch reconstruction through the in-memory harness (ADR 0088 + # Real intra-file patch reconstruction through the in-memory harness (ADR 0089 # stage 4): the unit tests use a fake reconstructor, so this is the only check # that exercises real decrypt -> apply diff -> hash-verify -> re-encrypt -> serve. 
- name: Local patch smoke diff --git a/.github/workflows/pr-preview.yml b/.github/workflows/pr-preview.yml index 375bd8af..cac0a58c 100644 --- a/.github/workflows/pr-preview.yml +++ b/.github/workflows/pr-preview.yml @@ -228,7 +228,7 @@ jobs: AGENT_PASTE_EPHEMERAL_SMOKE_WORKOS_ACCESS_TOKEN: ${{ secrets.AGENT_PASTE_EPHEMERAL_SMOKE_WORKOS_ACCESS_TOKEN }} run: node scripts/smoke-hosted-ephemeral.mjs pr - # Real intra-file patch reconstruction against the deployed PR preview (ADR 0088 + # Real intra-file patch reconstruction against the deployed PR preview (ADR 0089 # stage 4): exercises decrypt -> apply diff -> hash-verify -> re-encrypt -> serve # byte-exact + the patch_conflict path through the live upload/api/content Workers. - name: Hosted patch reconstruction smoke diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts index bfeb426c..7131c7bf 100644 --- a/apps/api/src/env.ts +++ b/apps/api/src/env.ts @@ -25,7 +25,7 @@ export type R2GetObjectBody = { export type R2Bucket = { list(options: { prefix?: string; cursor?: string; limit?: number }): Promise; delete(keys: string | string[]): Promise; - // ADR 0089: the file-content read route decrypts a stored blob. This is + // ADR 0090: the file-content read route decrypts a stored blob. This is // the only read on api's R2 binding; every other api op lists or deletes. get(key: string): Promise; }; diff --git a/apps/api/src/routes/artifact-file-content.test.ts b/apps/api/src/routes/artifact-file-content.test.ts index ff6e1e75..302c22ce 100644 --- a/apps/api/src/routes/artifact-file-content.test.ts +++ b/apps/api/src/routes/artifact-file-content.test.ts @@ -165,7 +165,7 @@ describe("artifacts.fileContent route", () => { it("returns storage_unavailable (not 500) when decryption fails on tampered ciphertext", async () => { // A corrupt/auth-tag-rejected ciphertext throws a plain Error from the ring, not a - // WorkspaceBlob* error. It must still degrade to 503 (retryable), never a 500 (ADR 0089). + // WorkspaceBlob* error. 
It must still degrade to 503 (retryable), never a 500 (ADR 0090). const plaintext = "secret\n"; const sha = await sha256Hex(plaintext); const seeded = await seedEncryptedWorkspaceBlob({ workspaceId, sha256: sha, plaintext }); diff --git a/apps/api/src/routes/artifact-file-content.ts b/apps/api/src/routes/artifact-file-content.ts index d3613e07..a88d3c79 100644 --- a/apps/api/src/routes/artifact-file-content.ts +++ b/apps/api/src/routes/artifact-file-content.ts @@ -11,7 +11,7 @@ import { contentBaseUrl } from "../runtime.js"; type FileContentParams = { artifactId: string; path: string; revisionId?: string }; // Reads one stored file's decrypted plaintext for the owning Workspace Member so -// an agent can diff against it and revise with a unified-diff patch (ADR 0089). +// an agent can diff against it and revise with a unified-diff patch (ADR 0090). // The agent already owns the artifact and can fetch the same bytes via // the signed content url, so returning plaintext here adds no confidentiality // exposure; it just gives an agent without the working dir a base to diff. @@ -76,7 +76,7 @@ export async function readArtifactFileContent( // present, well-formed blob). Every throw — missing object, bad/absent metadata, // an unknown kid or AAD/auth-tag rejection from the ring — is an operational or // crypto condition on a row we already validated, not a client error. All map to - // storage_unavailable (503, retryable), never a 500 (ADR 0089). + // storage_unavailable (503, retryable), never a 500 (ADR 0090). 
return responders.respondError("storage_unavailable"); } diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index e35ab667..5505d646 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -28,23 +28,13 @@ import { formatEphemeralPublishResult, formatMakePublic, formatPublishResult, - type PublishResultShape, } from "./publish-format.js"; import { apiClientTransport } from "./publish-transport.js"; // Re-exported for tests that import it from the CLI entrypoint. export { ephemeralClaimUrl } from "./publish-format.js"; -import { - createProgress, - exitCodeFor, - formatBytes, - formatError, - hyperlink, - type OutputMode, - paint, - resolveMode, -} from "./render.js"; +import { createProgress, exitCodeFor, formatError, type OutputMode, resolveMode } from "./render.js"; import { buildRevisePlan, isBaseUnusableError, type LocalFileWithDigest, type RevisePlan } from "./revise.js"; import { commandInvocation, detectChannel, runUpdateCheck, signedOutHint } from "./update-check.js"; import { runUpgrade } from "./upgrade.js"; @@ -406,7 +396,7 @@ async function makePublic(parsed: Parsed, client: ApiClient) { return output(payload, parsed.global, formatMakePublic(outputModeFor(parsed.global), payload)); } -// Read one stored file's content for the owning member (ADR 0089). Default +// Read one stored file's content for the owning member (ADR 0090). Default // output is cat-like: the raw text body to stdout, so `agent-paste pull // > file` works. --json emits structured metadata; binary content is base64 in // json and refused in plain (raw bytes would corrupt a terminal / piped text). 
diff --git a/apps/cli/src/local.ts b/apps/cli/src/local.ts index 4ee38db4..115850a6 100644 --- a/apps/cli/src/local.ts +++ b/apps/cli/src/local.ts @@ -150,7 +150,7 @@ export async function sha256HexForFile(absolutePath: string): Promise { it("self-heals when finalize collapses a base-unusable error to invalid_request", async () => { // The base-* repository kinds reach the wire as code `invalid_request` with the kind - // attached as the message detail (ADR 0089). This proves the CLI keys on that detail — + // attached as the message detail (ADR 0090). This proves the CLI keys on that detail — // rejecting on `finalize` (where base errors realistically fire), not `create`, and with // a bare `invalid_request` code, so it fails if the detail signal regresses. mockStdout(); diff --git a/apps/upload/src/env.ts b/apps/upload/src/env.ts index d6231dd9..eae2238f 100644 --- a/apps/upload/src/env.ts +++ b/apps/upload/src/env.ts @@ -33,7 +33,7 @@ export type R2Bucket = { options?: { httpMetadata?: Record; customMetadata?: Record }, ): Promise; head(key: string): Promise; - // Reconstruction (ADR 0088) reads a base blob + the uploaded diff back at finalize to + // Reconstruction (ADR 0089) reads a base blob + the uploaded diff back at finalize to // apply the patch. This is the only read on upload's R2 binding; every other op writes. get(key: string): Promise; }; diff --git a/apps/upload/src/finalize.test.ts b/apps/upload/src/finalize.test.ts index 329a06ec..23d198e7 100644 --- a/apps/upload/src/finalize.test.ts +++ b/apps/upload/src/finalize.test.ts @@ -263,7 +263,7 @@ describe("finalizeUploadSession", () => { // The five base-unusable kinds collapse to wire code invalid_request, so the precise // kind must ride along as the message detail or the CLI cannot tell a stale base from - // a malformed request and never self-heals (ADR 0089). This is the server side of that + // a malformed request and never self-heals (ADR 0090). 
This is the server side of that // contract: without the detail, message would be the bare "invalid_request". it.each([ "base_revision_not_found", diff --git a/apps/upload/src/finalize.ts b/apps/upload/src/finalize.ts index e0e93273..6e177457 100644 --- a/apps/upload/src/finalize.ts +++ b/apps/upload/src/finalize.ts @@ -19,7 +19,7 @@ type GuardFor = GuardState([ RepositoryErrorCode.base_revision_not_found, diff --git a/docs/adr/0087-workspace-scoped-content-addressed-blob-deduplication.md b/docs/adr/0088-workspace-scoped-content-addressed-blob-deduplication.md similarity index 95% rename from docs/adr/0087-workspace-scoped-content-addressed-blob-deduplication.md rename to docs/adr/0088-workspace-scoped-content-addressed-blob-deduplication.md index 30eee716..07426f32 100644 --- a/docs/adr/0087-workspace-scoped-content-addressed-blob-deduplication.md +++ b/docs/adr/0088-workspace-scoped-content-addressed-blob-deduplication.md @@ -3,7 +3,7 @@ Status: Accepted (retroactive). Records a decision already shipped in code and [`data-model.md`](../specs/data-model.md)/[`api.md`](../specs/api.md) but not previously captured as an ADR. Drafted because the next decision -([ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)) +([ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)) builds directly on it and an implementer should not have to reconstruct this from the schema and commit history. @@ -70,7 +70,7 @@ Files are deduplicated within a **Workspace** by the SHA-256 of their plaintext. client re-declares them with their `sha256`. The dedup saves the bytes on the wire; it does not remove the requirement to re-enumerate the full file list, and the system does not detect unchanged files on its own. Closing that gap is - [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). 
+ [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). - **Two storage kinds coexist.** `artifact_files.storage_kind` is `blob` (shared workspace object) or `revision` (legacy per-Revision object). Byte purge, bundle generation, and content serving treat both transparently. @@ -90,7 +90,7 @@ Files are deduplicated within a **Workspace** by the SHA-256 of their plaintext. - Not intra-file deduplication. The unit is the whole file; one changed byte yields a new digest and a new blob. Sub-file delta is - [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). + [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). - Not a Revision-level content address or commit graph. Only individual files are content-addressed; Revisions remain a flat numbered list until - [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). + [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md). diff --git a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md b/docs/adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md similarity index 98% rename from docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md rename to docs/adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md index 284bc783..2e6f5d33 100644 --- a/docs/adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md +++ b/docs/adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md @@ -3,10 +3,10 @@ An agent that has already published an **Artifact** and wants to change one file should be able to say "change this one file," not re-describe the whole tree. 
Two gaps stand in the way today, both recorded as the missing half of -[ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md): +[ADR 0088](./0088-workspace-scoped-content-addressed-blob-deduplication.md): 1. **No tree inheritance.** A new **Revision** must re-enumerate every path with - its `sha256`. Workspace blob dedup ([ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md)) + its `sha256`. Workspace blob dedup ([ADR 0088](./0088-workspace-scoped-content-addressed-blob-deduplication.md)) skips the unchanged _bytes_, but the client still walks and hashes the whole directory and sends the full manifest. The smallest change an agent can express is "here is the entire new tree." @@ -67,7 +67,7 @@ change.** in unified diffs, so it is the ergonomic match for the driver, and it is reviewable. Binary files rarely take tiny edits, and a byte-splice format is fiddly for an agent to produce; a changed binary file just uploads a new whole -blob (the [ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md) +blob (the [ADR 0088](./0088-workspace-scoped-content-addressed-blob-deduplication.md) status quo). The CLI/MCP choose patch vs whole-blob per file: patch only when the file is large enough and the diff is small enough to be worth it; otherwise whole-blob. 
diff --git a/docs/adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md b/docs/adr/0090-agent-file-read-back-api-decrypts-member-plaintext.md similarity index 98% rename from docs/adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md rename to docs/adr/0090-agent-file-read-back-api-decrypts-member-plaintext.md index 950fa7d8..72bc0f3b 100644 --- a/docs/adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md +++ b/docs/adr/0090-agent-file-read-back-api-decrypts-member-plaintext.md @@ -1,6 +1,6 @@ # Agent File Read-Back: `api` Decrypts and Returns Member Plaintext -[ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) +[ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) gave the server everything it needs to accept a **partial-manifest publish with per-file unified-diff patches**, and Stage 4 made reconstruction fail loud at finalize (`patch_conflict`). But an agent can only _produce_ a correct unified @@ -131,7 +131,7 @@ plaintext and its `sha256`, then diff against it. can leak and silently 500 the MCP parse (the class of bug behind earlier strict envelope / null-revision incidents). A test asserts the real handler output parses under the strict contract. -- Builds on [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md); +- Builds on [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md); amends [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md)'s "decrypt-on-read is `content`-only" note (now `content` + the member read route in `api`). Defers Range serving, a patch threshold, and an MCP patch path. 
diff --git a/docs/adr/README.md b/docs/adr/README.md index df382772..31ffc438 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -40,9 +40,10 @@ This directory is the decision log for agent-paste: it records _why_ choices wer - [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md) records that the **CLI** and **MCP** are two transports over one publish path: both call `runPublish` in `@agent-paste/api-client`, differing only behind a four-method `PublishTransport` seam (CLI over the HTTPS `ApiClient`, MCP over Worker service bindings). It forbids reintroducing a surface-specific publish implementation — the divergence that shipped the no-link-on-MCP and draft-`list_artifacts`-500 bugs. The shared module is exposed on the Worker-safe `@agent-paste/api-client/publish` subpath so the MCP bundle never pulls the Node-only `ApiClient`. The publish output is `{title, private_url, expires_at, upload_stats?}` with no `shared` field, per [ADR 0086](./0086-publish-is-content-only-private-first.md). It does not merge the two binaries; login/logout/upgrade, ephemeral provisioning, idempotency-key derivation, and output rendering stay caller-specific. - [ADR 0085](./0085-publish-returns-one-viewer-url.md) — **Status: Superseded by [ADR 0086](./0086-publish-is-content-only-private-first.md).** It recorded that publish (both surfaces, through [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)) returns one `viewer_url` plus a `shared` boolean, private by default, where `viewer_url` switched between the authenticated **Private Link** and the public **Share Link**'s signed URL. ADR 0086 retired that switching field and the `share`/`shared` convention: the switching link lied on revise (it reported `shared:false` while a live Share Link still served the page) and the `share` flag put public-by-flag on the content-publish call. **Viewer URL** is removed from [`CONTEXT.md`](../../CONTEXT.md). 
- [ADR 0086](./0086-publish-is-content-only-private-first.md) supersedes [ADR 0085](./0085-publish-returns-one-viewer-url.md): publish is content-only and private-first. `publish_artifact`, `add_revision`, and `agent-paste publish` accept no visibility input and return exactly one link — the **Private Link**, surfaced as `private_url`, a login-walled clean viewer at `/v/` for the owning **Workspace Member** (never the **Artifact Console** at `/artifacts/`). The `share`/`--share` inputs and the `shared` output bit are removed from every surface (CLI, MCP, the REST `PublishRevisionRequest` body, and `runPublish`); the server `PublishResult` renames `viewer_url`/`artifact_url` to `private_url` and drops `access_link_url`. Going public is a separate explicit verb: `make_public` (MCP) and `agent-paste make-public` (CLI), replacing `create_share_link`, mint or reuse the one revocable **Share Link** and return its no-login **Access Link Signed URL**. `revoke_access_link`, `list_access_links`, and `create_revision_link` are unchanged; the [ADR 0047](./0047-access-link-signed-url-with-fragment-encoded-payload.md) Access Link grant model is untouched. [`CONTEXT.md`](../../CONTEXT.md) deletes **Viewer URL**, renames **Artifact URL** to **Artifact Console**, and retargets **Private Link** at the `/v` viewer. Amends [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)'s output-shape note. -- [ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md) records (retroactively) the shipped workspace-scoped content-addressed blob model: a client-supplied plaintext `sha256` lets `createUploadSession` return `reused` for files whose `(workspace_id, sha256, size_bytes)` blob already exists, so unchanged bytes skip the PUT. 
Blobs live at `workspaces/{wid}/blobs/sha256/{prefix}/{sha256}`, are encrypted under the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) per-Workspace DEK with a path/revision-independent AAD `v2`, and are GC'd reference-counted without deleting the deterministic R2 object. Dedup is workspace-scoped (not global) to preserve the tenant boundary; the digest is verified on PUT. Whole-file only — the client must still re-enumerate the full manifest, which [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) addresses. -- [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) makes **Revisions** behave like Git commits so an agent can express "change this file" instead of the whole tree. Adds `revisions.parent_revision_id` and a `base_revision_id` + `deleted_paths` + partial-manifest publish contract where unlisted paths inherit the parent tree by reference (`api`-side merge, full `artifact_files` tree still materialized). Layers server-reconstructed intra-file delta on top: a changed file may be sent as a unified diff (text; whole-blob fallback for binary), reconstructed and re-hashed to a whole blob **synchronously at finalize in `upload`** so a patch that cannot apply fails the same publish call with an agent-visible `patch_conflict` (a broken patch never becomes a servable draft) and `content` plus the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) encryption boundary are untouched. Builds on [ADR 0087](./0087-workspace-scoped-content-addressed-blob-deduplication.md); defers chunk stores, per-block AEAD, Range serving, global dedup, and dropping encryption. -- [ADR 0089](./0089-agent-file-read-back-api-decrypts-member-plaintext.md) closes the [ADR 0088](./0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) patch loop for agents that lack the working directory. 
Adds an optional `sha256` to `AgentViewFile`, a member-authed `GET /v1/artifacts/{id}/file-content?path=&revision_id=` read route in **`api`** returning `{ path, sha256, size_bytes, content_type, is_binary, body? }` (text body when UTF-8 and ≤10 MiB; oversize/binary return metadata only, oversize skips the R2 read), and an MCP `read_file` tool. The boundary-relevant decision: **`api` now decrypts artifact bytes and returns plaintext** to the owning Member — it does not widen the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) confidentiality boundary (the Member already owns the artifact and can fetch the same bytes via the signed `url`; encryption defends the platform tier, not the owner), and `content` is untouched. The blob key is derived from the RLS-scoped row's `sha256` + the actor's workspace, never client input. Adds the **CLI diff client** (per-artifact manifest cache + working-dir diff → partial manifest with verified unified diffs; stale-base → full-publish fallback) and an `agent-paste pull` verb; MCP `add_revision` stays text-body-only ([ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)). Amends ADR 0063's "decrypt-on-read is `content`-only" note. +- [ADR 0087](./0087-public-artifacts-and-unlisted-share-links.md) records the planned split between unlisted Share Links and true Public Artifacts. Current shipped behavior is still ADR 0086: `make_public` / `agent-paste make-public` mint or reuse a Share Link. The future Public Artifact model gets a stable ID-only `/p/{publicId}` Public URL, frozen Public Version, soft Public Offline control, cacheable Public Version Assets, and operator-only Platform Lockdown for hard takedown. 
+- [ADR 0088](./0088-workspace-scoped-content-addressed-blob-deduplication.md) records (retroactively) the shipped workspace-scoped content-addressed blob model: a client-supplied plaintext `sha256` lets `createUploadSession` return `reused` for files whose `(workspace_id, sha256, size_bytes)` blob already exists, so unchanged bytes skip the PUT. Blobs live at `workspaces/{wid}/blobs/sha256/{prefix}/{sha256}`, are encrypted under the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) per-Workspace DEK with a path/revision-independent AAD `v2`, and are GC'd reference-counted without deleting the deterministic R2 object. Dedup is workspace-scoped (not global) to preserve the tenant boundary; the digest is verified on PUT. Whole-file only — the client must still re-enumerate the full manifest, which [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) addresses. +- [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) makes **Revisions** behave like Git commits so an agent can express "change this file" instead of the whole tree. Adds `revisions.parent_revision_id` and a `base_revision_id` + `deleted_paths` + partial-manifest publish contract where unlisted paths inherit the parent tree by reference (`api`-side merge, full `artifact_files` tree still materialized). Layers server-reconstructed intra-file delta on top: a changed file may be sent as a unified diff (text; whole-blob fallback for binary), reconstructed and re-hashed to a whole blob **synchronously at finalize in `upload`** so a patch that cannot apply fails the same publish call with an agent-visible `patch_conflict` (a broken patch never becomes a servable draft) and `content` plus the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) encryption boundary are untouched. 
Builds on [ADR 0088](./0088-workspace-scoped-content-addressed-blob-deduplication.md); defers chunk stores, per-block AEAD, Range serving, global dedup, and dropping encryption. +- [ADR 0090](./0090-agent-file-read-back-api-decrypts-member-plaintext.md) closes the [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) patch loop for agents that lack the working directory. Adds an optional `sha256` to `AgentViewFile`, a member-authed `GET /v1/artifacts/{id}/file-content?path=&revision_id=` read route in **`api`** returning `{ path, sha256, size_bytes, content_type, is_binary, body? }` (text body when UTF-8 and ≤10 MiB; oversize/binary return metadata only, oversize skips the R2 read), and an MCP `read_file` tool. The boundary-relevant decision: **`api` now decrypts artifact bytes and returns plaintext** to the owning Member — it does not widen the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) confidentiality boundary (the Member already owns the artifact and can fetch the same bytes via the signed `url`; encryption defends the platform tier, not the owner), and `content` is untouched. The blob key is derived from the RLS-scoped row's `sha256` + the actor's workspace, never client input. Adds the **CLI diff client** (per-artifact manifest cache + working-dir diff → partial manifest with verified unified diffs; stale-base → full-publish fallback) and an `agent-paste pull` verb; MCP `add_revision` stays text-body-only ([ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)). Amends ADR 0063's "decrypt-on-read is `content`-only" note. - [ADR 0021](./0021-id-based-r2-object-key-layout.md) is amended for revision file keys. The ADR originally described env-scoped file keys; shipped revision files and upload PUT targets use the legacy `artifacts/{artifactId}/revisions/{revisionId}/files/{path}` prefix. Derived bundles and env-scoped purge prefixes remain env-scoped. 
Current shapes are in [`docs/specs/data-model.md`](../specs/data-model.md#r2-object-key-layout). - [ADR 0062](./0062-two-layer-cache-for-hot-path-auth-lookups.md) is amended for the L2 synthetic cache URL. The ADR originally used `https://cache.agent-paste.internal/{namespace}/{key}`; the shipped helper uses `https://agent-paste.internal/cache/{namespace}/{key}`. Current behavior is in [`docs/specs/architecture.md`](../specs/architecture.md#auth-lookup-cache). - [`packages/contracts`](../../packages/contracts) and [`docs/specs/contracts.md`](../specs/contracts.md) are the canonical MVP implementation contract for Zod schemas, ID formats, and the route registry. ADRs provide rationale; contracts provide field-level implementation shape. diff --git a/docs/development.md b/docs/development.md index 1e206bb5..d38e19a4 100644 --- a/docs/development.md +++ b/docs/development.md @@ -158,7 +158,7 @@ deploy production from a laptop. | Command | Purpose | | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | | `pnpm smoke:local` | Build and run the local publish/content/delete smoke path (also gated in CI `Validate`). | -| `pnpm smoke:local:patch` | Build and run the local ADR 0088 intra-file patch reconstruction smoke (real diff apply + serve byte-exact + conflict). | +| `pnpm smoke:local:patch` | Build and run the local ADR 0089 intra-file patch reconstruction smoke (real diff apply + serve byte-exact + conflict). | | `pnpm smoke:ci:postgres` | Build, migrate a job-local Postgres database, and run the local CLI smoke through the Postgres/RLS-backed harness. | | `pnpm smoke:web` | Build and run local web API auth/dashboard smoke assertions. | | `pnpm smoke:mcp` | Build and run local MCP transport + OAuth + publish/read/delete smoke. 
| diff --git a/docs/ops/git-like-revisions-todo.md b/docs/ops/git-like-revisions-todo.md index abdd01a6..12a15357 100644 --- a/docs/ops/git-like-revisions-todo.md +++ b/docs/ops/git-like-revisions-todo.md @@ -139,7 +139,7 @@ result_sha256 }` plus the diff bytes uploaded like any file body. Absence = `sha256` omitted from the signed PUT. Stateful validation (published base, same workspace/artifact, blob-backed-only inheritance, deleted-path-in-base, patch base match) with six new repo error codes mapped to `invalid_request`. - See the ADR 0088 Stage 3 implementation notes for the decisions. + See the ADR 0089 Stage 3 implementation notes for the decisions. ### Stage 4 - synchronous reconstruct-at-finalize (DONE) @@ -182,7 +182,7 @@ able to FAIL the finalize call. Finalize is also where the patch gate, the only ### Stage 5 - cli/mcp: the ergonomics payoff + agent read-back (DONE) -See [ADR 0089](../adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md) +See [ADR 0090](../adr/0090-agent-file-read-back-api-decrypts-member-plaintext.md) for the decision record. The headline gap Stage 5 surfaced: an agent could not **read a stored file back** to diff against, which is the prerequisite for producing a correct patch when it lacks the working dir. So Stage 5 shipped both @@ -194,7 +194,7 @@ the read-back and the CLI diff client. UTF-8 and ≤10 MiB; oversize skips the R2 read and returns metadata; binary sets `is_binary:true`, no body). `api` decrypts via `readWorkspaceBlobBytes` (the Stage 4 helper) — the first `api` byte-decrypt surface, member-only, boundary - unchanged (ADR 0089). MCP gains a `read_file` tool forwarding to it. + unchanged (ADR 0090). MCP gains a `read_file` tool forwarding to it. - **CLI diff client.** The CLI caches the last published manifest per artifact (`paths + sha256 + revision_id`) under `configDir()`. 
On revise (`publish --artifact-id`): diff the working dir against the cache, send only @@ -230,5 +230,5 @@ the read-back and the CLI diff client. not async in `jobs`. The conflict flag-back is the feature: a patch that cannot apply must FAIL the same finalize call with an agent-visible `patch_conflict`, so a broken patch never becomes a servable draft. There is therefore no - pending-state model and no `reconstruction_status`. See the ADR 0088 Stage 4 + pending-state model and no `reconstruction_status`. See the ADR 0089 Stage 4 implementation notes. diff --git a/docs/specs/api.md b/docs/specs/api.md index 0c9ed917..9107d3bb 100644 --- a/docs/specs/api.md +++ b/docs/specs/api.md @@ -86,7 +86,7 @@ Authenticated `api` and `upload` routes enforce guards in a fixed order `PublicAgentView` is public to anyone with the signed token. It returns full per-file signed content URLs, not `content_prefix`, and does not include lockdown metadata. Authenticated owner/member Agent View routes may include explicit lockdown metadata for dashboard-visible locked Artifacts. -`file-content` reads one stored file's decrypted plaintext for the owning Workspace Member so an agent can diff against it and revise with a unified-diff patch ([ADR 0089](../adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md)). Inputs: `?path=` (required; query, not a path segment, since a file path may contain `/`) and `?revision_id=` (optional; defaults to latest). The response `ArtifactFileContent` is `{ path, sha256, size_bytes, content_type, is_binary, body? }`: `body` is the decoded UTF-8 text and is present only when the file is text and `≤ 10 MiB`. `is_binary` is byte-derived (true binary only); a text file over the inline cap returns `is_binary: false` with `body` absent (the agent fetches it via the content URL or uploads a whole blob), and an oversize file is returned as metadata **without reading R2**. 
This is the only `api` route that decrypts artifact bytes; the blob key is derived from the RLS-scoped row's plaintext `sha256` plus the actor's workspace, never from client input, and a missing/undecryptable blob is `storage_unavailable` (503), never `not_found`. `AgentView` file entries also carry an optional plaintext `sha256` so an agent can detect what changed before reading a file back. +`file-content` reads one stored file's decrypted plaintext for the owning Workspace Member so an agent can diff against it and revise with a unified-diff patch ([ADR 0090](../adr/0090-agent-file-read-back-api-decrypts-member-plaintext.md)). Inputs: `?path=` (required; query, not a path segment, since a file path may contain `/`) and `?revision_id=` (optional; defaults to latest). The response `ArtifactFileContent` is `{ path, sha256, size_bytes, content_type, is_binary, body? }`: `body` is the decoded UTF-8 text and is present only when the file is text and `≤ 10 MiB`. `is_binary` is byte-derived (true binary only); a text file over the inline cap returns `is_binary: false` with `body` absent (the agent fetches it via the content URL or uploads a whole blob), and an oversize file is returned as metadata **without reading R2**. This is the only `api` route that decrypts artifact bytes; the blob key is derived from the RLS-scoped row's plaintext `sha256` plus the actor's workspace, never from client input, and a missing/undecryptable blob is `storage_unavailable` (503), never `not_found`. `AgentView` file entries also carry an optional plaintext `sha256` so an agent can detect what changed before reading a file back. ## Upload Routes @@ -148,7 +148,7 @@ Rules: hex SHA-256 for each file. Legacy clients that omit it keep the full-upload revision-object path and do not participate in deduplication. 
- `base_revision_id`, `deleted_paths`, and per-file `patch` are the optional - commit-chain / partial-manifest inputs ([ADR 0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). + commit-chain / partial-manifest inputs ([ADR 0089](../adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). When `base_revision_id` is set, `files` lists only changed and added paths, `deleted_paths` drops paths, and every other path inherits from the base Revision by reference. A per-file `patch` (`{ base_sha256, format: "unified", diff --git a/docs/specs/cli.md b/docs/specs/cli.md index 77946e2d..af850c20 100644 --- a/docs/specs/cli.md +++ b/docs/specs/cli.md @@ -49,7 +49,7 @@ automatic; flags override detection. command, which mints or reuses the one Share Link and prints its no-login Access Link Signed URL. - `pull [--revision-id ]` reads one stored file back - ([ADR 0089](../adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md)). + ([ADR 0090](../adr/0090-agent-file-read-back-api-decrypts-member-plaintext.md)). Default output is cat-like (the raw text body to stdout, so `pull … > file` works); `--json` emits `{ schema_version, path, sha256, size_bytes, is_binary, body? }`. A binary file has no inline body: `--json` reports `is_binary: true` @@ -59,7 +59,7 @@ body? }`. A binary file has no inline body: `--json` reports `is_binary: true` ## Incremental revise (manifest cache + diffs) On a revise (`publish --artifact-id `), the CLI sends only what -changed instead of the whole tree ([ADR 0089](../adr/0089-agent-file-read-back-api-decrypts-member-plaintext.md)). +changed instead of the whole tree ([ADR 0090](../adr/0090-agent-file-read-back-api-decrypts-member-plaintext.md)). 
It caches the last published manifest per artifact (`paths + sha256 + revision_id`) under the CLI config dir and, on the next revise, diffs the working dir against that cache: unchanged files inherit by reference (not re-hashed, not re-uploaded), diff --git a/docs/specs/data-model.md b/docs/specs/data-model.md index 18430132..da1f2af4 100644 --- a/docs/specs/data-model.md +++ b/docs/specs/data-model.md @@ -96,7 +96,7 @@ First-class revision rows for multi-revision Artifacts ([0009](../../packages/db | `id` | `TEXT PRIMARY KEY` | `rev_...`. | | `workspace_id` | `UUID NOT NULL REFERENCES workspaces(id) ON DELETE RESTRICT` | Tenant scope. | | `artifact_id` | `TEXT NOT NULL REFERENCES artifacts(id) ON DELETE CASCADE` | Parent Artifact; deleting the Artifact deletes its revisions. | -| `parent_revision_id` | `TEXT NULL` | Commit-chain parent ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)); deferrable self-FK on `(workspace_id, artifact_id, parent_revision_id)` → `revisions(workspace_id, artifact_id, id)`, `ON DELETE SET NULL (parent_revision_id)`. `NULL` for roots. | +| `parent_revision_id` | `TEXT NULL` | Commit-chain parent ([0089](../adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)); deferrable self-FK on `(workspace_id, artifact_id, parent_revision_id)` → `revisions(workspace_id, artifact_id, id)`, `ON DELETE SET NULL (parent_revision_id)`. `NULL` for roots. | | `revision_number` | `INTEGER NULL` | Assigned on publish; unique per Artifact when not null. Null while `status = 'draft'`. | | `status` | `TEXT NOT NULL` | `draft`, `published`, or `retained`. | | `entrypoint` | `TEXT NOT NULL` | Normalized file path. | @@ -135,7 +135,7 @@ For `storage_kind = 'revision'`, `r2_key` points at the legacy `storage_kind = 'blob'`, `r2_key` points at a workspace shared blob object under `workspaces/{workspaceId}/blobs/sha256/{prefix}/{sha256}`. 
-Under tree inheritance ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)), +Under tree inheritance ([0089](../adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)), a Revision published against a base copies forward the base's unchanged `artifact_files` rows by reference (same `sha256` / `r2_key` / `storage_kind = 'blob'`), so a one-file change yields a full file tree but only one new blob. Only @@ -203,7 +203,7 @@ exposing scanner internals. | `expires_at` | `TIMESTAMPTZ NOT NULL` | Upload session TTL, typically 24 hours. | | `created_at` | `TIMESTAMPTZ NOT NULL` | | | `finalized_at` | `TIMESTAMPTZ NULL` | | -| `base_revision_id` | `TEXT NULL` | Base Revision this publish inherits from ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). `NULL` is a full manifest. Copied to `revisions.parent_revision_id` when the tree merge runs at finalize. | +| `base_revision_id` | `TEXT NULL` | Base Revision this publish inherits from ([0089](../adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). `NULL` is a full manifest. Copied to `revisions.parent_revision_id` when the tree merge runs at finalize. | | `deleted_paths` | `JSONB NOT NULL DEFAULT '[]'` | Base paths this publish drops. Lets finalize tell a deleted path apart from an inherited one (both are base paths absent from the file manifest). | ### `upload_session_files` @@ -220,7 +220,7 @@ exposing scanner internals. | `storage_kind` | `TEXT NOT NULL DEFAULT 'revision'` | `revision` or `blob`. | | `uploaded_at` | `TIMESTAMPTZ NULL` | Set after successful PUT or existing blob reuse. | | `put_url_expires_at` | `TIMESTAMPTZ NOT NULL` | Session-level upper bound for PUT writes. Set to `upload_sessions.expires_at` at session creation. 
| -| `patch_base_sha256` | `TEXT NULL` | Intra-file delta ([0088](../adr/0088-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)): digest of the base Revision's file the uploaded unified diff applies to. | +| `patch_base_sha256` | `TEXT NULL` | Intra-file delta ([0089](../adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)): digest of the base Revision's file the uploaded unified diff applies to. | | `patch_result_sha256` | `TEXT NULL` | Digest of the whole file the server reconstructs from the diff (applied synchronously at finalize); the committed `artifact_files` row is an ordinary `storage_kind='blob'` row at this sha. Both patch columns are `NULL` (whole-file upload) or both set (a `CHECK` enforces it). | Primary key `(upload_session_id, path)`. diff --git a/packages/api-client/src/index.ts b/packages/api-client/src/index.ts index a6b03f82..2092b9b8 100644 --- a/packages/api-client/src/index.ts +++ b/packages/api-client/src/index.ts @@ -160,7 +160,7 @@ export class ApiClient { artifacts = { // Read one stored file's decrypted plaintext + sha256 so the caller can diff - // against it for a patch revise (ADR 0089). revisionId pins the read + // against it for a patch revise (ADR 0090). revisionId pins the read // to a specific Revision; omit for the latest. readFile: (artifactId: ArtifactId | string, path: string, revisionId?: RevisionId | string) => { const query = new URLSearchParams({ path }); diff --git a/packages/api-client/src/publish.ts b/packages/api-client/src/publish.ts index 36b3f493..aba99a57 100644 --- a/packages/api-client/src/publish.ts +++ b/packages/api-client/src/publish.ts @@ -15,7 +15,7 @@ import type { UploadSessionId, } from "@agent-paste/contracts"; -/** A unified-diff patch a changed file is sent as instead of whole bytes (ADR 0089). */ +/** A unified-diff patch a changed file is sent as instead of whole bytes (ADR 0090). 
*/ export type PublishFilePatch = { baseSha256: Sha256Hex; resultSha256: Sha256Hex; @@ -50,7 +50,7 @@ export type PublishInput = { /** * Present => a partial-manifest publish: `files` lists only changed/added paths * (some possibly as patches), `deletedPaths` drops paths, and every other path - * inherits from this base Revision by reference (ADR 0089). + * inherits from this base Revision by reference (ADR 0090). */ baseRevisionId?: RevisionId; deletedPaths?: FilePath[]; diff --git a/packages/contracts/src/agentView.ts b/packages/contracts/src/agentView.ts index 72096c10..0a6305ab 100644 --- a/packages/contracts/src/agentView.ts +++ b/packages/contracts/src/agentView.ts @@ -44,7 +44,7 @@ export const AgentViewFile = z.object({ url: UrlString, // Plaintext content address. Optional because diff-only/draft rows have no // materialized blob; an agent uses it to detect what changed before reading a - // file back to diff against (ADR 0089). Object is not strict, so this + // file back to diff against (ADR 0090). Object is not strict, so this // is a non-breaking add the MCP read_artifact safeParse already accepts. sha256: Sha256Hex.optional(), }); diff --git a/packages/contracts/src/artifacts.ts b/packages/contracts/src/artifacts.ts index 629f3876..d79a3ce7 100644 --- a/packages/contracts/src/artifacts.ts +++ b/packages/contracts/src/artifacts.ts @@ -49,7 +49,7 @@ export const DeleteArtifactResponse = z.object({ export type DeleteArtifactResponse = z.infer<typeof DeleteArtifactResponse>; // A member reading one stored file's decrypted plaintext so an agent can diff -// against it to produce a unified-diff patch revise (ADR 0089). +// against it to produce a unified-diff patch revise (ADR 0090). // `is_binary` is byte-derived (true binary only); `content_type` is path-derived, // so they may disagree (e.g. binary saved as .txt) — `is_binary` is authoritative // for deciding whether `body` is patchable text.
`body` is the decoded UTF-8 text diff --git a/packages/contracts/src/mcp.test.ts b/packages/contracts/src/mcp.test.ts index 1cad3db8..c6aac65d 100644 --- a/packages/contracts/src/mcp.test.ts +++ b/packages/contracts/src/mcp.test.ts @@ -478,7 +478,7 @@ describe("MCP error mapping", () => { }); it("declares patch_conflict on every tool that forwards a finalize call", () => { - // finalize can surface patch_conflict (ADR 0088); a tool that forwards it must + // finalize can surface patch_conflict (ADR 0089); a tool that forwards it must // declare it, or an agent sees an error its contract never advertised (it slipped // out of publishChain once). Scoped to patch_conflict + the finalize route rather // than a full superset assertion, which would relitigate the deliberate exclusion diff --git a/packages/contracts/src/routes/registry.artifacts.ts b/packages/contracts/src/routes/registry.artifacts.ts index 7217591f..ae34a076 100644 --- a/packages/contracts/src/routes/registry.artifacts.ts +++ b/packages/contracts/src/routes/registry.artifacts.ts @@ -125,7 +125,7 @@ export const artifactRouteContracts = [ // The file path travels as ?path= (not a path segment): FilePath may contain // '/', which route-path building encodes and Hono ':param' will not match. // ?revision_id= pins the read to a specific Revision so a CLI diff base and - // its inherit base are the same Revision; absent => latest (ADR 0089). + // its inherit base are the same Revision; absent => latest (ADR 0090). path: "/v1/artifacts/{artifact_id}/file-content", auth: "api_key_or_mcp_oauth", scopes: ["read"], diff --git a/packages/contracts/src/uploadSessions.ts b/packages/contracts/src/uploadSessions.ts index b5b28619..c671bf22 100644 --- a/packages/contracts/src/uploadSessions.ts +++ b/packages/contracts/src/uploadSessions.ts @@ -18,7 +18,7 @@ import { z } from "./zod.js"; // Sha256Hex) keep working; agentView.ts and artifacts.ts also need it. 
export { Sha256Hex } from "./primitives.js"; -// A changed file may arrive as a patch against a base Revision's file (ADR 0088) +// A changed file may arrive as a patch against a base Revision's file (ADR 0089) // instead of whole bytes. When present, the bytes uploaded for this file entry are // the diff (so the entry's size_bytes/sha256 describe the diff), base_sha256 is the // digest of the file in the base Revision the diff applies to, and result_sha256 is @@ -43,7 +43,7 @@ export type UploadSessionFileInput = z.infer<typeof UploadSessionFileInput>; // client input. Clients (CLI, MCP) cannot request or influence artifact lifetime. // render_mode is an explicit client override; when absent the server infers it // from the entrypoint extension at publish time. -// base_revision_id turns this into a partial-manifest publish (ADR 0088): files +// base_revision_id turns this into a partial-manifest publish (ADR 0089): files // lists only changed/added paths, deleted_paths drops paths, and every other path // inherits from the base Revision by reference. deleted_paths and per-file patches // are only meaningful against a base. Structural checks live here; the stateful diff --git a/packages/db/migrations/0024_revisions_parent_revision_id.sql b/packages/db/migrations/0024_revisions_parent_revision_id.sql index 72251485..748b3455 100644 --- a/packages/db/migrations/0024_revisions_parent_revision_id.sql +++ b/packages/db/migrations/0024_revisions_parent_revision_id.sql @@ -1,6 +1,6 @@ begin; --- Revision commit chain (ADR 0088): a Revision may point at the Revision it was +-- Revision commit chain (ADR 0089): a Revision may point at the Revision it was -- published against. NULL means a root (every pre-existing row is a root; no -- backfill). The composite self-FK references (workspace_id, artifact_id, id) so -- a parent is structurally guaranteed to live in the same Workspace and Artifact.
diff --git a/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql b/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql index 86055bda..e263a371 100644 --- a/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql +++ b/packages/db/migrations/0025_upload_session_base_revision_and_patch.sql @@ -1,4 +1,4 @@ --- ADR 0088 Stage 3: partial-manifest publish carriers on the upload session. +-- ADR 0089 Stage 3: partial-manifest publish carriers on the upload session. -- -- upload_sessions.base_revision_id records the Revision this publish inherits -- from; the tree merge runs at finalize and copies it to diff --git a/packages/db/src/agent-view.ts b/packages/db/src/agent-view.ts index 44560253..9eeda16f 100644 --- a/packages/db/src/agent-view.ts +++ b/packages/db/src/agent-view.ts @@ -66,7 +66,7 @@ export function buildAgentView( object_key: file.r2_key, url: `${prefix}/${encodePath(file.path)}`, // Plaintext content address so an agent can detect changes and declare a - // patch base (ADR 0089). Omitted for non-blob/diff-only rows. + // patch base (ADR 0090). Omitted for non-blob/diff-only rows. ...(file.sha256 ? { sha256: file.sha256 } : {}), })), safety_warnings: warnings.slice(0, 100).map(toAgentViewSafetyWarning), diff --git a/packages/db/src/index.test.ts b/packages/db/src/index.test.ts index 8995d50a..b8a21cc8 100644 --- a/packages/db/src/index.test.ts +++ b/packages/db/src/index.test.ts @@ -2318,7 +2318,7 @@ function fakeReconstructor(options?: { } // Publish a base Revision whose files are blob-backed (sha256 set + uploaded), so -// they are eligible to inherit forward under ADR 0088 tree inheritance. +// they are eligible to inherit forward under ADR 0089 tree inheritance. 
async function publishBlobBackedBase( repo: LocalRepository, actor: ApiActor, @@ -2368,7 +2368,7 @@ async function publishBlobBackedBase( return { artifactId: published.artifact_id, revisionId: published.revision_id }; } -describe("ADR 0088 tree inheritance", () => { +describe("ADR 0089 tree inheritance", () => { it("inherits unchanged blob-backed files from the base and adds one new blob", async () => { const { repo, actor } = await localRepoWithApiActor(); const base = await publishBlobBackedBase( diff --git a/packages/db/src/postgres/revision-reconstructor.ts b/packages/db/src/postgres/revision-reconstructor.ts index c2406556..f67e6613 100644 --- a/packages/db/src/postgres/revision-reconstructor.ts +++ b/packages/db/src/postgres/revision-reconstructor.ts @@ -8,7 +8,7 @@ import { } from "@agent-paste/storage"; import { RevisionReconstructionConflict, type RevisionReconstructor } from "../types.js"; -// ADR 0088 Stage 4: builds the reconstructor that applies an agent-uploaded unified diff +// ADR 0089 Stage 4: builds the reconstructor that applies an agent-uploaded unified diff // to a base blob and stores the whole result as an ordinary content-addressed blob, // SYNCHRONOUSLY at finalize and BEFORE the new Revision commits. A clean patch yields a // blob the rest of the system treats like any other; a patch that cannot apply throws an diff --git a/packages/db/src/repository-error.ts b/packages/db/src/repository-error.ts index eae4a657..6433b834 100644 --- a/packages/db/src/repository-error.ts +++ b/packages/db/src/repository-error.ts @@ -104,7 +104,7 @@ const repositoryErrorToAppErrorMap: Record Promise; - // Applies intra-file unified-diff patches before commit (ADR 0088 Stage 4). Only + // Applies intra-file unified-diff patches before commit (ADR 0089 Stage 4). Only // exercised when the session has patched files; absent on full-manifest finalizes. 
revisionReconstructor?: RevisionReconstructor; }, @@ -461,7 +461,7 @@ export async function finalizeUploadSessionInEntities( repositoryError("upload_incomplete"); } } - // Tree inheritance (ADR 0088): against a base Revision the committed tree is the + // Tree inheritance (ADR 0089): against a base Revision the committed tree is the // merged base + delta, so file_count/size_bytes and the artifact_files rows come // from the merge (the session row counts only the changed manifest). validateUpload // re-checks caps + entrypoint against the real published tree (an inherited path @@ -492,7 +492,7 @@ export async function finalizeUploadSessionInEntities( id: session.revision_id, workspace_id: session.workspace_id, artifact_id: session.artifact_id, - // Set when publishing against a base Revision (ADR 0088 tree inheritance). + // Set when publishing against a base Revision (ADR 0089 tree inheritance). parent_revision_id: parentRevisionId, revision_number: null, status: "draft", diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index d34fd937..55874026 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -197,7 +197,7 @@ export const uploadSessions = pgTable( expiresAt: timestamp("expires_at", { withTimezone: true }).notNull(), createdAt: timestamp("created_at", { withTimezone: true }).notNull(), finalizedAt: timestamp("finalized_at", { withTimezone: true }), - // Base Revision this publish inherits from (ADR 0088 tree inheritance). Null = full + // Base Revision this publish inherits from (ADR 0089 tree inheritance). Null = full // manifest. Copied to revisions.parent_revision_id when the merge runs at finalize. baseRevisionId: text("base_revision_id"), // Base paths this publish drops. 
Needed to tell a deleted path apart from an @@ -231,7 +231,7 @@ export const uploadSessionFiles = pgTable( storageKind: text("storage_kind").notNull().default("revision"), uploadedAt: timestamp("uploaded_at", { withTimezone: true }), putUrlExpiresAt: timestamp("put_url_expires_at", { withTimezone: true }).notNull(), - // Intra-file delta descriptor (ADR 0088). When set, the uploaded bytes are a + // Intra-file delta descriptor (ADR 0089). When set, the uploaded bytes are a // unified diff against the base file; jobs reconstructs the whole result blob // (Stage 4). base = digest of the base Revision's file, result = digest of the // reconstructed whole file. Both null (whole-file upload) or both set. diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts index 49f4596c..5c950d5a 100644 --- a/packages/db/src/types.ts +++ b/packages/db/src/types.ts @@ -209,7 +209,7 @@ export type UploadSession = { expires_at: string; created_at: string; finalized_at: string | null; - // Base Revision this publish inherits from (ADR 0088); null = full manifest. + // Base Revision this publish inherits from (ADR 0089); null = full manifest. base_revision_id: string | null; // Base paths this publish drops (distinguishes deleted from inherited at finalize). deleted_paths: string[]; @@ -239,7 +239,7 @@ export type StoredFile = { storage_kind?: StoredFileStorageKind; uploaded_at: string | null; put_url_expires_at?: string; - // ADR 0088 intra-file delta descriptor (recorded on upload_session_files only). + // ADR 0089 intra-file delta descriptor (recorded on upload_session_files only). // Both null (whole-file upload) or both set; jobs reconstructs the result blob. 
patch_base_sha256?: string | null; patch_result_sha256?: string | null; @@ -291,7 +291,7 @@ export type ReparentBlobMigrator = { migrate(input: { fromWorkspaceId: string; toWorkspaceId: string; blobs: readonly WorkspaceBlobRef[] }): Promise; }; -// ADR 0088 Stage 4: a patched file in a partial-manifest publish uploads only a unified +// ADR 0089 Stage 4: a patched file in a partial-manifest publish uploads only a unified // diff. Before the new Revision can commit, the diff is applied to the base blob and the // whole result stored as an ordinary content-addressed blob. The reconstructor takes // VALIDATED descriptors (base/result sha already checked against the base Revision's own @@ -347,6 +347,6 @@ export type RepositoryOptions = { billingEnabled?: boolean; /** Copies workspace blob bytes into the destination tenant before claim reparent commits. */ reparentBlobMigrator?: ReparentBlobMigrator; - /** Applies intra-file unified-diff patches to base blobs before a partial-manifest finalize commits (ADR 0088). */ + /** Applies intra-file unified-diff patches to base blobs before a partial-manifest finalize commits (ADR 0089). */ revisionReconstructor?: RevisionReconstructor; }; diff --git a/packages/db/src/validation.ts b/packages/db/src/validation.ts index e64d051c..a7f3f3e8 100644 --- a/packages/db/src/validation.ts +++ b/packages/db/src/validation.ts @@ -6,7 +6,7 @@ export function validateUpload( files: Array<{ path: string; size_bytes: number }>, usagePolicy: Pick, entrypoint = "index.html", - // A partial-manifest publish (ADR 0088) validates the uploaded delta here for + // A partial-manifest publish (ADR 0089) validates the uploaded delta here for // per-file/count caps only; the entrypoint and artifact-size cap are checked // against the merged tree at finalize, where the inherited paths are known. 
options: { wholeTree?: boolean } = { wholeTree: true }, diff --git a/packages/storage/src/unified-diff.ts b/packages/storage/src/unified-diff.ts index 9c6762ed..900ea432 100644 --- a/packages/storage/src/unified-diff.ts +++ b/packages/storage/src/unified-diff.ts @@ -1,4 +1,4 @@ -// ADR 0088 Stage 4 intra-file delta: apply an agent-uploaded unified diff to a base +// ADR 0089 Stage 4 intra-file delta: apply an agent-uploaded unified diff to a base // blob and commit the whole reconstructed result. A patch that cannot be applied // cleanly is a first-class, agent-visible CONFLICT (the agent re-submits a corrected // diff), never a silent failure. Reconstruction is byte-exact: the result digest must diff --git a/packages/storage/src/workspace-blob-bytes.ts b/packages/storage/src/workspace-blob-bytes.ts index 21a631be..505ec2db 100644 --- a/packages/storage/src/workspace-blob-bytes.ts +++ b/packages/storage/src/workspace-blob-bytes.ts @@ -1,5 +1,5 @@ // Read and write workspace content-addressed blobs through the ADR 0063 encryption -// ring. ADR 0088 Stage 4 reconstruction (decrypt a base blob, apply a patch, store +// ring. ADR 0089 Stage 4 reconstruction (decrypt a base blob, apply a patch, store // the result blob) and Bundle generation both perform exactly this encrypt-and-store // / decrypt-by-sha dance; these helpers are the shared ends so neither re-derives the // blob AAD context or the object key by hand. diff --git a/packages/worker-runtime/src/route-repository-errors.ts b/packages/worker-runtime/src/route-repository-errors.ts index 2595b95d..34e00e64 100644 --- a/packages/worker-runtime/src/route-repository-errors.ts +++ b/packages/worker-runtime/src/route-repository-errors.ts @@ -98,7 +98,7 @@ export const routeRepositorySurfaces = { "uploadSessions.putFile": [], "uploadSessions.finalize": [ // The five base-* kinds collapse to invalid_request on the wire; declared here so - // this surface reflects what finalizeUploadSession can actually throw (ADR 0089). 
+ // this surface reflects what finalizeUploadSession can actually throw (ADR 0090). "base_revision_artifact_mismatch", "base_revision_not_found", "base_revision_not_publishable", diff --git a/scripts/smoke-local-patch.mjs b/scripts/smoke-local-patch.mjs index 903ceac3..4c9847b1 100644 --- a/scripts/smoke-local-patch.mjs +++ b/scripts/smoke-local-patch.mjs @@ -1,7 +1,7 @@ #!/usr/bin/env node import { spawn } from "node:child_process"; // End-to-end smoke for the Git-like revision model: Stage 4 intra-file patch -// reconstruction (ADR 0088) plus Stage 5 agent read-back (ADR 0089). Unlike the +// reconstruction (ADR 0089) plus Stage 5 agent read-back (ADR 0090). Unlike the // unit/integration tests (which use a fake reconstructor), this drives the REAL path: // boots the local MVP server (real encryption ring + in-memory R2 that round-trips // ciphertext), publishes a base Revision with known bytes, then create-session with a From 0c946639630ecfb7d5f85563d987ab2ad4b87765 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 19:03:13 -0700 Subject: [PATCH 09/14] =?UTF-8?q?docs(adr):=20record=20ADR=200091=20?= =?UTF-8?q?=E2=80=94=20shared=20revise=20engine=20+=20literal=20multi-edit?= =?UTF-8?q?=20tools?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Design for @agent-paste/revise-core: a pure applyEdits core, a RevisionReader read-side seam (twin of PublishTransport), and a reviseOnePath orchestrator that both the CLI `edit` verb and an MCP `multi_edit` tool drive, plus a rebuilt MCP `add_revision` that preserves the artifact title (fixing the "Revision" overwrite bug) and sends a verified patch. Strict fail-fast; moves diffWithSelfCheck out of apps/cli so MCP can share it; finalize render_mode inheritance invariant. Records the planned spec in docs/ops/git-like-revisions-todo.md (live cli/mcp specs update when the code lands). Builds on ADR 0090; reverses its "diff half stays CLI-only" deferral. 
Co-Authored-By: Claude Opus 4.8 --- ...de-revise-engine-and-literal-edit-tools.md | 149 ++++++++++++++++++ docs/adr/README.md | 1 + docs/ops/git-like-revisions-todo.md | 65 ++++++++ 3 files changed, 215 insertions(+) create mode 100644 docs/adr/0091-client-side-revise-engine-and-literal-edit-tools.md diff --git a/docs/adr/0091-client-side-revise-engine-and-literal-edit-tools.md b/docs/adr/0091-client-side-revise-engine-and-literal-edit-tools.md new file mode 100644 index 00000000..aa13c41a --- /dev/null +++ b/docs/adr/0091-client-side-revise-engine-and-literal-edit-tools.md @@ -0,0 +1,149 @@ +# Client-Side Revise Engine and Literal Multi-Edit Tools + +[ADR 0090](./0090-agent-file-read-back-api-decrypts-member-plaintext.md) gave an +agent the read half of the patch loop (`read_file` returns a stored file's +plaintext + `sha256`) but deliberately left the diff half **CLI-only**: the CLI +diffs the working directory against a local manifest cache, and MCP `add_revision` +still re-uploads the whole entrypoint body every time. That split has two costs. + +First, **MCP cannot revise incrementally at all.** `add_revision` builds exactly +one whole-file `PublishFile` from the inline `body` and sends it — no +`base_revision_id`, no patch — even though the shared `runPublish` +([ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)) and the +[ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) +upload contract already accept patched, partial-manifest input. The capability +exists end to end; the MCP caller just never populates it. + +Second, **there is no surface-shared "edit this file" verb.** An agent that wants +to change a few lines of an existing artifact has to re-send the whole file (MCP) +or have a working directory to diff against (CLI). Neither matches how agents +actually express edits — as `{old_string, new_string}` replacements. 
The +read-back from ADR 0090 makes a true edit verb possible: read the server's bytes, +apply literal replacements, send a verified diff. But building it twice (once per +surface) would re-introduce exactly the CLI/MCP divergence ADR 0084 forbids. + +The shared write path already proves the pattern: one `runPublish` sequence behind +a narrow four-method `PublishTransport` seam, two adapters (CLI over HTTPS, MCP +over Worker service bindings), all error mapping pushed to the adapters. The read +path has no such seam — each surface reads ad hoc — so a shared edit engine has +nothing symmetric to stand on. + +## Decision + +Extract a single **client-side revise engine** that both surfaces drive, and route +three entry points through it: the CLI `edit` verb, an MCP `multi_edit` tool, and a +**rebuilt** MCP `add_revision`. + +1. **New package `@agent-paste/revise-core`.** Pure, transport-agnostic. Depends + only on `@agent-paste/storage` (the byte-exact applier) and + `@agent-paste/contracts` (branded types). No network, no fs, no `ApiClient`, + no Worker bindings — so both the Node CLI and the Worker MCP bundle can import + it. + +2. **`applyEdits(body, edits[])` — the literal-edit core.** Ordered + `{ oldString, newString, replaceAll? }` replacements. Matching is **literal** + (`indexOf` scan, never a constructed `RegExp` — no escaping bugs, no ReDoS). An + `oldString` that does not occur is `not_found`; one that occurs more than once + without `replaceAll` is `not_unique`; an empty `oldString` is rejected. Edits + apply in sequence (edit _n_ sees edit _n−1_'s output). Pure string in, result + or typed failure out — it never hashes, reads, or publishes. + +3. **`RevisionReader` — the read-side seam, twin of `PublishTransport`.** + `{ readArtifact, readFile }`. `readArtifact` resolves the base revision's + identity (`base_revision_id`, `entrypoint`, `title`) from the Agent View; + `readFile` returns a stored file's plaintext + `sha256` (ADR 0090). 
The CLI + adapter calls the HTTPS `ApiClient`; the MCP adapter forwards + `agentView.getLatest` + `artifacts.fileContent` over service bindings — exactly + how each already implements `PublishTransport`. + +4. **`reviseOnePath({ reader, transport }, …)` — the orchestrator.** Read base + identity + body → `applyEdits` → `diffWithSelfCheck` (moved into this package + from the CLI) → build a partial-manifest `PublishInput` (only the edited path; + every other path inherits from `base_revision_id`) → `runPublish`. It + interprets nothing it does not own; reader and transport adapters map their own + errors, mirroring `runPublish`. + +5. **Strict fail-fast.** For an edit, `not_found` / `not_unique` / empty + `oldString` (client, before any network), a binary or oversize base (no inline + body to diff), a missing target path, and a server `patch_conflict` at finalize + are all **hard errors** — never a silent whole-blob fallback. The single + non-error fallback is the legitimate "the generated diff is not smaller than the + whole file" case, which sends a whole-file entry **still under + `base_revision_id`** (the result is `sha256`-verified, so this is not a + conflict swallow). A `patch_conflict` from a base that moved underneath + (TOCTOU) is retried **once** by re-reading the now-current base and re-applying + the literal edits; if the edit no longer matches the fresh base, that surfaces + as `not_found` — correct, the edit is stale and the agent must re-read. + +6. **MCP `add_revision` rebuilt to preserve identity and patch.** Today it + overwrites the artifact title with the literal `"Revision"` on every call — a + **bug**, not a contract. Rebuilt, it reads the base revision and **preserves the + existing title** (use `update_display_metadata` to rename). 
When the call's + `render_mode` selects an entrypoint that differs from the base's, it bypasses + the diff path and publishes a whole-file fresh-entrypoint revision (the only way + a whole-body replace is meaningful); when the entrypoint matches, it diffs the + new body against the stored entrypoint and sends a verified patch. A body + identical to the current entrypoint (`sha256` equal) is a no-op that mints no + revision. The idempotency key stays a pure function of the tool arguments — the + added base read never feeds it. + +7. **`multi_edit` (MCP) and `edit` (CLI) are thin entry points.** Both take + `{ artifact_id, path, edits[] }` (CLI via repeatable `--old`/`--new` pairs plus + `--replace-all`) and call `reviseOnePath`. `multi_edit` requires both `read` and + `publish` scopes (it reads the base and publishes the revision). + +8. **`render_mode` inheritance invariant.** Finalize resolves a revision's + `render_mode` as `session ?? base ?? infer(entrypoint)` so a partial-manifest + revise that does not re-send `render_mode` inherits the base revision's mode + rather than silently re-inferring it from the entrypoint. The edit verbs never + change the entrypoint, so they dodge the trap; the invariant closes it for any + future server-read revise. + +## Considered Options + +- **A thin `applyEdits` helper with four injected callbacks** (`readArtifact`, + `readFile`, `diff`, `sha256`). Rejected as speculative indirection: four ad-hoc + callbacks is not a seam, and it fails the deletion test. The symmetric + `RevisionReader` port — the read-side twin of the existing `PublishTransport`, + with two real adapters — is the honest boundary. +- **A server-side edit route** (`POST` `{old,new}[]`, server reads + replaces + + commits). Rejected. The client already holds the bytes via `read_file`; a server + route duplicates a capability the client can assemble and puts string-replace on + the byte-decryption tier for no isolation gain. 
Correctness is already enforced + server-side by the finalize `sha256` re-validation, so the replace can live on + the client without trusting it. +- **Keep the diff generator CLI-private and inject it into MCP.** Rejected. MCP + cannot import from `apps/cli`; parity requires that MCP generate the same verified + diffs, so the generator must move into the shared package. The move is what makes + "CLI and MCP have identical functionality" true in code rather than as two + reimplementations that drift ([ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)). +- **Agent sends a unified diff directly** (instead of old/new pairs). Rejected. + Agents hand-write incorrect diffs — the reason `diffWithSelfCheck` exists. Literal + old/new with server-verified reconstruction is the ergonomic, safe contract. +- **A whole-blob fallback on `patch_conflict` for `edit`** (as `publish` does). + Rejected. `publish`'s working directory is the source of truth, so a stale base + → re-publish the whole dir is correct. `edit`'s intent is "apply these exact + replacements to the server's content"; papering over a conflict with a whole upload + would discard the conflict the agent must see. Edit is strict where publish is + forgiving. +- **Preserve the `"Revision"` title behavior.** Rejected — it is a bug. The + rebuild fixes it and a regression test asserts the title is preserved. + +## Consequences + +- One revise engine, three entry points. MCP gains incremental patch-revise + (closing the loop ADR 0090 deferred), the CLI gains a working-dir-free `edit`, + and `add_revision` stops clobbering titles — all from shared code, so the two + surfaces cannot diverge. +- `add_revision`'s title change is the one observable public-contract change. It is + recorded in `docs/specs/mcp.md` and `docs/specs/cli.md`; existing tests pinning + `"Revision"` are removed (they pinned a bug). 
+- `diffWithSelfCheck` and the unified-diff generator move out of `apps/cli` into + `@agent-paste/revise-core`; the CLI's existing working-dir revise imports them + from there, eliminating the would-be second copy. +- The `render_mode` inheritance fix changes finalize's inherit semantics; it is + gated behind the finalize re-validation tests. +- Builds on [ADR 0090](./0090-agent-file-read-back-api-decrypts-member-plaintext.md) + (read-back), [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) + (commit chain + delta), and [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md) + (one publish path); reverses ADR 0090's "the diff half stays CLI-only" deferral. diff --git a/docs/adr/README.md b/docs/adr/README.md index 31ffc438..8fb2b2f1 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -44,6 +44,7 @@ This directory is the decision log for agent-paste: it records _why_ choices wer - [ADR 0088](./0088-workspace-scoped-content-addressed-blob-deduplication.md) records (retroactively) the shipped workspace-scoped content-addressed blob model: a client-supplied plaintext `sha256` lets `createUploadSession` return `reused` for files whose `(workspace_id, sha256, size_bytes)` blob already exists, so unchanged bytes skip the PUT. Blobs live at `workspaces/{wid}/blobs/sha256/{prefix}/{sha256}`, are encrypted under the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) per-Workspace DEK with a path/revision-independent AAD `v2`, and are GC'd reference-counted without deleting the deterministic R2 object. Dedup is workspace-scoped (not global) to preserve the tenant boundary; the digest is verified on PUT. Whole-file only — the client must still re-enumerate the full manifest, which [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) addresses. 
- [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) makes **Revisions** behave like Git commits so an agent can express "change this file" instead of the whole tree. Adds `revisions.parent_revision_id` and a `base_revision_id` + `deleted_paths` + partial-manifest publish contract where unlisted paths inherit the parent tree by reference (`api`-side merge, full `artifact_files` tree still materialized). Layers server-reconstructed intra-file delta on top: a changed file may be sent as a unified diff (text; whole-blob fallback for binary), reconstructed and re-hashed to a whole blob **synchronously at finalize in `upload`** so a patch that cannot apply fails the same publish call with an agent-visible `patch_conflict` (a broken patch never becomes a servable draft) and `content` plus the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) encryption boundary are untouched. Builds on [ADR 0088](./0088-workspace-scoped-content-addressed-blob-deduplication.md); defers chunk stores, per-block AEAD, Range serving, global dedup, and dropping encryption. - [ADR 0090](./0090-agent-file-read-back-api-decrypts-member-plaintext.md) closes the [ADR 0089](./0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md) patch loop for agents that lack the working directory. Adds an optional `sha256` to `AgentViewFile`, a member-authed `GET /v1/artifacts/{id}/file-content?path=&revision_id=` read route in **`api`** returning `{ path, sha256, size_bytes, content_type, is_binary, body? }` (text body when UTF-8 and ≤10 MiB; oversize/binary return metadata only, oversize skips the R2 read), and an MCP `read_file` tool. 
The boundary-relevant decision: **`api` now decrypts artifact bytes and returns plaintext** to the owning Member — it does not widen the [ADR 0063](./0063-application-layer-encryption-for-artifact-bytes.md) confidentiality boundary (the Member already owns the artifact and can fetch the same bytes via the signed `url`; encryption defends the platform tier, not the owner), and `content` is untouched. The blob key is derived from the RLS-scoped row's `sha256` + the actor's workspace, never client input. Adds the **CLI diff client** (per-artifact manifest cache + working-dir diff → partial manifest with verified unified diffs; stale-base → full-publish fallback) and an `agent-paste pull` verb; MCP `add_revision` stays text-body-only ([ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md)). Amends ADR 0063's "decrypt-on-read is `content`-only" note. +- [ADR 0091](./0091-client-side-revise-engine-and-literal-edit-tools.md) extracts a shared client-side **revise engine** (`@agent-paste/revise-core`) so the CLI and MCP express edits identically: a pure `applyEdits` (ordered literal `{old,new,replace_all?}`, `indexOf` not regex, unique-or-throw), a `RevisionReader` read-side seam (the twin of `PublishTransport`), and a `reviseOnePath` orchestrator (read base → apply → `diffWithSelfCheck` → partial-manifest `runPublish`). Three entry points flow through it: the CLI `edit` verb, an MCP `multi_edit` tool, and a **rebuilt** MCP `add_revision` that now reads the base and **preserves the artifact title** (fixing the bug where it overwrote it with `"Revision"`) and sends a verified patch (whole-file fallback only when `render_mode` changes the entrypoint or the diff is not smaller). Strict fail-fast: no-match / non-unique / binary or oversize base / `patch_conflict` are hard errors (no silent whole-blob fallback), with a single re-read+retry on a TOCTOU `patch_conflict`. 
Moves `diffWithSelfCheck` + the diff generator out of `apps/cli` into the package (MCP cannot import `apps/cli`), and makes finalize inherit `render_mode` as `session ?? base ?? infer(entrypoint)`. Reverses ADR 0090's "the diff half stays CLI-only" deferral; builds on [ADR 0090](./0090-agent-file-read-back-api-decrypts-member-plaintext.md) and [ADR 0084](./0084-cli-and-mcp-share-one-publish-path.md). - [ADR 0021](./0021-id-based-r2-object-key-layout.md) is amended for revision file keys. The ADR originally described env-scoped file keys; shipped revision files and upload PUT targets use the legacy `artifacts/{artifactId}/revisions/{revisionId}/files/{path}` prefix. Derived bundles and env-scoped purge prefixes remain env-scoped. Current shapes are in [`docs/specs/data-model.md`](../specs/data-model.md#r2-object-key-layout). - [ADR 0062](./0062-two-layer-cache-for-hot-path-auth-lookups.md) is amended for the L2 synthetic cache URL. The ADR originally used `https://cache.agent-paste.internal/{namespace}/{key}`; the shipped helper uses `https://agent-paste.internal/cache/{namespace}/{key}`. Current behavior is in [`docs/specs/architecture.md`](../specs/architecture.md#auth-lookup-cache). - [`packages/contracts`](../../packages/contracts) and [`docs/specs/contracts.md`](../specs/contracts.md) are the canonical MVP implementation contract for Zod schemas, ID formats, and the route registry. ADRs provide rationale; contracts provide field-level implementation shape. diff --git a/docs/ops/git-like-revisions-todo.md b/docs/ops/git-like-revisions-todo.md index 12a15357..5fb5cbe8 100644 --- a/docs/ops/git-like-revisions-todo.md +++ b/docs/ops/git-like-revisions-todo.md @@ -232,3 +232,68 @@ the read-back and the CLI diff client. so a broken patch never becomes a servable draft. There is therefore no pending-state model and no `reconstruction_status`. See the ADR 0089 Stage 4 implementation notes. 
+ +## Next phase: shared revise engine + literal multi-edit (ADR 0091) + +Status: design accepted (ADR 0091), not yet implemented. Lands AFTER the Stage +1–5 foundation (PR #529) merges, so the engine is built on settled code. This +section is the planned spec; the `docs/specs/cli.md` and `docs/specs/mcp.md` live +sections are updated only when the code lands (specs are current truth). + +**Supersedes** the "MCP `add_revision` stays text-body-only" line above: MCP gets +a real patch-revise path, and both surfaces express edits identically. + +- **New package `@agent-paste/revise-core`** — pure, transport-agnostic (deps: + `@agent-paste/storage`, `@agent-paste/contracts` only). Importable by both the + Node CLI and the Worker MCP bundle. +- **`applyEdits(body, edits[])`** — ordered literal `{oldString, newString, +replaceAll?}`. `indexOf` matching (never a constructed `RegExp`). `not_found` if + absent, `not_unique` if >1 without `replaceAll`, reject empty `oldString`. Edit + _n_ sees edit _n−1_'s output. Pure in/out; never hashes/reads/publishes. +- **`RevisionReader { readArtifact, readFile }`** — the read-side seam, the twin + of `PublishTransport`. CLI adapter over `ApiClient`; MCP adapter forwards + `agentView.getLatest` + `artifacts.fileContent` over service bindings. +- **`reviseOnePath({reader, transport}, …)`** — read base identity + body → + `applyEdits` → `diffWithSelfCheck` → partial-manifest `runPublish` (only the + edited path; others inherit from `base_revision_id`). +- **Move `diffWithSelfCheck` + the unified-diff generator** out of `apps/cli` into + the package (MCP cannot import `apps/cli`); the CLI working-dir revise imports + them from there. No second copy. +- **Strict fail-fast** (the distinction from working-dir `publish`): `not_found` / + `not_unique` / binary base / oversize base / missing path / `patch_conflict` are + HARD errors — no silent whole-blob fallback. 
Only "diff not smaller than file" + sends a whole-file entry (still under `base_revision_id`, result-`sha256` + verified). A TOCTOU `patch_conflict` is retried ONCE by re-reading the current + base and re-applying the literal edits; if the edit no longer matches → surface + `not_found` (edit is stale, agent re-reads). +- **CLI `edit <artifact_id> <path> --old <old> --new <new> [...] [--replace-all]`** — + repeatable old/new pairs, strict pairing, routes through `reviseOnePath` via the + server read (no manifest cache needed; works on a fresh machine). +- **MCP `multi_edit { artifact_id, path, edits[] }`** — requires `read` + `publish` + scopes. Contracts wiring: input/output schemas, `McpToolName` enum, tool-schemas + maps, registry entry (forwards `agentView.getLatest` + `artifacts.fileContent` + then the publish chain; declares `patch_conflict`), `mcp.test.ts` registry + assertions. +- **MCP `add_revision` REBUILT** — reads the base and PRESERVES the existing title + (fixes the bug where it overwrote it with the literal `"Revision"`; rename via + `update_display_metadata`). Same-entrypoint → verified patch of the body against + the stored entrypoint; `render_mode` change → whole-file fresh-entrypoint publish + (the only meaningful whole-body replace); `sha256`-equal body → no-op, no + revision. Idempotency key stays a pure function of tool args (the read never + feeds it). Regression test: title preserved, not `"Revision"`. This is the one + observable public-contract change — record in `docs/specs/mcp.md` + `docs/specs/cli.md` + when it lands; delete tests pinning `"Revision"` (they pinned a bug). +- **`render_mode` inheritance invariant** — finalize resolves `render_mode` as + `session ?? base ?? infer(entrypoint)` so a partial-manifest revise that omits + `render_mode` inherits the base's mode instead of silently re-inferring from the + entrypoint. Gated behind finalize re-validation tests. 
+ +**Ship split:** PR1 = package + engine + move diff-gen + rebuild CLI/MCP onto it + +`add_revision` title fix + `render_mode` hardening (refactor, behavior-preserving +except the bug fix). PR2 = the `edit` + `multi_edit` verbs on top. + +**Done (planned):** `pnpm verify` + `test:coverage` (88/82/88/88) + a preview e2e +against a REAL multi-file artifact — `cli:dev edit` patches one file (others +reused, link stable), `mcporter multi_edit` patches another, `add_revision` +preserves title + live-updates, a `render_mode` flip uses the whole-file fallback, +a repeated identical edit is a no-op, and the viewer reflects the change live. From 733f649b2b95667801ccef8b990c2d0dc9e46af1 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 19:06:33 -0700 Subject: [PATCH 10/14] fix(upload): allow delete-only revise (empty files) against a base revision A partial-manifest revise where every remaining file is unchanged but some paths are deleted produced an empty files manifest. Both CreateUploadSessionRequest (files.min(1)) and validateUpload (files.length === 0 -> file_count_cap_exceeded) rejected it, so delete-only revises failed instead of inheriting the base tree and dropping the paths. Make the min-1 file rule conditional on the publish kind: a whole publish (no base_revision_id) still requires at least one file; a base delta may send zero files as long as it deletes at least one path. validateUpload mirrors this for the partial-manifest path; the merged tree is still re-checked with the whole-tree caps (entrypoint + total size) at finalize. Bugbot finding on PR #529. 
Co-Authored-By: Claude Opus 4.8 --- packages/contracts/openapi/upload.json | 2 - packages/contracts/src/uploadSessions.test.ts | 22 ++++++++++ packages/contracts/src/uploadSessions.ts | 20 +++++++++- packages/db/src/index.test.ts | 40 +++++++++++++++++++ packages/db/src/validation.ts | 7 +++- 5 files changed, 87 insertions(+), 4 deletions(-) diff --git a/packages/contracts/openapi/upload.json b/packages/contracts/openapi/upload.json index 053dcc35..29804804 100644 --- a/packages/contracts/openapi/upload.json +++ b/packages/contracts/openapi/upload.json @@ -355,7 +355,6 @@ "size_bytes" ] }, - "minItems": 1, "maxItems": 100 } }, @@ -994,7 +993,6 @@ "size_bytes" ] }, - "minItems": 1, "maxItems": 100 } }, diff --git a/packages/contracts/src/uploadSessions.test.ts b/packages/contracts/src/uploadSessions.test.ts index 2fef2e3d..1cd7027b 100644 --- a/packages/contracts/src/uploadSessions.test.ts +++ b/packages/contracts/src/uploadSessions.test.ts @@ -78,6 +78,28 @@ describe("CreateUploadSessionRequest partial-manifest + patch", () => { expect(result.success).toBe(false); }); + it("accepts a delete-only delta (empty files) against a base revision", () => { + const parsed = CreateUploadSessionRequest.parse( + baseRequest({ + base_revision_id: baseRevisionId, + deleted_paths: ["old/page.html"], + files: [], + }), + ); + expect(parsed.files).toEqual([]); + expect(parsed.deleted_paths).toEqual(["old/page.html"]); + }); + + it("rejects a base delta with no changed files and no deletions", () => { + const result = CreateUploadSessionRequest.safeParse(baseRequest({ base_revision_id: baseRevisionId, files: [] })); + expect(result.success).toBe(false); + }); + + it("rejects an empty files manifest without base_revision_id", () => { + const result = CreateUploadSessionRequest.safeParse(baseRequest({ files: [] })); + expect(result.success).toBe(false); + }); + it("rejects deleted_paths with no base_revision_id", () => { const result = 
CreateUploadSessionRequest.safeParse(baseRequest({ deleted_paths: ["gone.html"] })); expect(result.success).toBe(false); diff --git a/packages/contracts/src/uploadSessions.ts b/packages/contracts/src/uploadSessions.ts index c671bf22..9a9c74a8 100644 --- a/packages/contracts/src/uploadSessions.ts +++ b/packages/contracts/src/uploadSessions.ts @@ -58,10 +58,20 @@ export const CreateUploadSessionRequest = z entrypoint: FilePath, render_mode: RenderMode.optional(), deleted_paths: z.array(FilePath).max(100).optional(), - files: z.array(UploadSessionFileInput).min(1).max(100), + // A whole publish needs at least one file; a partial-manifest delta against a base + // may send zero files (a delete-only revise inherits the rest), so the min(1) check + // is conditional and lives in the superRefine below, not on the array. + files: z.array(UploadSessionFileInput).max(100), }) .superRefine((request, ctx) => { if (request.base_revision_id === undefined) { + if (request.files.length === 0) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["files"], + message: "files must contain at least one entry without base_revision_id", + }); + } if (request.deleted_paths !== undefined) { ctx.addIssue({ code: z.ZodIssueCode.custom, @@ -77,6 +87,14 @@ export const CreateUploadSessionRequest = z message: "patch requires base_revision_id", }); } + } else if (request.files.length === 0 && (request.deleted_paths?.length ?? 0) === 0) { + // A partial-manifest delta must change something: zero files AND zero deletions + // would commit a Revision identical to its base, which is a client mistake. + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["files"], + message: "a base_revision_id delta needs at least one changed file or deleted path", + }); } const deleted = new Set(request.deleted_paths ?? []); if (deleted.size !== (request.deleted_paths?.length ?? 
0)) { diff --git a/packages/db/src/index.test.ts b/packages/db/src/index.test.ts index b8a21cc8..8326fd9f 100644 --- a/packages/db/src/index.test.ts +++ b/packages/db/src/index.test.ts @@ -2521,6 +2521,46 @@ describe("ADR 0089 tree inheritance", () => { expect(finalized.file_count).toBe(2); }); + it("finalizes a delete-only delta with no uploaded files", async () => { + const { repo, actor } = await localRepoWithApiActor(); + const base = await publishBlobBackedBase( + repo, + actor, + "delete-only", + [ + { path: "index.html", size_bytes: 12, sha256: sha("a") }, + { path: "b.css", size_bytes: 20, sha256: sha("b") }, + { path: "c.js", size_bytes: 30, sha256: sha("c") }, + ], + "2026-01-01T00:00:00.000Z", + ); + const session = await repo.createUploadSession({ + actor, + idempotencyKey: "idem-delete-only-create", + request: { + artifact_id: base.artifactId, + base_revision_id: base.revisionId, + title: "delete-only", + entrypoint: "index.html", + deleted_paths: ["c.js"], + files: [], + }, + now: "2026-01-02T00:00:00.000Z", + }); + expect(session.files).toEqual([]); + const finalized = await repo.finalizeUploadSession({ + actor, + idempotencyKey: "idem-delete-only-finalize", + sessionId: session.upload_session_id, + observedFiles: [], + now: "2026-01-02T00:00:02.000Z", + }); + const files = [...repo.artifactFiles.values()].filter((file) => file.revision_id === finalized.revision_id); + expect(files.map((file) => file.path).sort()).toEqual(["b.css", "index.html"]); + expect(finalized.file_count).toBe(2); + expect(repo.revisions.get(finalized.revision_id)?.parent_revision_id).toBe(base.revisionId); + }); + it("inherits the entrypoint when it is unchanged", async () => { const { repo, actor } = await localRepoWithApiActor(); const base = await publishBlobBackedBase( diff --git a/packages/db/src/validation.ts b/packages/db/src/validation.ts index a7f3f3e8..b52f0314 100644 --- a/packages/db/src/validation.ts +++ b/packages/db/src/validation.ts @@ -11,7 +11,12 @@ export 
function validateUpload( // against the merged tree at finalize, where the inherited paths are known. options: { wholeTree?: boolean } = { wholeTree: true }, ) { - if (files.length === 0 || files.length > usagePolicy.file_count_cap) { + // A partial-manifest delta (ADR 0089) may carry zero files: a delete-only revise + // inherits the rest of the base tree, so only the upper bound applies here. The + // whole-tree publish still requires at least one file. The merged tree is re-checked + // at finalize with wholeTree, where the entrypoint and total-size caps run. + const minFiles = options.wholeTree === false ? 0 : 1; + if (files.length < minFiles || files.length > usagePolicy.file_count_cap) { repositoryError("file_count_cap_exceeded"); } let total = 0; From 3d9a73c3d757d33fe1dc1d44f20513e0bc9b9a75 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 19:24:56 -0700 Subject: [PATCH 11/14] fix(cli): fall back to full publish on no-op delta; guard claim-token leak + LCS blowup - runPublish drops the base and sends a whole-blob manifest when the revise plan produces no changed files and no deletions, so an unchanged working tree revise no longer sends an empty delta the server rejects (bugbot). - publish-format only treats the claim token in the URL query string as a leak, not a coincidental fragment substring (CodeRabbit). - unified-diff-gen returns null instead of attempting an LCS over more than MAX_LCS_CELLS cells, so a pathological diff falls back to a whole blob rather than pinning a core (CodeRabbit). - Corrected the pull/read-back comment that wrongly claimed base64 bytes ride in the JSON; oversize text/binary is metadata-only. 
Co-Authored-By: Claude Opus 4.8 --- apps/cli/src/index.ts | 15 +++-- apps/cli/src/publish-format.ts | 5 +- apps/cli/src/unified-diff-gen.ts | 9 +++ apps/cli/test/index.test.ts | 96 ++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 5 deletions(-) diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index 5505d646..dc1ac9cf 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -314,10 +314,16 @@ async function runPublish(parsed: Parsed, client: ApiClient, mode: OutputMode) { // as verified unified diffs) against the base Revision; unchanged files inherit. // No cache (first publish elsewhere / fresh machine) => a full whole-blob publish. const cache = artifactId ? await loadManifestCache(artifactId) : null; - const plan = + const built = artifactId && cache ? await buildRevisePlan({ client, artifactId, cache, files: filesWithDigest, entrypoint: inferred.entrypoint }) : null; + // A no-op delta (working tree identical to the base: nothing changed, added, or + // deleted) cannot be sent as a partial manifest — the server requires a delta to + // carry at least one change. Fall back to a full whole-blob publish, which always + // produces a valid request and a fresh Revision (e.g. re-publishing an unchanged + // dir, or a metadata-only revise like --title). + const plan = built && built.publishFiles.length === 0 && built.deletedPaths.length === 0 ? null : built; const progress = createProgress(mode); const runOnce = (revise: RevisePlan | null) => @@ -398,8 +404,9 @@ async function makePublic(parsed: Parsed, client: ApiClient) { // Read one stored file's content for the owning member (ADR 0090). Default // output is cat-like: the raw text body to stdout, so `agent-paste pull -// > file` works. --json emits structured metadata; binary content is base64 in -// json and refused in plain (raw bytes would corrupt a terminal / piped text). +// > file` works. 
--json emits structured metadata (text body inline; binary and +// oversize files carry no body — fetch those via the content URL). Plain mode refuses +// a binary file (raw bytes would corrupt a terminal / piped text). async function pull(parsed: Parsed, client: ApiClient) { const artifactId = ArtifactId.parse(requiredArg(parsed, 0, "artifact-id")); const filePath = requiredArg(parsed, 1, "path"); @@ -420,7 +427,7 @@ async function pull(parsed: Parsed, client: ApiClient) { ); } if (file.is_binary) { - throw new Error(`${file.path} is binary; use --json (base64) or the content URL`); + throw new Error(`${file.path} is binary; use --json for metadata and fetch the bytes via the content URL`); } if (file.body === undefined) { throw new Error(`${file.path} is ${file.size_bytes} bytes, too large to inline; fetch via the content URL`); diff --git a/apps/cli/src/publish-format.ts b/apps/cli/src/publish-format.ts index f975b16d..31dba8cc 100644 --- a/apps/cli/src/publish-format.ts +++ b/apps/cli/src/publish-format.ts @@ -81,7 +81,10 @@ function assertClaimTokenNotInPublicUrls(result: PublishResultShape, claimUrl: s if (!claimToken || !claimUrl.includes("#")) { throw new Error("Claim URL must carry the token in the URL hash"); } - if (claimUrl.includes("?") && claimUrl.includes(claimToken)) { + // The token legitimately lives in the hash, so checking the whole URL for it always + // matches. Scope the leak check to the query string (the part between ? and #). + const query = claimUrl.split("#")[0]?.split("?")[1] ?? 
""; + if (query.includes(claimToken)) { throw new Error("Claim Token must not appear in the URL query string"); } if ( diff --git a/apps/cli/src/unified-diff-gen.ts b/apps/cli/src/unified-diff-gen.ts index 1fa3bfc4..30cba794 100644 --- a/apps/cli/src/unified-diff-gen.ts +++ b/apps/cli/src/unified-diff-gen.ts @@ -73,6 +73,12 @@ function lcsOps(base: Line[], next: Line[]): Op[] { const CONTEXT_LINES = 3; +// The LCS table is (n+1)*(m+1) Int32 cells, so two very large text files would +// allocate gigabytes and hang/crash before the whole-blob fallback. Cap the table at +// ~8M cells (32 MB) and skip the diff above it — the patch is only a size optimization, +// so a skipped diff degrades to a correct whole-blob upload, never a failure. +const MAX_LCS_CELLS = 8_000_000; + // A "\ No newline" marker is emitted immediately after the last line of a side when // that line has no terminator. The applier reads it as "the preceding emitted line // carries no trailing newline". @@ -191,6 +197,9 @@ export async function diffWithSelfCheck(input: { } const base = splitLines(input.baseText); const next = splitLines(input.nextText); + if ((base.length + 1) * (next.length + 1) > MAX_LCS_CELLS) { + return null; // Too large to diff in bounded memory; upload the whole file instead. + } const diffText = buildDiff(base, next); const diffBytes = new TextEncoder().encode(diffText); if (diffBytes.byteLength >= input.nextBytes.byteLength) { diff --git a/apps/cli/test/index.test.ts b/apps/cli/test/index.test.ts index 5e2ead34..f0f4f8ea 100644 --- a/apps/cli/test/index.test.ts +++ b/apps/cli/test/index.test.ts @@ -679,6 +679,79 @@ describe("cli command dispatch", () => { } }); + it("revising an unchanged working tree falls back to a full publish, not an empty delta", async () => { + const { createHash } = await import("node:crypto"); + mockStdout(); + const root = await fs.mkdtemp(path.join(os.tmpdir(), "agent-paste-cli-")); + try { + const body = "

Hello

"; + await fs.writeFile(path.join(root, "index.html"), body); + // Cache matches the working tree exactly: nothing changed, added, or deleted, so + // the revise plan is a no-op delta the server would reject. The CLI must drop the + // base and send a full whole-blob manifest instead. + const manifests = path.join(configHome ?? "", "agent-paste", "manifests"); + await fs.mkdir(manifests, { recursive: true }); + const baseRevisionId = "rev_01HZY7Q8X9Y2S3T4V5W6X7Y8Z0"; + const sha256 = createHash("sha256").update(new TextEncoder().encode(body)).digest("hex"); + await fs.writeFile( + path.join(manifests, `${encodeURIComponent(artifactId)}.json`), + JSON.stringify({ + revision_id: baseRevisionId, + files: [{ path: "index.html", sha256, size_bytes: body.length }], + }), + ); + const create = vi.fn().mockResolvedValue({ + upload_session_id: uploadSessionId, + artifact_id: artifactId, + revision_id: revisionId, + status: "pending", + expires_at: "2026-01-01T00:00:00.000Z", + files: [ + { + status: "upload_required", + path: "index.html", + put_url: "https://upload.test/index", + required_headers: {}, + expires_at: "2026-01-01T00:00:00.000Z", + }, + ], + }); + const finalize = vi.fn().mockResolvedValue({ + upload_session_id: uploadSessionId, + artifact_id: artifactId, + revision_id: revisionId, + status: "draft", + title: "Published", + entrypoint: "index.html", + file_count: 1, + size_bytes: body.length, + }); + const publish = vi.fn().mockResolvedValue({ + artifact_id: artifactId, + revision_id: revisionId, + title: "Published", + private_url: "https://app.test/v/art_1", + revision_content_url: "https://content.test/v/token/index.html", + agent_view_url: "https://api.test/agent-view", + expires_at: "2026-02-01T00:00:00.000Z", + }); + const client = fakeClient({ + usagePolicy: vi.fn().mockResolvedValue(usagePolicy), + uploadSessions: { create, finalize }, + revisions: { publish }, + putFile: vi.fn().mockResolvedValue(undefined), + }); + + await main(["publish", root, 
"--artifact-id", artifactId], client); + + expect(create).toHaveBeenCalledTimes(1); + expect(create.mock.calls[0]?.[0]).not.toHaveProperty("base_revision_id"); + expect(create.mock.calls[0]?.[0]).toMatchObject({ files: [{ path: "index.html" }] }); + } finally { + await removePublishFixture(root); + } + }); + it("pull writes the file body to stdout, and --quiet does not suppress it", async () => { const body = "line one\nline two\n"; const readFile = vi.fn().mockResolvedValue({ @@ -717,6 +790,29 @@ describe("cli command dispatch", () => { await expect(main(["pull", artifactId, "logo.bin"], client)).rejects.toThrow(/binary/); }); + it("pull refuses a too-large-to-inline text file in plain mode but emits metadata in --json", async () => { + // The oversize branch returns text metadata with no body (is_binary false, body + // undefined). Plain mode must refuse it; --json must still emit metadata sans body. + const oversize = { + path: "huge.txt", + sha256: "d".repeat(64), + size_bytes: 20_000_000, + content_type: "text/plain", + is_binary: false, + }; + const client = fakeClient({ artifacts: { readFile: vi.fn().mockResolvedValue(oversize) } }); + + mockStdout(); + await expect(main(["pull", artifactId, "huge.txt"], client)).rejects.toThrow(/too large to inline/); + + const jsonStdout = mockStdout(); + await main(["pull", artifactId, "huge.txt", "--json"], client); + const printed = JSON.parse(stdoutValues(jsonStdout).join("")); + expect(printed).not.toHaveProperty("body"); + expect(printed).toMatchObject({ path: "huge.txt", is_binary: false, size_bytes: 20_000_000 }); + jsonStdout.mockRestore(); + }); + it("throws on unknown commands", async () => { await expect(main(["unknown"], fakeClient())).rejects.toThrow("Unknown command: unknown"); }); From 0bab9c4b93fa61af1df8583d6da99d32ffc4e196 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 19:25:10 -0700 Subject: [PATCH 12/14] fix(api): infer is_binary from content type on the oversize file-content path 
An oversize file is returned as metadata without reading R2, so its bytes are never inspected. Previously is_binary defaulted to false, mislabeling an oversize binary as text. Now the flag is derived from the stored content type (non-text/* => binary), so a client keying on the flag does not try to inline binary bytes. body stays absent on this branch (CodeRabbit). Co-Authored-By: Claude Opus 4.8 --- .../src/routes/artifact-file-content.test.ts | 34 +++++++++++++++++++ apps/api/src/routes/artifact-file-content.ts | 9 +++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/apps/api/src/routes/artifact-file-content.test.ts b/apps/api/src/routes/artifact-file-content.test.ts index 302c22ce..ea98cf1f 100644 --- a/apps/api/src/routes/artifact-file-content.test.ts +++ b/apps/api/src/routes/artifact-file-content.test.ts @@ -124,6 +124,40 @@ describe("artifacts.fileContent route", () => { expect(json.body).toBeUndefined(); }); + it("flags oversize binary as is_binary from content type without reading R2", async () => { + let getCalled = false; + const env: Env = { + ...testArtifactBytesEncryptionEnv, + ARTIFACTS: { + async get() { + getCalled = true; + return null; + }, + async list() { + return { objects: [], truncated: false }; + }, + async delete() {}, + }, + }; + const sha = await sha256Hex("placeholder"); + const file = { + path: "huge.bin", + sha256: sha, + size_bytes: 11 * 1024 * 1024, + content_type: "application/octet-stream", + }; + + const response = await readArtifactFileContent(contextFor({ env }), apiPrincipal(), dbWithFile(file), { + artifactId: ARTIFACT_ID, + path: "huge.bin", + }); + + const json = await responseJson<{ is_binary: boolean; body?: string }>(response); + expect(getCalled).toBe(false); + expect(json.is_binary).toBe(true); + expect(json.body).toBeUndefined(); + }); + it("404s when the path is not in the artifact or the row has no sha256", async () => { const env: Env = { ...testArtifactBytesEncryptionEnv, ARTIFACTS: fakeR2() }; 
const missing = await readArtifactFileContent(contextFor({ env }), apiPrincipal(), dbWithFile(null), { diff --git a/apps/api/src/routes/artifact-file-content.ts b/apps/api/src/routes/artifact-file-content.ts index a88d3c79..7d437a27 100644 --- a/apps/api/src/routes/artifact-file-content.ts +++ b/apps/api/src/routes/artifact-file-content.ts @@ -46,15 +46,18 @@ export async function readArtifactFileContent( } // Oversize files are not inlined: return metadata only and skip the R2 read so a - // large file never forces a full decrypt into memory. body absent + is_binary - // false tells the agent "text, too big to inline — fetch via url / whole-blob". + // large file never forces a full decrypt into memory. body is absent either way; + // is_binary is inferred from the stored content type (we never read the bytes here), + // so an oversize binary is not mislabeled as text. Clients key on body===undefined to + // fetch via url / upload whole, so the flag is advisory on this branch. if (file.size_bytes > Mebibytes.ten) { + const isBinaryByType = typeof file.content_type === "string" ? !file.content_type.startsWith("text/") : true; return responders.respondJson({ path: file.path, sha256: file.sha256, size_bytes: file.size_bytes, content_type: file.content_type, - is_binary: false, + is_binary: isBinaryByType, }); } From 1ba7f04c84a10fa03502824dc9b4cdfe68866432 Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 19:25:10 -0700 Subject: [PATCH 13/14] fix(db): return committed revision counts on finalize fast path; document FK scope - The finalized-session fast path now reads the committed revision and reports its file_count/size_bytes, so a repeat finalize returns the merged-tree counts rather than the pre-merge session counts (CodeRabbit). 
- Documented that migration 0024's column-scoped ON DELETE SET NULL on parent_revision_id is authoritative and that Drizzle cannot express the column list, so the snapshot's unscoped form is expected drift, not a bug to "fix" toward the snapshot (CodeRabbit). Co-Authored-By: Claude Opus 4.8 --- packages/db/src/repository/upload-session-lifecycle.ts | 8 ++++++-- packages/db/src/schema.ts | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/packages/db/src/repository/upload-session-lifecycle.ts b/packages/db/src/repository/upload-session-lifecycle.ts index fca2862d..dfed1547 100644 --- a/packages/db/src/repository/upload-session-lifecycle.ts +++ b/packages/db/src/repository/upload-session-lifecycle.ts @@ -438,14 +438,18 @@ export async function finalizeUploadSessionInEntities( repositoryError("upload_session_not_found"); } if (session.status === "finalized") { + // The session row carries only the uploaded DELTA counts; against a base Revision + // the committed tree (merged base + delta) differs. Read the committed Revision so a + // retried finalize returns the same file_count/size_bytes as the first one (ADR 0089). + const committed = await entities.revisions.findById(session.revision_id, session.workspace_id); return buildFinalizeResult({ uploadSessionId: session.id, artifactId: session.artifact_id, revisionId: session.revision_id, title: session.title, entrypoint: session.entrypoint, - fileCount: session.file_count, - sizeBytes: session.size_bytes, + fileCount: committed?.file_count ?? session.file_count, + sizeBytes: committed?.size_bytes ?? 
session.size_bytes, }); } if (session.status === "expired" || new Date(session.expires_at).getTime() <= new Date(input.now).getTime()) { diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index 55874026..946c1c0a 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -292,6 +292,13 @@ export const revisions = pgTable( check("revisions_bundle_status_check", sql`${table.bundleStatus} in ('pending', 'ready', 'failed', 'disabled')`), check("revisions_created_by_type_check", sql`${table.createdByType} in ('api_key', 'member')`), index("revisions_parent_idx").on(table.workspaceId, table.artifactId, table.parentRevisionId), + // Migration 0024 is authoritative for this constraint: it uses the PostgreSQL + // column-scoped `ON DELETE SET NULL (parent_revision_id)` so deleting a parent + // only nulls the (nullable) parent pointer, never workspace_id/artifact_id (both + // NOT NULL). Drizzle cannot express the column list, so this `.onDelete("set null")` + // is the closest ORM approximation; the snapshot it generates is drift-detection + // for schema.ts, not the DDL applied to the database. Do NOT "fix" the migration to + // match the snapshot's unscoped SET NULL — that would violate the NOT NULL columns. foreignKey({ name: "revisions_parent_fk", columns: [table.workspaceId, table.artifactId, table.parentRevisionId], From e5be3b6256395c2626a4db1763a900e9ed91218e Mon Sep 17 00:00:00 2001 From: Isaac Suttell Date: Sun, 14 Jun 2026 19:25:23 -0700 Subject: [PATCH 14/14] fix(storage,docs): preserve UTF-8 BOM in decodeUtf8Strict; tighten patch sha256 spec - decodeUtf8Strict keeps a leading UTF-8 BOM (ignoreBOM) so valid BOM-prefixed text round-trips and is not rejected as binary; fatal is passed explicitly for the Worker TS lib option type (CodeRabbit). - api.md: a patched per-file entry's size_bytes is the diff byte length and the entry carries no whole-file sha256; the sha256 rule now scopes to whole-file entries (CodeRabbit). 
- Test-only: cover the BOM round-trip + invalid-UTF-8 reject, read_file omits revision_id from the query when absent, and isolate the non-unified-patch contract test from sha256. Co-Authored-By: Claude Opus 4.8 --- apps/mcp/src/tools.test.ts | 22 ++++++++++++++++ docs/specs/api.md | 25 +++++++++++-------- packages/contracts/src/uploadSessions.test.ts | 3 ++- packages/storage/src/unified-diff.test.ts | 15 ++++++++++- packages/storage/src/unified-diff.ts | 14 +++++++---- 5 files changed, 61 insertions(+), 18 deletions(-) diff --git a/apps/mcp/src/tools.test.ts b/apps/mcp/src/tools.test.ts index cbf1e8e3..dde468a1 100644 --- a/apps/mcp/src/tools.test.ts +++ b/apps/mcp/src/tools.test.ts @@ -236,6 +236,28 @@ describe("callMcpTool", () => { expect(url.searchParams.get("revision_id")).toBe(revisionId); }); + it("read_file omits revision_id from the query when not provided", async () => { + const artifactId = "art_01HZY7Q8X9Y2S3T4V5W6X7Y8Z9"; + const fileContent = { + path: "index.md", + sha256: "a".repeat(64), + size_bytes: 6, + content_type: "text/markdown", + is_binary: false, + body: "hello\n", + }; + const api = apiMock(["read"], Response.json(fileContent)); + const result = await callMcpTool("read_file", { artifact_id: artifactId, path: "index.md" }, auth, { + api, + upload, + bearerToken: auth.bearerToken, + }); + expect(result).toEqual({ ok: true, result: fileContent }); + const url = new URL(routeCall(api, 0).url); + expect(url.searchParams.get("path")).toBe("index.md"); + expect(url.searchParams.has("revision_id")).toBe(false); + }); + it("publish_artifact returns the private viewer link (content-only, private)", async () => { vi.stubGlobal( "fetch", diff --git a/docs/specs/api.md b/docs/specs/api.md index 9107d3bb..ea216e6c 100644 --- a/docs/specs/api.md +++ b/docs/specs/api.md @@ -114,7 +114,6 @@ Authenticated `api` and `upload` routes enforce guards in a fixed order { "path": "big.txt", "size_bytes": 240, - "sha256": "", "patch": { "base_sha256": "", "format": 
"unified", @@ -144,22 +143,26 @@ Rules: - Max file size is `10 MB`. - Max total size is `25 MB`. - Max file count is `100`. -- `sha256` is optional for compatibility. New CLI/MCP clients send lowercase - hex SHA-256 for each file. Legacy clients that omit it keep the full-upload - revision-object path and do not participate in deduplication. +- `sha256` is optional for compatibility on whole-file entries. New CLI/MCP + clients send lowercase hex SHA-256 for each whole-file entry; legacy clients + that omit it keep the full-upload revision-object path and do not participate + in deduplication. A patched entry must NOT carry `sha256` (its uploaded bytes + are the diff, not the content-addressed file); the request is rejected if it + declares both. - `base_revision_id`, `deleted_paths`, and per-file `patch` are the optional commit-chain / partial-manifest inputs ([ADR 0089](../adr/0089-revision-commit-chain-tree-inheritance-and-server-reconstructed-delta.md)). When `base_revision_id` is set, `files` lists only changed and added paths, `deleted_paths` drops paths, and every other path inherits from the base Revision by reference. A per-file `patch` (`{ base_sha256, format: "unified", result_sha256 }`) means the bytes uploaded for that entry are a unified diff - rather than the whole file: `size_bytes`/`sha256` describe the diff, - `base_sha256` is the digest of that path in the base Revision the diff applies - to, and `result_sha256` is the digest of the whole reconstructed file the - server produces and verifies. Structural rules enforced at request validation: - `patch` and `deleted_paths` require `base_revision_id`; `deleted_paths` is - unique; a path cannot be both uploaded and deleted; `format` must be - `unified`. 
Stateful checks and the tree-inheritance merge run server-side at + rather than the whole file: `size_bytes` is the diff's byte length and the + entry carries no whole-file `sha256`, `base_sha256` is the digest of that path + in the base Revision the diff applies to, and `result_sha256` is the digest of + the whole reconstructed file the server produces and verifies. Structural rules + enforced at request validation: `patch` and `deleted_paths` require + `base_revision_id`; `deleted_paths` is unique; a path cannot be both uploaded + and deleted; a patched entry cannot also declare a whole-file `sha256`; + `format` must be `unified`. Stateful checks and the tree-inheritance merge run server-side at finalize. The base must be a `published` Revision in the same Workspace and Artifact (a cross-workspace base is reported as not found; a cross-artifact base is rejected before it could violate the parent foreign key). Only blob-backed diff --git a/packages/contracts/src/uploadSessions.test.ts b/packages/contracts/src/uploadSessions.test.ts index 1cd7027b..878af61d 100644 --- a/packages/contracts/src/uploadSessions.test.ts +++ b/packages/contracts/src/uploadSessions.test.ts @@ -106,6 +106,8 @@ describe("CreateUploadSessionRequest partial-manifest + patch", () => { }); it("rejects a non-unified patch format", () => { + // No whole-file sha256 here: a patched entry must omit it, so this isolates the + // format:"binary" rejection rather than tripping the sha256+patch mutual-exclusion. 
const result = CreateUploadSessionRequest.safeParse( baseRequest({ base_revision_id: baseRevisionId, @@ -113,7 +115,6 @@ describe("CreateUploadSessionRequest partial-manifest + patch", () => { { path: "big.bin", size_bytes: 30, - sha256: sha("c"), patch: { base_sha256: sha("d"), format: "binary", result_sha256: sha("e") }, }, ], diff --git a/packages/storage/src/unified-diff.test.ts b/packages/storage/src/unified-diff.test.ts index 4b535168..55ec687d 100644 --- a/packages/storage/src/unified-diff.test.ts +++ b/packages/storage/src/unified-diff.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { applyUnifiedDiff } from "./unified-diff.js"; +import { applyUnifiedDiff, decodeUtf8Strict } from "./unified-diff.js"; const enc = new TextEncoder(); @@ -23,6 +23,19 @@ async function applyText(base: string, diff: string, expectedResult: string) { }); } +describe("decodeUtf8Strict", () => { + it("preserves a leading UTF-8 BOM so valid BOM text is not rejected as binary", () => { + const bytes = new Uint8Array([0xef, 0xbb, 0xbf, ...enc.encode("hello\n")]); + const decoded = decodeUtf8Strict(bytes); + expect(decoded).not.toBeNull(); + expect(enc.encode(decoded as string)).toEqual(bytes); + }); + + it("returns null for invalid UTF-8", () => { + expect(decodeUtf8Strict(new Uint8Array([0xff, 0xfe, 0x00]))).toBeNull(); + }); +}); + describe("applyUnifiedDiff", () => { it("applies a single-hunk modification + append byte-exactly", async () => { const base = "line1\nline2\nline3\n"; diff --git a/packages/storage/src/unified-diff.ts b/packages/storage/src/unified-diff.ts index 900ea432..67c9d744 100644 --- a/packages/storage/src/unified-diff.ts +++ b/packages/storage/src/unified-diff.ts @@ -52,12 +52,16 @@ async function sha256Hex(bytes: Uint8Array): Promise<string> { return toHex(new Uint8Array(await crypto.subtle.digest("SHA-256", asBufferSource(bytes)))); } -// Decode UTF-8, returning null on any invalid sequence. 
`TextDecoder({ fatal: true })` -// is the obvious tool but its option type is not in every Worker TS lib config, so we -// decode lossily then verify the decode round-trips to the same bytes — a replacement -// character inserted for an invalid sequence re-encodes to different bytes. +// Decode UTF-8, returning null on any invalid sequence. We decode lossily +// (`fatal: false`, the default) then verify the decode round-trips to the same +// bytes — a replacement character inserted for an invalid sequence re-encodes to +// different bytes, so a mismatch means the input was not valid UTF-8. +// `ignoreBOM: true` keeps a leading UTF-8 BOM in the output; without it TextDecoder +// strips the BOM and the re-encode would drop those 3 bytes, wrongly rejecting valid +// BOM-prefixed text as binary. `fatal` is passed explicitly because the Worker TS lib +// types it as required on TextDecoderConstructorOptions. export function decodeUtf8Strict(bytes: Uint8Array): string | null { - const text = new TextDecoder().decode(asBufferSource(bytes)); + const text = new TextDecoder("utf-8", { fatal: false, ignoreBOM: true }).decode(asBufferSource(bytes)); if (!bytesEqual(new TextEncoder().encode(text), bytes)) { return null; }