diff --git a/docs/rfcs/0004-storage-engine-v4/tracker.md b/docs/rfcs/0004-storage-engine-v4/tracker.md new file mode 100644 index 000000000..6dd46e97f --- /dev/null +++ b/docs/rfcs/0004-storage-engine-v4/tracker.md @@ -0,0 +1,137 @@ +# Storage Engine v4 — work tracker + +This file tracks the implementation of RFC v4 (canonical version at `/pebble-baton-sdk/rfc-v4.md` in the squire planner). Updated by the implementer as work proceeds. Items marked `[TODO]` are open; `[DONE]` is complete; `[DEFERRED]` is intentionally not in scope for v4. + +## Branch layout + +- **parent**: `pquerna/storage-v4-parent` — SDK option plumbing, magic-byte dispatch in `ReadHeader`, build tag setup. No behavior change. +- **stack 1**: `pquerna/storage-v4-stack1-protos-codegen` — storage protos + `cmd/protoc-gen-batonstore/` plugin + generated codecs + tag-gated engine skeleton. +- **stack 2**: `pquerna/storage-v4-stack2-envelope` — v3 envelope format + manifest proto + descriptor closure-test fixture. +- **stack 3**: `pquerna/storage-v4-stack3-pebble-engine` — full Pebble engine implementing `connectorstore.Reader`/`Writer`/`C1ZStore`. +- **stack 4**: `pquerna/storage-v4-stack4-compaction` — Pebble cross-engine compaction via `IngestAndExcise`. +- **stack 5**: `pquerna/storage-v4-stack5-equiv-bench` — cross-engine equivalence suite + hyper-scale fixtures + bench harness. + +Each child branch is rebased on the previous (linear stack), submitted as a stacked PR pointing at its parent. + +## Status + +| Stack | Status | Branch | LOC est | LOC actual | +|---|---|---|---|---| +| Parent | [IN PROGRESS] | `pquerna/storage-v4-parent` | ~150 | 0 | +| 1 (protos + codegen) | [TODO] | `pquerna/storage-v4-stack1-protos-codegen` | ~1500 | 0 | +| 2 (envelope) | [TODO] | `pquerna/storage-v4-stack2-envelope` | ~600 | 0 | +| 3 (engine) | [TODO] | `pquerna/storage-v4-stack3-pebble-engine` | ~5000 | 0 | +| 4 (compaction) | [TODO] | `pquerna/storage-v4-stack4-compaction` | ~800 | 0 | +| 5 (equiv + bench) | [TODO] | `pquerna/storage-v4-stack5-equiv-bench` | ~2500 | 0 | + +Total target: ~10,550 LOC of new code + generated artifacts. + +## Per-stack TODO + +### Parent + +- [TODO] Add `dotc1z.EngineV3` constant alongside `EngineSQLite`. +- [TODO] Add `WithEngine(name string)` option. +- [TODO] Add `C1Z3` magic header constant + `C1ZFormatV3`. +- [TODO] Update `ReadHeader` to recognize `C1Z3\x00`. +- [TODO] Build tag for the engine package: `//go:build batonsdkv2`. +- [TODO] `cmd/baton-buildtag-check/` — CI gate that builds a sample connector with default tags + greps for `cockroachdb/pebble` in the binary. + +### Stack 1 — protos + codegen + +- [TODO] `proto/c1/storage/v3/options.proto` — TableOption + IndexOption. +- [TODO] `proto/c1/storage/v3/records.proto` — all six record types + mirror types (GrantExpandableRecord, GrantSourceRecord, SyncType). +- [TODO] `proto/c1/storage/v3/refs.proto` — EntitlementRef, PrincipalRef, ResourceRef. +- [TODO] `buf.gen.yaml` entry for `protoc-gen-batonstore`. +- [TODO] `cmd/protoc-gen-batonstore/main.go` — `protogen.Run` shell. +- [TODO] `cmd/protoc-gen-batonstore/walker.go` — walks record-bearing messages, resolves field paths. +- [TODO] `cmd/protoc-gen-batonstore/codec_gen.go` — emits EncodeKey/Value, WriteIndexes/DeleteIndexes per record. +- [TODO] `cmd/protoc-gen-batonstore/register_gen.go` — emits register.gen.go. +- [TODO] `pkg/dotc1z/engine/pebble/codec/registry.go` — Codec interface, Lookup, Register. +- [TODO] `pkg/dotc1z/engine/pebble/codec/reflect.go` — `NewReflectCodec(MessageDescriptor)` + `*ReflectCodec`. +- [TODO] `pkg/dotc1z/engine/pebble/codec/tuple.go` — tuple-encoding helpers (string, bytes, int32, int64, bool). +- [TODO] `pkg/dotc1z/engine/pebble/codec/syncid.go` — `EncodeSyncID(string) ([]byte, error)` + `DecodeSyncID`. +- [TODO] `pkg/dotc1z/engine/pebble/codec/errors.go` — `ErrCodecTypeMismatch`. +- [TODO] `pkg/dotc1z/engine/pebble/gen/*.gen.go` — committed generated codecs (post-protogen). +- [TODO] `pkg/dotc1z/engine/pebble/engine_stub.go` — empty engine satisfying interface via panics, gated by `//go:build batonsdkv2`. +- [TODO] `Makefile` — `make protogen` runs `buf generate`; `make protogen/check` for CI drift. + +### Stack 2 — envelope + +- [TODO] `proto/c1/c1z/v3/manifest.proto` — `C1ZManifestV3`, `PayloadEncoding` enum, `RecordTypeInfo`, `SyncRunSummary`, `PebbleEngineConfig`. +- [TODO] `pkg/dotc1z/format/v3/manifest.go` — `ReadManifest`, `WriteManifest`, `BuildClosure(protoregistry.Files) *descriptorpb.FileDescriptorSet`. +- [TODO] `pkg/dotc1z/format/v3/envelope.go` — `WriteV3Envelope(w, manifest, payloadDir)`, `OpenV3Envelope(r) (*Manifest, payloadReader, error)`. +- [TODO] `pkg/dotc1z/format/v3/payload.go` — zstd-tar streaming. +- [TODO] `cmd/baton-descriptor-closure-test/main.go` — verifies its import graph excludes `c1/connector/v2`; opens fixture; round-trips a known grant via dynamicpb. +- [TODO] `pkg/dotc1z/format/v3/testdata/closure-fixture.c1z3` — committed. +- [TODO] `pkg/dotc1z/format/v3/envelope_test.go` — round-trip tests. + +### Stack 3 — Pebble engine + +- [TODO] `pkg/dotc1z/engine/pebble/engine.go` — `Engine` struct, lifecycle. +- [TODO] `pkg/dotc1z/engine/pebble/options.go` — `Preset` enum, `WithPreset`, `WithSharedCache`, `WithDurability`, `newEngineOptions`. +- [TODO] `pkg/dotc1z/engine/pebble/errors.go` — full sentinel set from Appendix E. +- [TODO] `pkg/dotc1z/engine/pebble/quiesce.go` — strict write-barrier + WaitGroup. +- [TODO] `pkg/dotc1z/engine/pebble/save.go` — `Save(ctx, dest)` using `db.Checkpoint`. +- [TODO] `pkg/dotc1z/engine/pebble/keys.go` — primary-key + index-key encoding for all six record types. +- [TODO] `pkg/dotc1z/engine/pebble/grants.go` — `PutGrants`, `PutGrantsIfNewer`, `DeleteGrant`, `ListGrants`, `ListGrantsForEntitlement`, `ListGrantsForPrincipal`, `ListGrantsForResourceType`, `ListWithAnnotationsForResourcePage`, fresh-sync fast path + MultiGet incremental path. +- [TODO] `pkg/dotc1z/engine/pebble/resources.go` — Resources, ResourceTypes. +- [TODO] `pkg/dotc1z/engine/pebble/entitlements.go`. +- [TODO] `pkg/dotc1z/engine/pebble/assets.go`. +- [TODO] `pkg/dotc1z/engine/pebble/sync_runs.go`. +- [TODO] `pkg/dotc1z/engine/pebble/sessions.go` — value-separation-aware. +- [TODO] `pkg/dotc1z/engine/pebble/stats.go` — append-only counter sidecar. +- [TODO] `pkg/dotc1z/engine/pebble/index_lifecycle.go` — online index build state machine. +- [TODO] `pkg/dotc1z/engine/pebble/event_listener.go` — Pebble EventListener wiring. +- [TODO] `pkg/dotc1z/engine/pebble/translate_v2.go` — `v2.Grant ↔ v3.GrantRecord` translation, stub-hydration on read. +- [TODO] `pkg/dotc1z/engine/pebble/engine_test.go` — basic open/write/read/save. + +### Stack 4 — compaction + +- [TODO] `pkg/synccompactor/pebble/compactor.go` — cross-file diff using `IngestAndExcise`. +- [TODO] `pkg/synccompactor/pebble/sst_writer.go` — emit sorted SSTs from merge stream. +- [TODO] `pkg/synccompactor/pebble/compactor_test.go` — small-scale roundtrip. + +### Stack 5 — equivalence + benchmarks + fixtures + +- [TODO] `pkg/dotc1z/engine/equivalence/runner.go` — runs the same workload through SQLite + Pebble engines and asserts byte-equivalent output for every reader method. +- [TODO] `cmd/baton-fixture-gen/main.go` — generates synthetic hyper-scale fixtures (1M users × 100M grants; global enterprise with cycles). +- [TODO] `cmd/baton-storage-bench/main.go` — benchmark harness targeting G1–G10 from RFC §6. +- [TODO] `pkg/dotc1z/engine/pebble/microtests/` — port the 5 micro-tests from `/tmp/baton-rfc-microtests/`. + +## Deferred (not in v4 scope) + +- [DEFERRED] Stack 6 — deferred grant expansion (Appendix H research spike; separate RFC). +- [DEFERRED] Stack 7 — C1 integration (`/data/squire/src/c1` repo; separate PR series). +- [DEFERRED] `cmd/baton-c1z migrate` — out-of-band v1→v3 migration tool (NG10). +- [DEFERRED] Vector / full-text / geospatial search. +- [DEFERRED] Columnar / Parquet projection. + +## Open questions to resolve before merge + +(From RFC v4 §8; PR 3 benchmarks settle most.) + +1. [TODO] Composite vs flatter form for `GrantsByEntitlement` — Stack 5 bench. +2. [TODO] Whether `GrantsByPrincipal` is a covering index — Stack 5 bench. +3. [TODO] Block cache hit rate on realistic workloads — Stack 5 bench. +4. [TODO] Whether to retain `WithMinCheckpointInterval` knob in v3 — decided by C1 ops post-rollout. +5. [TODO] Compression default at L6 (`zstd1` vs `zstd3`) — Stack 5 bench. +6. [TODO] When to bump pinned Pebble format — policy decision. + +## Per-stack OODA loop status + +Each stack iterates: research → plan → build → review → loop. + +| Stack | Research | Plan | Build | Review | Status | +|---|---|---|---|---|---| +| Parent | DONE | DONE | IN PROGRESS | TODO | building | +| 1 | TODO | TODO | TODO | TODO | not started | +| 2 | TODO | TODO | TODO | TODO | not started | +| 3 | TODO | TODO | TODO | TODO | not started | +| 4 | DONE (IngestAndExcise micro-test) | TODO | TODO | TODO | not started | +| 5 | DONE (micro-tests prove pattern) | TODO | TODO | TODO | not started | + +## Notes + +- Material risks already tested ahead of time via micro-tests at `/tmp/baton-rfc-microtests/`: outer-compression ratio, Checkpoint roundtrip, tuple encoding prefix-free, codegen vs reflection perf, IngestAndExcise atomicity. Tests will move into `pkg/dotc1z/engine/pebble/microtests/` as part of Stack 3. +- The compression-bench WIP work (RFC 0003) is stashed on `main` as `rfc-0003-compression-bench-wip`; unstash to recover. diff --git a/pkg/dotc1z/c1file.go b/pkg/dotc1z/c1file.go index 38a1f685b..3bf508544 100644 --- a/pkg/dotc1z/c1file.go +++ b/pkg/dotc1z/c1file.go @@ -73,6 +73,11 @@ type C1File struct { // See WithC1FV2GrantsWriter. v2GrantsWriter bool + + // engine is the storage engine to use for newly created files. + // Reads dispatch on magic byte regardless of this value. Default + // is EngineSQLite (v1 .c1z format). + engine Engine } // *C1File satisfies connectorstore.Writer (the connector-facing contract), @@ -134,6 +139,22 @@ func WithC1FSyncCountLimit(limit int) C1FOption { } } +// WithC1FEngine selects the storage engine for new .c1z files. The +// default is EngineSQLite, which keeps the legacy v1 file format and +// behavior. EnginePebble selects the v3 engine introduced by the +// storage-engine-v4 RFC; under default build tags the Pebble engine +// is not linked in and an attempt to use it returns +// ErrEngineNotAvailable. +// +// Engine selection only affects newly created files. Existing files +// dispatch on their magic byte; readers handle both v1 and v3 +// regardless of this option. +func WithC1FEngine(engine Engine) C1FOption { + return func(o *C1File) { + o.engine = engine + } +} + // WithC1FV2GrantsWriter strips Grant.Entitlement and Grant.Principal // from the serialized data blob at write time. Readers rebuild them // as identity-only stubs (Id + nested Resource.Id) from the grants @@ -190,6 +211,12 @@ func NewC1File(ctx context.Context, dbFilePath string, opts ...C1FOption) (*C1Fi opt(c1File) } + // Normalize the engine zero value so downstream switch/if-eq + // checks treat an unset engine as EngineSQLite. + if c1File.engine == "" { + c1File.engine = EngineSQLite + } + err = c1File.validateDb(ctx) if err != nil { return nil, err @@ -212,6 +239,10 @@ type c1zOptions struct { syncLimit int skipCleanup bool v2GrantsWriter bool + + // engine is the storage engine to use for newly created files. + // Reads dispatch on magic byte regardless. Default EngineSQLite. + engine Engine } type C1ZOption func(*c1zOptions) @@ -268,6 +299,19 @@ func WithSyncLimit(limit int) C1ZOption { } } +// WithEngine selects the storage engine for newly created .c1z files. +// Default is EngineSQLite (v1 format). EnginePebble enables the v3 +// engine; under default build tags it returns ErrEngineNotAvailable +// when the file is opened. +// +// Reading existing files dispatches on the file's magic byte and is +// independent of this option. +func WithEngine(engine Engine) C1ZOption { + return func(o *c1zOptions) { + o.engine = engine + } +} + // WithV2GrantsWriter toggles the slim-blob writer path for grants. // See WithC1FV2GrantsWriter for details. func WithV2GrantsWriter(enabled bool) C1ZOption { @@ -322,6 +366,9 @@ func NewC1ZFile(ctx context.Context, outputFilePath string, opts ...C1ZOption) ( if options.v2GrantsWriter { c1fopts = append(c1fopts, WithC1FV2GrantsWriter(true)) } + if options.engine != "" { + c1fopts = append(c1fopts, WithC1FEngine(options.engine)) + } c1File, err := NewC1File(ctx, dbFilePath, c1fopts...) if err != nil { diff --git a/pkg/dotc1z/c1file_test.go b/pkg/dotc1z/c1file_test.go index c52278083..c481511ad 100644 --- a/pkg/dotc1z/c1file_test.go +++ b/pkg/dotc1z/c1file_test.go @@ -685,3 +685,34 @@ func TestC1ZCachedViewSyncRunInvalidation(t *testing.T) { err = f.Close(ctx) require.NoError(t, err) } + +// TestWithEngineThreadsToC1File proves that WithEngine on NewC1ZFile +// actually reaches C1File.engine. Regression coverage for PR #867 +// review (btipling): the option was previously dropped during +// c1zOptions → c1fopts translation. +func TestWithEngineThreadsToC1File(t *testing.T) { + dir := t.TempDir() + f, err := NewC1ZFile(context.Background(), filepath.Join(dir, "test.c1z"), + WithEngine(EnginePebble)) + if err != nil { + t.Fatalf("NewC1ZFile: %v", err) + } + defer f.Close(context.Background()) + if f.engine != EnginePebble { + t.Errorf("engine = %q, want %q (option lost in c1zOptions→c1fopts translation)", f.engine, EnginePebble) + } +} + +// TestEngineDefaultsToSQLite proves that omitting WithEngine yields +// EngineSQLite (the documented default), not the empty zero value. +func TestEngineDefaultsToSQLite(t *testing.T) { + dir := t.TempDir() + f, err := NewC1ZFile(context.Background(), filepath.Join(dir, "default.c1z")) + if err != nil { + t.Fatalf("NewC1ZFile: %v", err) + } + defer f.Close(context.Background()) + if f.engine != EngineSQLite { + t.Errorf("engine = %q, want %q", f.engine, EngineSQLite) + } +} diff --git a/pkg/dotc1z/format.go b/pkg/dotc1z/format.go new file mode 100644 index 000000000..3b8b5a6d6 --- /dev/null +++ b/pkg/dotc1z/format.go @@ -0,0 +1,102 @@ +package dotc1z + +import ( + "bytes" + "fmt" + "io" +) + +// C1ZFormat identifies the on-disk format of a .c1z file. The format byte +// is the first 5 bytes of the file; see ReadHeaderFormat. +type C1ZFormat int + +const ( + // C1ZFormatUnknown is the zero value. Returned when the header bytes + // match neither the v1 nor the v3 magic, or when the read failed. + C1ZFormatUnknown C1ZFormat = iota + + // C1ZFormatV1 is the original .c1z format: 5-byte magic "C1ZF\x00" + // followed by a zstd-compressed SQLite database. + C1ZFormatV1 + + // C1ZFormatV3 is the v3 format introduced by the storage-engine-v4 + // RFC: 5-byte magic "C1Z3\x00", a length-prefixed proto manifest, + // and a zstd-tar payload of a Pebble Checkpoint directory. v3 is + // only opened when a v3-aware engine is linked in; under default + // build tags (no batonsdkv2), opening a v3 file returns + // ErrEngineNotAvailable. + C1ZFormatV3 +) + +// String returns a stable human-readable name for the format. +func (f C1ZFormat) String() string { + switch f { + case C1ZFormatV1: + return "v1" + case C1ZFormatV3: + return "v3" + default: + return "unknown" + } +} + +// C1Z3FileHeader is the magic byte sequence for v3 files. +var C1Z3FileHeader = []byte("C1Z3\x00") + +// Engine identifies a storage engine implementation. The engine is +// chosen by callers via WithEngine(...) on write; on read, the engine +// is dictated by the file's magic byte and (for v3) the manifest's +// engine field. +type Engine string + +const ( + // EngineSQLite is the default engine: the v1 .c1z format backed by + // a zstd-compressed SQLite database. Connectors use this; backend + // infra can opt out. + EngineSQLite Engine = "sqlite" + + // EnginePebble is the v3 engine: a Pebble LSM wrapped in the v3 + // envelope. Only available when the batonsdkv2 build tag is set; + // otherwise WithEngine(EnginePebble) returns ErrEngineNotAvailable + // at engine-construction time. + EnginePebble Engine = "pebble" +) + +// ErrEngineNotAvailable is returned when a caller requests an engine +// that the binary does not have linked in. The Pebble engine lives +// behind //go:build batonsdkv2 — default-tag connector binaries do +// not link it. Calling WithEngine(EnginePebble) from a default-build +// binary surfaces this error at the engine-construction call site. +var ErrEngineNotAvailable = fmt.Errorf("dotc1z: engine not available (build-tag gated)") + +// ReadHeaderFormat reads the first 5 bytes of reader and returns the +// detected format. On return, the reader is positioned immediately +// after the header bytes. If reader is also an io.Seeker, it is +// rewound to offset 0 before reading. +// +// Returns: +// - C1ZFormatV1, nil — file starts with "C1ZF\x00". +// - C1ZFormatV3, nil — file starts with "C1Z3\x00". +// - C1ZFormatUnknown, ErrInvalidFile — header matched no known magic. +// - C1ZFormatUnknown, err — underlying read error. +func ReadHeaderFormat(reader io.Reader) (C1ZFormat, error) { + if rs, ok := reader.(io.Seeker); ok { + if _, err := rs.Seek(0, io.SeekStart); err != nil { + return C1ZFormatUnknown, err + } + } + + headerBytes := make([]byte, len(C1ZFileHeader)) + if _, err := io.ReadFull(reader, headerBytes); err != nil { + return C1ZFormatUnknown, err + } + + switch { + case bytes.Equal(headerBytes, C1ZFileHeader): + return C1ZFormatV1, nil + case bytes.Equal(headerBytes, C1Z3FileHeader): + return C1ZFormatV3, nil + default: + return C1ZFormatUnknown, ErrInvalidFile + } +} diff --git a/pkg/dotc1z/format_test.go b/pkg/dotc1z/format_test.go new file mode 100644 index 000000000..73b5abda8 --- /dev/null +++ b/pkg/dotc1z/format_test.go @@ -0,0 +1,113 @@ +package dotc1z + +import ( + "bytes" + "errors" + "io" + "strings" + "testing" +) + +func TestReadHeaderFormat(t *testing.T) { + tests := []struct { + name string + input []byte + want C1ZFormat + wantErr error + }{ + { + name: "v1 magic", + input: append([]byte("C1ZF\x00"), 0xde, 0xad, 0xbe, 0xef), + want: C1ZFormatV1, + }, + { + name: "v3 magic", + input: append([]byte("C1Z3\x00"), 0xca, 0xfe, 0xba, 0xbe), + want: C1ZFormatV3, + }, + { + name: "unknown magic", + input: []byte("XXXX\x00garbage"), + want: C1ZFormatUnknown, + wantErr: ErrInvalidFile, + }, + { + name: "short read", + input: []byte("C1Z"), + want: C1ZFormatUnknown, + wantErr: io.ErrUnexpectedEOF, + }, + { + name: "empty input", + input: []byte{}, + want: C1ZFormatUnknown, + wantErr: io.EOF, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := ReadHeaderFormat(bytes.NewReader(tc.input)) + if tc.wantErr != nil && !errors.Is(err, tc.wantErr) { + t.Fatalf("err: got %v, want %v", err, tc.wantErr) + } + if tc.wantErr == nil && err != nil { + t.Fatalf("unexpected err: %v", err) + } + if got != tc.want { + t.Errorf("format: got %v, want %v", got, tc.want) + } + }) + } +} + +func TestReadHeaderFormat_SeekerRewinds(t *testing.T) { + // A seekable reader positioned past the header should be rewound + // to 0 before the format is read. + r := bytes.NewReader(append([]byte("C1Z3\x00"), 'x', 'y', 'z')) + if _, err := r.Seek(7, io.SeekStart); err != nil { + t.Fatal(err) + } + got, err := ReadHeaderFormat(r) + if err != nil { + t.Fatal(err) + } + if got != C1ZFormatV3 { + t.Fatalf("got %v want v3", got) + } +} + +func TestFormatString(t *testing.T) { + for _, tc := range []struct { + f C1ZFormat + want string + }{ + {C1ZFormatV1, "v1"}, + {C1ZFormatV3, "v3"}, + {C1ZFormatUnknown, "unknown"}, + } { + if got := tc.f.String(); got != tc.want { + t.Errorf("String(%d): got %q want %q", tc.f, got, tc.want) + } + } +} + +func TestEngineConstants(t *testing.T) { + // Smoke-check the engine constants are non-empty and distinct. + if EngineSQLite == "" || EnginePebble == "" { + t.Fatal("engine constants must be non-empty") + } + if EngineSQLite == EnginePebble { + t.Fatal("engine constants must be distinct") + } +} + +func TestErrEngineNotAvailable(t *testing.T) { + // Ensure the sentinel is well-formed and discoverable via errors.Is. + if ErrEngineNotAvailable == nil { + t.Fatal("ErrEngineNotAvailable must be set") + } + wrapped := errors.New("upstream: " + ErrEngineNotAvailable.Error()) + if !strings.Contains(wrapped.Error(), "engine not available") { + t.Fatalf("error message changed unexpectedly: %v", wrapped) + } +}