From 947afad2b8ffdcc0d6b0980a68dda8d30fbcfdc5 Mon Sep 17 00:00:00 2001 From: Rafael Richards Date: Sun, 14 Jun 2026 22:30:35 -0400 Subject: [PATCH 1/3] feat(arch): add G2 forbidden-symbol gate (no VistA below the waterline) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `m arch check` now runs G2 alongside G1: an m-layer repo's .m code must not reference a VistA-only symbol (FileMan/Kernel/KIDS). A v-layer repo passes trivially. Phase B item 2 of the VSL/MSL effort (m/v waterline ADR §3.2 G2). Deny-list: ^DIC/^DIE/^DIK/^DIQ, ^DD(, ^DPT(, ^VA(, ^XUS*, ^XPD*. - Comment-aware: scans only codePortion(line) (before the first ';' not inside a "..." string), so a symbol named in a comment (e.g. an STDMOCK doc example) is not a reference. - RE2 has no lookahead, so the FileMan-API pattern uses a trailing-delimiter guard `(?:[^A-Za-z0-9]|$)` to avoid matching ^DIETST as ^DIE. - Extracted a shared forEachMLine walk used by CheckMRefs (G1) and the new CheckVistaSymbols (G2). Verified: cleaned m-stdlib G2-clean; m-stdlib master flags exactly STDSEED.m:218 (do FILE^DIE); all 5 m-layer repos clean. arch pkg 88.2% cover; golangci-lint + gofmt + self `arch check` clean. Owed: G3 (transport-monopoly), G4 (seam-pin). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/memory/MEMORY.md | 1 + docs/memory/arch-g2-forbidden-symbol.md | 43 +++++++++ internal/arch/arch.go | 119 +++++++++++++++++++----- internal/arch/arch_test.go | 110 ++++++++++++++++++++++ main.go | 23 ++--- 5 files changed, 263 insertions(+), 33 deletions(-) create mode 100644 docs/memory/arch-g2-forbidden-symbol.md diff --git a/docs/memory/MEMORY.md b/docs/memory/MEMORY.md index 1c33714..442d89b 100644 --- a/docs/memory/MEMORY.md +++ b/docs/memory/MEMORY.md @@ -1,3 +1,4 @@ # m-cli memory index - [chset byte mode](chset-byte-mode.md) — `--chset m|utf-8` on test/coverage/watch; m-stdlib byte suites need `m` on YDB +- [arch G2 forbidden-symbol](arch-g2-forbidden-symbol.md) — `m arch check` gained **G2** (no VistA symbols below the waterline): comment-aware deny-list scan (`^DIC/DIE/DIK/DIQ`, `^DD(`, `^DPT(`, `^VA(`, `^XUS*`, `^XPD*`) of m-layer `.m` code; RE2 trailing-guard (no lookahead) avoids `^DIETST`. Shared `forEachMLine` walk. Verified all 5 m-repos clean. G3/G4 still owed. diff --git a/docs/memory/arch-g2-forbidden-symbol.md b/docs/memory/arch-g2-forbidden-symbol.md new file mode 100644 index 0000000..4c69f94 --- /dev/null +++ b/docs/memory/arch-g2-forbidden-symbol.md @@ -0,0 +1,43 @@ +--- +name: arch-g2-forbidden-symbol +description: m arch check gained G2 (no VistA symbols below the waterline) — comment-aware deny-list scan of m-layer .m source +metadata: + type: project +--- + +`m arch check` (internal/arch) now runs **G2 — forbidden-symbol** alongside G1 +(dependency-direction). G2 asserts an `m`-layer repo's `.m` **code** references +no VistA-only symbol; a `v`-layer repo passes trivially (VistA is allowed above +the waterline). Branch `phase-b-arch-gates` (off m-cli main); Phase B item 2 of +the VSL effort. + +**Deny-list (`vistaSymbols` in arch.go):** `^DIC/^DIE/^DIK/^DIQ` (FileMan API), +`^DD(`, `^DPT(`, `^VA(`, `^XUS*` (Kernel security), `^XPD*` (KIDS). 
+ +**Two non-obvious design points:** +- **Comment-aware.** A naive grep false-positives on STDMOCK doc lines like + `; doc: @example do register^STDMOCK("EN^DIE",...)`. G2 scans only + `codePortion(line)` — everything before the first `;` that is not inside a + `"..."` string (the `"` toggle handles doubled-quote escapes). Comment + mentions are not references. +- **Trailing-delimiter guard, not lookahead.** Go's RE2 has **no lookahead**, so + to stop `^DIE` matching the test routine `^DIETST`, the FileMan-API pattern is + `\^DI[CEKQ](?:[^A-Za-z0-9]|$)` — the symbol must be followed by a non-alnum or + end-of-line. + +**Implementation:** extracted a shared `forEachMLine(root, fn)` walk (skips +dist/vendor/.git/node_modules) used by both `CheckMRefs` (G1, scans the full +line) and the new `CheckVistaSymbols` (G2, scans `codePortion`). G1 is left +comment-UNAWARE deliberately (unchanged, shipped) — so **`^VSL*` named in any +m-layer `.m` comment still trips G1**; keep VSL names out of m-stdlib comments. + +**Verified end-to-end:** cleaned m-stdlib (`stdseed-engine-neutral-g2`) → G2 +clean; m-stdlib `master` (still has `do FILE^DIE`) → G2 flags exactly +`src/STDSEED.m:218`; all 5 m-layer repos (m-cli/m-stdlib/m-driver-sdk/m-ydb/m-iris) +G2-clean. arch pkg 88.2% cover; golangci-lint + gofmt clean. + +**Still owed in Phase B:** G3 (transport-monopoly — only m-driver-sdk runs a +driver / builds the envelope), G4 (seam-pin — tagged SDK, no `replace`), the +root-`repo.meta.json` schema validation (item 1), the scheduled meta-gate, the +reusable `m-ci.yml`, and pinning `m-cli-ref` to a tag. See the org docs-repo +`docs/vsl-msl/vsl-implementation-tracker.md` Phase B row. 
diff --git a/internal/arch/arch.go b/internal/arch/arch.go index 33f6181..7a0a4c1 100644 --- a/internal/arch/arch.go +++ b/internal/arch/arch.go @@ -2,12 +2,21 @@ // boundary between the engine-neutral `m` layer and the VistA-specific `v` // layer (see docs/background/m-v-waterline-adr.md in the org `docs` repo). // -// This stage ships G1 — dependency-direction — the core invariant: dependency -// flows one way, v → m, never the reverse. A repo declares its layer in a -// committed meta artifact ("layer": "m"|"v"); the gate then asserts that an -// `m`-layer repo's Go dependency closure contains no `vista-cloud-dev/v-*` -// module, and that its M source references no `VSL*` (v-layer) routine. A -// `v`-layer repo passes G1 trivially (v → m is allowed). +// It ships two gates: +// +// - G1 — dependency-direction — the core invariant: dependency flows one way, +// v → m, never the reverse. An `m`-layer repo's Go dependency closure must +// contain no `vista-cloud-dev/v-*` module, and its M source must reference +// no `VSL*` (v-layer) routine. +// - G2 — forbidden-symbol (no VistA below the waterline): an `m`-layer `.m` +// file's code must not reference a VistA-only symbol (FileMan/Kernel/KIDS: +// ^DIC/^DIE/^DIK/^DIQ, ^DD(, ^DPT(, ^VA(, ^XUS*, ^XPD*). The scan is +// comment-aware — a symbol named only in a ';' comment (e.g. an STDMOCK doc +// example) is not a reference. +// +// A repo declares its layer in a committed meta artifact ("layer": "m"|"v"); a +// `v`-layer repo passes both gates trivially (v → m, and VistA above the line, +// are allowed). package arch import ( @@ -41,6 +50,22 @@ const vModulePrefix = "github.com/vista-cloud-dev/v-" // form — ^VSLCFG, $$tag^VSLCFG, do x^VSLCFG — since all contain "^VSL". var vRoutineRef = regexp.MustCompile(`\^VSL[A-Z0-9]*`) +// vistaSymbols is the G2 deny-list: VistA-only symbols (FileMan/Kernel/KIDS) +// that must not appear in m-layer code. 
The FileMan-API patterns carry a +// trailing-delimiter guard `(?:[^A-Za-z0-9]|$)` so a longer routine name such +// as ^DIETST is not mistaken for ^DIE — Go's RE2 has no lookahead. +var vistaSymbols = []struct { + name string + re *regexp.Regexp +}{ + {"^DIC/^DIE/^DIK/^DIQ (FileMan API)", regexp.MustCompile(`\^DI[CEKQ](?:[^A-Za-z0-9]|$)`)}, + {"^DD( (FileMan data dictionary)", regexp.MustCompile(`\^DD\(`)}, + {"^DPT( (patient file)", regexp.MustCompile(`\^DPT\(`)}, + {"^VA( (institution file)", regexp.MustCompile(`\^VA\(`)}, + {"^XUS* (Kernel security)", regexp.MustCompile(`\^XUS[A-Za-z0-9]*`)}, + {"^XPD* (KIDS)", regexp.MustCompile(`\^XPD[A-Za-z0-9]*`)}, +} + // Violation is one G1 finding — a dependency that crosses the waterline the // wrong way (m → v). type Violation struct { @@ -156,12 +181,11 @@ func goListModules(root string) ([]string, error) { return parseGoListDeps(out.Bytes()) } -// CheckMRefs scans the .m source under root for references to v-layer (VSL*) -// routines — the M-side m → v G1 violation. Generated/vendored trees are -// skipped (dist, vendor, .git, node_modules). -func CheckMRefs(root string) ([]Violation, error) { - var vs []Violation - err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { +// forEachMLine walks the .m source under root and calls fn for every line. +// Generated/vendored trees are skipped (dist, vendor, .git, node_modules). +// rel is the path relative to root; lineNo is 1-based. 
+func forEachMLine(root string, fn func(rel string, lineNo int, line string)) error { + return filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } @@ -184,20 +208,65 @@ func CheckMRefs(root string) ([]Violation, error) { rel = path } for i, line := range strings.Split(string(body), "\n") { - if m := vRoutineRef.FindString(line); m != "" { + fn(rel, i+1, line) + } + return nil + }) +} + +// codePortion returns the executable part of an M line — everything before the +// first ';' that is not inside a double-quoted string. M comments begin with +// ';'; a ';' inside a "..." literal (including a doubled-quote escape) is data, +// not a comment. +func codePortion(line string) string { + inStr := false + for i := 0; i < len(line); i++ { + switch line[i] { + case '"': + inStr = !inStr + case ';': + if !inStr { + return line[:i] + } + } + } + return line +} + +// CheckMRefs scans the .m source under root for references to v-layer (VSL*) +// routines — the M-side m → v G1 violation. +func CheckMRefs(root string) ([]Violation, error) { + var vs []Violation + err := forEachMLine(root, func(rel string, lineNo int, line string) { + if m := vRoutineRef.FindString(line); m != "" { + vs = append(vs, Violation{ + Gate: "G1", Kind: "m-ref", + Source: fmt.Sprintf("%s:%d", rel, lineNo), + Detail: fmt.Sprintf("m-layer routine references v-layer routine %s", m), + }) + } + }) + return vs, err +} + +// CheckVistaSymbols scans the code portion of the .m source under root for +// VistA-only symbols (the G2 violation — no VistA below the waterline). +// Comment text is ignored via codePortion. 
+func CheckVistaSymbols(root string) ([]Violation, error) { + var vs []Violation + err := forEachMLine(root, func(rel string, lineNo int, line string) { + code := codePortion(line) + for _, sym := range vistaSymbols { + if sym.re.MatchString(code) { vs = append(vs, Violation{ - Gate: "G1", Kind: "m-ref", - Source: fmt.Sprintf("%s:%d", rel, i+1), - Detail: fmt.Sprintf("m-layer routine references v-layer routine %s", m), + Gate: "G2", Kind: "vista-symbol", + Source: fmt.Sprintf("%s:%d", rel, lineNo), + Detail: fmt.Sprintf("m-layer source references VistA-only symbol %s", sym.name), }) } } - return nil }) - if err != nil { - return nil, err - } - return vs, nil + return vs, err } // Check resolves the repo layer and runs the applicable G1 checks. A v-layer @@ -221,12 +290,18 @@ func Check(root, override string) (Report, error) { rep.CheckedGo = true rep.Violations = append(rep.Violations, vViolations(mods)...) } - // M-side dependency-direction (STD* → VSL*). + // M-side dependency-direction (G1: STD* → VSL*). mvs, err := CheckMRefs(root) if err != nil { return rep, err } rep.CheckedM = true rep.Violations = append(rep.Violations, mvs...) + // M-side forbidden-symbol (G2: no VistA below the waterline). + sym, err := CheckVistaSymbols(root) + if err != nil { + return rep, err + } + rep.Violations = append(rep.Violations, sym...) return rep, nil } diff --git a/internal/arch/arch_test.go b/internal/arch/arch_test.go index ff6a9a9..87eb5b7 100644 --- a/internal/arch/arch_test.go +++ b/internal/arch/arch_test.go @@ -228,6 +228,116 @@ func TestCheckMLayerGoArmClean(t *testing.T) { } } +// --- G2: codePortion (comment-awareness) ------------------------------------ + +func TestCodePortion(t *testing.T) { + cases := map[string]string{ + "\tdo FILE^DIE(x) ; call the filer": "\tdo FILE^DIE(x) ", + "\t; do FILE^DIE(x)": "\t", + "\tset x=\"a;b\" ; tail": "\tset x=\"a;b\" ", + // A ';' inside a (doubled-quote) string is not a comment. 
+ "\tset x=\"q\"\" ; in string\"": "\tset x=\"q\"\" ; in string\"", + "\tquit": "\tquit", + } + for in, want := range cases { + if got := codePortion(in); got != want { + t.Errorf("codePortion(%q) = %q, want %q", in, got, want) + } + } +} + +// --- G2: CheckVistaSymbols (no VistA below the waterline) -------------------- + +func TestCheckVistaSymbolsFlagsCodeRef(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "src", "STDX.m"), + "STDX ;\nfiler() ;\n do FILE^DIE(\"\",a,b)\n quit\n") + vs, err := CheckVistaSymbols(dir) + if err != nil { + t.Fatalf("CheckVistaSymbols: %v", err) + } + if len(vs) != 1 { + t.Fatalf("expected 1 G2 violation, got %d: %v", len(vs), vs) + } + if vs[0].Gate != "G2" || vs[0].Kind != "vista-symbol" { + t.Errorf("unexpected violation: %+v", vs[0]) + } +} + +func TestCheckVistaSymbolsIgnoresComment(t *testing.T) { + dir := t.TempDir() + // STDMOCK's doc examples name "EN^DIE" as a mock target — comment only. + writeFile(t, filepath.Join(dir, "src", "STDMOCK.m"), + "STDMOCK ;\n ; doc: @example do register^STDMOCK(\"EN^DIE\",\"stub\")\n quit\n") + vs, err := CheckVistaSymbols(dir) + if err != nil { + t.Fatalf("CheckVistaSymbols: %v", err) + } + if len(vs) != 0 { + t.Errorf("comment mentions must not be flagged, got %v", vs) + } +} + +func TestCheckVistaSymbolsTrailingGuard(t *testing.T) { + dir := t.TempDir() + // ^DIETST is a test routine name, not FileMan ^DIE — must not match. 
+ writeFile(t, filepath.Join(dir, "src", "STDX.m"), + "STDX ;\n do stub^DIETST\n quit\n") + vs, err := CheckVistaSymbols(dir) + if err != nil { + t.Fatalf("CheckVistaSymbols: %v", err) + } + if len(vs) != 0 { + t.Errorf("^DIETST must not match ^DIE, got %v", vs) + } +} + +func TestCheckVistaSymbolsGlobals(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "src", "STDX.m"), + "STDX ;\n set a=^DPT(1,0)\n set b=$get(^DD(2))\n set c=^VA(200,0)\n set d=^XUSEC(\"K\",1)\n quit\n") + vs, err := CheckVistaSymbols(dir) + if err != nil { + t.Fatalf("CheckVistaSymbols: %v", err) + } + if len(vs) != 4 { + t.Errorf("expected 4 violations (DPT/DD/VA/XUSEC), got %d: %v", len(vs), vs) + } +} + +func TestCheckMLayerFlagsVistaSymbol(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "dist", "repo.meta.json"), `{"layer":"m"}`) + writeFile(t, filepath.Join(dir, "src", "STDX.m"), " do FILE^DIE(\"\")\n") + rep, err := Check(dir, "") + if err != nil { + t.Fatalf("Check: %v", err) + } + var g2 int + for _, v := range rep.Violations { + if v.Gate == "G2" { + g2++ + } + } + if g2 != 1 { + t.Errorf("expected 1 G2 violation in report, got %d: %v", g2, rep.Violations) + } +} + +func TestCheckVLayerSkipsVistaSymbols(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "dist", "v-contract.json"), `{"layer":"v"}`) + // VistA symbols are expected above the waterline — v-layer passes. 
+ writeFile(t, filepath.Join(dir, "src", "VSLX.m"), " do FILE^DIE(\"\")\n") + rep, err := Check(dir, "") + if err != nil { + t.Fatalf("Check: %v", err) + } + if len(rep.Violations) != 0 { + t.Errorf("v-layer must pass G2, got %v", rep.Violations) + } +} + // --- helpers ----------------------------------------------------------------- func writeFile(t *testing.T, path, body string) { diff --git a/main.go b/main.go index bd0f1a4..2b98691 100644 --- a/main.go +++ b/main.go @@ -1024,16 +1024,17 @@ func (lspCmd) Run(_ *clikit.Context) error { return nil } -// --- arch (the m/v waterline gate) ------------------------------------------- +// --- arch (the m/v waterline gates) ------------------------------------------ // -// G1 — dependency-direction: dependency flows one way, v → m, never the -// reverse (docs/background/m-v-waterline-adr.md). The repo declares its layer -// in a committed meta artifact; `m arch check` asserts an m-layer repo's Go -// dependency closure carries no vista-cloud-dev/v-* module and its M source -// references no VSL* (v-layer) routine. A v-layer repo passes trivially. +// The m/v waterline (docs/background/m-v-waterline-adr.md). The repo declares +// its layer in a committed meta artifact; `m arch check` runs, for an m-layer +// repo: G1 dependency-direction (Go closure carries no vista-cloud-dev/v-* +// module; M source references no VSL* routine) and G2 forbidden-symbol (M code +// references no VistA-only symbol — FileMan/Kernel/KIDS). A v-layer repo passes +// trivially (v → m, and VistA above the line, are allowed). 
type archCmd struct { - Check archCheckCmd `cmd:"" help:"Run the G1 dependency-direction gate for this repo."` + Check archCheckCmd `cmd:"" help:"Run the m/v waterline gates (G1 dependency-direction, G2 forbidden-symbol) for this repo."` } type archCheckCmd struct { @@ -1066,7 +1067,7 @@ func (c *archCheckCmd) Run(cc *clikit.Context) error { } cc.KV( [2]string{"layer", cc.Accent(string(rep.Layer))}, - [2]string{"gate", "G1 dependency-direction"}, + [2]string{"gates", "G1 dependency-direction, G2 forbidden-symbol"}, [2]string{"checked", strings.Join(checks, ", ")}, [2]string{"violations", fmt.Sprintf("%d", len(rep.Violations))}, ) @@ -1075,7 +1076,7 @@ func (c *archCheckCmd) Run(cc *clikit.Context) error { cc.Severity("error"), cc.Accent(v.Gate), v.Source, cc.Faint(v.Detail)) } if len(rep.Violations) == 0 { - fmt.Fprintln(cc.Stdout, cc.Success("waterline clean — no m → v dependency")) + fmt.Fprintln(cc.Stdout, cc.Success("waterline clean")) } }); err != nil { return err @@ -1083,8 +1084,8 @@ func (c *archCheckCmd) Run(cc *clikit.Context) error { if len(rep.Violations) > 0 { return clikit.Fail(clikit.ExitCheck, "WATERLINE_VIOLATION", - fmt.Sprintf("%d m → v dependency violation(s)", len(rep.Violations)), - "the m layer must not depend on the v layer (v → m only)") + fmt.Sprintf("%d waterline violation(s)", len(rep.Violations)), + "the m layer must not depend on the v layer (G1) or reference VistA symbols (G2)") } return nil } From a71e26e476198dccaf821c686d622a5569e02a49 Mon Sep 17 00:00:00 2001 From: Rafael Richards Date: Sun, 14 Jun 2026 22:47:53 -0400 Subject: [PATCH 2/3] feat(arch): add G3 transport-monopoly + G4 seam-pin gates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the m/v waterline gate suite in `m arch check` (Phase B item 2, ADR §3.2). Check now runs G1/G2 for the m layer and G3/G4 for every repo. - G3 transport-monopoly (CheckDriverMonopoly): flags a non-SDK repo that *execs* a driver binary. 
The driver literal ("m-ydb"/"m-iris") must co-occur with exec.Command on the same code line — so the gate's own deny-list var and test fixtures (which name the binaries but never exec them) don't trip it; the gate is self-hosting. goCodePortion strips Go // comments. SDK exempt; a driver may exec itself. - G4 seam-pin (CheckSeamPin): text-parses go.mod (no x/mod dep), flags a `replace` to m-driver-sdk or a pseudo-version (untagged) require. - Restructured Check + added goModulePath; Report gains CheckedG3/CheckedG4; command output lists all four gates. Verified: all 8 ecosystem repos clean under G1–G4 (no false-positives); planted exec/replace/pseudo-version fixtures red. arch 86.7% cover; golangci-lint + gofmt + self `arch check` clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/memory/MEMORY.md | 2 +- docs/memory/arch-g2-forbidden-symbol.md | 36 +++- internal/arch/arch.go | 246 +++++++++++++++++++++--- internal/arch/arch_test.go | 158 +++++++++++++++ main.go | 22 ++- 5 files changed, 421 insertions(+), 43 deletions(-) diff --git a/docs/memory/MEMORY.md b/docs/memory/MEMORY.md index 442d89b..6c45713 100644 --- a/docs/memory/MEMORY.md +++ b/docs/memory/MEMORY.md @@ -1,4 +1,4 @@ # m-cli memory index - [chset byte mode](chset-byte-mode.md) — `--chset m|utf-8` on test/coverage/watch; m-stdlib byte suites need `m` on YDB -- [arch G2 forbidden-symbol](arch-g2-forbidden-symbol.md) — `m arch check` gained **G2** (no VistA symbols below the waterline): comment-aware deny-list scan (`^DIC/DIE/DIK/DIQ`, `^DD(`, `^DPT(`, `^VA(`, `^XUS*`, `^XPD*`) of m-layer `.m` code; RE2 trailing-guard (no lookahead) avoids `^DIETST`. Shared `forEachMLine` walk. Verified all 5 m-repos clean. G3/G4 still owed. 
+- [arch waterline gates G2/G3/G4](arch-g2-forbidden-symbol.md) — `m arch check` gained the full suite: **G2** (no VistA symbols below the line; comment-aware deny-list, RE2 trailing-guard avoids `^DIETST`), **G3** (transport-monopoly — flags a non-SDK repo *exec*-ing `"m-ydb"/"m-iris"`; co-occurrence with `exec.Command` makes it self-hosting), **G4** (seam-pin — go.mod text-parse flags `replace`/pseudo-version of m-driver-sdk). G1/G2 = m-layer; G3/G4 = all repos. All 8 repos clean. Item-1 meta-schema + meta-gate + m-ci.yml + tag/pin still owed. diff --git a/docs/memory/arch-g2-forbidden-symbol.md b/docs/memory/arch-g2-forbidden-symbol.md index 4c69f94..8067b85 100644 --- a/docs/memory/arch-g2-forbidden-symbol.md +++ b/docs/memory/arch-g2-forbidden-symbol.md @@ -1,6 +1,6 @@ --- name: arch-g2-forbidden-symbol -description: m arch check gained G2 (no VistA symbols below the waterline) — comment-aware deny-list scan of m-layer .m source +description: m arch check gained the full waterline gate suite G2/G3/G4 (forbidden-symbol, transport-monopoly, seam-pin) on top of G1 metadata: type: project --- @@ -36,8 +36,32 @@ clean; m-stdlib `master` (still has `do FILE^DIE`) → G2 flags exactly `src/STDSEED.m:218`; all 5 m-layer repos (m-cli/m-stdlib/m-driver-sdk/m-ydb/m-iris) G2-clean. arch pkg 88.2% cover; golangci-lint + gofmt clean. -**Still owed in Phase B:** G3 (transport-monopoly — only m-driver-sdk runs a -driver / builds the envelope), G4 (seam-pin — tagged SDK, no `replace`), the -root-`repo.meta.json` schema validation (item 1), the scheduled meta-gate, the -reusable `m-ci.yml`, and pinning `m-cli-ref` to a tag. See the org docs-repo -`docs/vsl-msl/vsl-implementation-tracker.md` Phase B row. +## G3 + G4 (added same branch, 2026-06-14) + +`Check` was restructured: **G1/G2 run for the m layer only; G3/G4 run for every +repo** (a `v` consumer also must not hand-roll transport / must seam-pin). Layer +resolution feeds the new `goModulePath(root)`. 
+ +- **G3 — transport-monopoly** (`CheckDriverMonopoly`): flags a non-SDK repo that + **execs** a driver binary. **Key subtlety:** a bare driver-literal scan + false-positives on the gate's OWN `driverBinaries` deny-list var and on test + fixtures — so G3 requires the driver literal (`"m-ydb"`/`"m-iris"`) to + **co-occur with `exec.Command` on the same code line** (ADR §3.2 wording). + That makes the gate self-hosting: m-cli passes its own G3 even though arch.go + names both binaries. `goCodePortion` strips Go `//` comments (string-aware, + honors `\` escapes). The SDK is exempt (Check skips G3 when the module path is + `m-driver-sdk`); a driver may exec itself (selfName exemption). +- **G4 — seam-pin** (`CheckSeamPin`): text-parses `go.mod` (no `x/mod` dep — kept + the graph minimal). Flags a `replace` to m-driver-sdk (`seam-replace`) or a + pseudo-version require (`seam-untagged`, matched by `\d{14}-[0-9a-f]{12}`). A + repo not requiring the SDK passes trivially. Current state: all SDK consumers + pin a tag (m-ydb v0.2.0, rest v0.3.0), no `replace` → all clean. + +**Verified:** all 8 ecosystem repos G1–G4 clean (no false-positives); planted +exec + pseudo-version + replace fixtures red (unit tests). arch 86.7% cover, +golangci-lint + gofmt clean, m-cli self-`arch check` clean. + +**Still owed in Phase B:** the root-`repo.meta.json` schema validation (item 1) ++ migrate m-stdlib/v-stdlib off `dist/` (the only two not on root meta), the +scheduled meta-gate, the reusable `m-ci.yml`, and pinning `m-cli-ref` to a tag. +See the org docs-repo `docs/vsl-msl/vsl-implementation-tracker.md` Phase B row. diff --git a/internal/arch/arch.go b/internal/arch/arch.go index 7a0a4c1..6f0f9ba 100644 --- a/internal/arch/arch.go +++ b/internal/arch/arch.go @@ -2,7 +2,7 @@ // boundary between the engine-neutral `m` layer and the VistA-specific `v` // layer (see docs/background/m-v-waterline-adr.md in the org `docs` repo). 
// +// It ships four gates: // // - G1 — dependency-direction — the core invariant: dependency flows one way, // v → m, never the reverse. An `m`-layer repo's Go dependency closure must @@ -10,13 +10,20 @@ // no `VSL*` (v-layer) routine. // - G2 — forbidden-symbol (no VistA below the waterline): an `m`-layer `.m` // file's code must not reference a VistA-only symbol (FileMan/Kernel/KIDS: -// ^DIC/^DIE/^DIK/^DIQ, ^DD(, ^DPT(, ^VA(, ^XUS*, ^XPD*). The scan is -// comment-aware — a symbol named only in a ';' comment (e.g. an STDMOCK doc -// example) is not a reference. +// ^DIC/^DIE/^DIK/^DIQ, ^DD(, ^DPT(, ^VA(, ^XUS*, ^XPD*). Comment-aware — a +// symbol named only in a ';' comment (e.g. an STDMOCK doc example) is not a +// reference. +// - G3 — transport-monopoly: only m-driver-sdk may run a driver binary / build +// the engine envelope. Any other repo's Go code that execs a driver binary +// ("m-ydb"/"m-iris") other than its own is hand-rolling transport — reach the +// engine through mdriver.Client instead. +// - G4 — seam-pin: a repo requiring m-driver-sdk must pin a tagged release in +// go.mod — no `replace` to it, no pseudo-version (untagged commit). // -// A repo declares its layer in a committed meta artifact ("layer": "m"|"v"); a -// `v`-layer repo passes both gates trivially (v → m, and VistA above the line, -// are allowed). +// G1 and G2 apply to the m layer; G3 and G4 are layer-agnostic (a v consumer +// also must not hand-roll transport and must seam-pin). A repo declares its +// layer in a committed meta artifact ("layer": "m"|"v"); a `v`-layer repo passes +// G1/G2 trivially. package arch import ( @@ -46,6 +53,21 @@ const ( // shares (v-pkg, v-cli, v-stdlib, …). An m-layer closure must not contain it. const vModulePrefix = "github.com/vista-cloud-dev/v-" +// sdkModule is the one module allowed to run a driver binary / build the engine +// envelope — the transport monopoly (G3). 
Every other repo reaches the engine +// through its reference Client (mdriver.Client). +const sdkModule = "github.com/vista-cloud-dev/m-driver-sdk" + +// driverBinaries are the engine-driver binary names. Outside the SDK, only the +// repo that *is* a given driver may exec it; any other repo exec'ing one is +// hand-rolling transport (G3). +var driverBinaries = []string{"m-ydb", "m-iris"} + +// sdkPseudoVersion matches a Go pseudo-version — an untagged commit pin: a +// 14-digit UTC timestamp + 12-hex commit hash. A tagged require (vX.Y.Z) does +// not match. Used by G4 (seam-pin). +var sdkPseudoVersion = regexp.MustCompile(`\d{14}-[0-9a-f]{12}`) + // vRoutineRef matches a reference to a v-layer (VSL*) M routine in any call // form — ^VSLCFG, $$tag^VSLCFG, do x^VSLCFG — since all contain "^VSL". var vRoutineRef = regexp.MustCompile(`\^VSL[A-Z0-9]*`) @@ -75,11 +97,13 @@ type Violation struct { Detail string `json:"detail"` // human-readable explanation } -// Report is the full G1 result for one repo. +// Report is the full waterline-gate result for one repo. type Report struct { Layer Layer `json:"layer"` - CheckedGo bool `json:"checkedGo"` - CheckedM bool `json:"checkedM"` + CheckedGo bool `json:"checkedGo"` // G1 Go dependency closure + CheckedM bool `json:"checkedM"` // G1 m-ref + G2 forbidden-symbol + CheckedG3 bool `json:"checkedG3"` // G3 transport-monopoly (driver refs) + CheckedG4 bool `json:"checkedG4"` // G4 seam-pin (go.mod) Violations []Violation `json:"violations"` } @@ -233,6 +257,45 @@ func codePortion(line string) string { return line } +// goCodePortion returns the code part of a Go line — everything before a "//" +// line comment that is not inside a "..." string. Backslash escapes inside a +// string are honored. (Driver-binary literals are double-quoted, so backtick +// raw strings and block comments need no special handling here.) 
+func goCodePortion(line string) string { + inStr := false + for i := 0; i < len(line); i++ { + switch line[i] { + case '\\': + if inStr { + i++ // skip the escaped character + } + case '"': + inStr = !inStr + case '/': + if !inStr && i+1 < len(line) && line[i+1] == '/' { + return line[:i] + } + } + } + return line +} + +// goModulePath reads the module path from root/go.mod. ok is false when there +// is no go.mod (e.g. a pure-M repo). +func goModulePath(root string) (path string, ok bool) { + body, err := os.ReadFile(filepath.Join(root, "go.mod")) + if err != nil { + return "", false + } + for _, line := range strings.Split(string(body), "\n") { + f := strings.Fields(line) + if len(f) >= 2 && f[0] == "module" { + return f[1], true + } + } + return "", false +} + // CheckMRefs scans the .m source under root for references to v-layer (VSL*) // routines — the M-side m → v G1 violation. func CheckMRefs(root string) ([]Violation, error) { @@ -269,6 +332,108 @@ func CheckVistaSymbols(root string) ([]Violation, error) { return vs, err } +// CheckDriverMonopoly scans the Go source under root for an exec of a driver +// binary other than the repo's own (selfName) — the G3 transport-monopoly +// violation (ADR §3.2: no `exec.Command(…, "m-ydb"/"m-iris", …)` outside the +// SDK). The driver literal must co-occur with an exec.Command/CommandContext +// call on the same code line, so the gate's own deny-list and string fixtures +// (which name the binaries but never exec them) do not trip it. Only +// m-driver-sdk may run a driver / build the envelope; every other consumer +// reaches the engine through mdriver.Client (engine name "ydb"/"iris", never the +// binary). Comment text is ignored (goCodePortion); generated/vendored trees are +// skipped. The SDK is exempt and is not scanned (the caller skips it). 
+func CheckDriverMonopoly(root, selfName string) ([]Violation, error) { + var vs []Violation + err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + switch d.Name() { + case ".git", "dist", "vendor", "node_modules": + return filepath.SkipDir + } + return nil + } + if strings.ToLower(filepath.Ext(path)) != ".go" { + return nil + } + body, err := os.ReadFile(path) + if err != nil { + return err + } + rel, relErr := filepath.Rel(root, path) + if relErr != nil { + rel = path + } + for i, line := range strings.Split(string(body), "\n") { + code := goCodePortion(line) + if !strings.Contains(code, "exec.Command") { + continue + } + for _, bin := range driverBinaries { + if bin == selfName { + continue // a driver may run itself + } + if strings.Contains(code, `"`+bin+`"`) { + vs = append(vs, Violation{ + Gate: "G3", Kind: "driver-ref", + Source: fmt.Sprintf("%s:%d", rel, i+1), + Detail: fmt.Sprintf("non-SDK repo execs driver binary %q — reach the engine via mdriver.Client", bin), + }) + } + } + } + return nil + }) + return vs, err +} + +// CheckSeamPin inspects root/go.mod for the seam-pin invariant (G4): a repo +// that requires m-driver-sdk must pin a *tagged* release — no `replace` +// directive to it and no pseudo-version (untagged commit) require. A repo with +// no go.mod, or one not depending on the SDK, passes trivially. +func CheckSeamPin(root string) ([]Violation, error) { + body, err := os.ReadFile(filepath.Join(root, "go.mod")) + if err != nil { + return nil, nil + } + var vs []Violation + inReplace := false + for _, line := range strings.Split(string(body), "\n") { + t := strings.TrimSpace(line) + switch { + case strings.HasPrefix(t, "replace ("): + inReplace = true + continue + case inReplace && t == ")": + inReplace = false + continue + } + if !strings.Contains(t, sdkModule) { + continue + } + // A replace directive to the SDK (single-line or inside a replace block). 
+ if (inReplace || strings.HasPrefix(t, "replace ")) && strings.Contains(t, "=>") { + vs = append(vs, Violation{ + Gate: "G4", Kind: "seam-replace", + Source: "go.mod", + Detail: "m-driver-sdk pinned via a replace directive — require a tagged release instead", + }) + continue + } + // Otherwise a require of the SDK — flag an untagged (pseudo-version) pin. + if sdkPseudoVersion.MatchString(t) { + vs = append(vs, Violation{ + Gate: "G4", Kind: "seam-untagged", + Source: "go.mod", + Detail: "m-driver-sdk pinned to a pseudo-version (untagged commit) — pin a tagged release", + }) + } + } + return vs, nil +} + // Check resolves the repo layer and runs the applicable G1 checks. A v-layer // repo passes trivially (v → m is allowed); an m-layer repo is checked on both // the Go dependency closure (when a go.mod is present) and its M source. @@ -278,30 +443,57 @@ func Check(root, override string) (Report, error) { return Report{}, err } rep := Report{Layer: layer} - if layer == LayerV { - return rep, nil - } - // Go dependency-direction (only when the repo is a Go module). - if _, statErr := os.Stat(filepath.Join(root, "go.mod")); statErr == nil { - mods, err := goListModules(root) + selfMod, hasMod := goModulePath(root) + + // G1 + G2 apply to the m layer only (v → m, and VistA above the line, are + // allowed). + if layer == LayerM { + // G1 Go dependency-direction (only when the repo is a Go module). + if hasMod { + mods, err := goListModules(root) + if err != nil { + return rep, err + } + rep.CheckedGo = true + rep.Violations = append(rep.Violations, vViolations(mods)...) + } + // G1 M-side dependency-direction (STD* → VSL*). + mvs, err := CheckMRefs(root) if err != nil { return rep, err } - rep.CheckedGo = true - rep.Violations = append(rep.Violations, vViolations(mods)...) + rep.CheckedM = true + rep.Violations = append(rep.Violations, mvs...) + // G2 forbidden-symbol (no VistA below the waterline). 
+ sym, err := CheckVistaSymbols(root) + if err != nil { + return rep, err + } + rep.Violations = append(rep.Violations, sym...) } - // M-side dependency-direction (G1: STD* → VSL*). - mvs, err := CheckMRefs(root) - if err != nil { - return rep, err + + // G3 transport-monopoly applies to every repo except the SDK itself, which + // owns the transport and legitimately names every driver binary. + if selfMod != sdkModule { + selfName := "" + if hasMod { + selfName = selfMod[strings.LastIndex(selfMod, "/")+1:] + } + g3, err := CheckDriverMonopoly(root, selfName) + if err != nil { + return rep, err + } + rep.CheckedG3 = true + rep.Violations = append(rep.Violations, g3...) } - rep.CheckedM = true - rep.Violations = append(rep.Violations, mvs...) - // M-side forbidden-symbol (G2: no VistA below the waterline). - sym, err := CheckVistaSymbols(root) + + // G4 seam-pin applies to every repo (trivial for one not requiring the SDK). + g4, err := CheckSeamPin(root) if err != nil { return rep, err } - rep.Violations = append(rep.Violations, sym...) + rep.CheckedG4 = true + rep.Violations = append(rep.Violations, g4...) + return rep, nil } diff --git a/internal/arch/arch_test.go b/internal/arch/arch_test.go index 87eb5b7..c146778 100644 --- a/internal/arch/arch_test.go +++ b/internal/arch/arch_test.go @@ -338,6 +338,164 @@ func TestCheckVLayerSkipsVistaSymbols(t *testing.T) { } } +// --- G3: CheckDriverMonopoly (transport monopoly) --------------------------- + +func TestCheckDriverMonopolyFlagsForeignDriver(t *testing.T) { + dir := t.TempDir() + // A consumer must not name a driver binary — it reaches the engine via + // mdriver.Client (engine name "ydb"/"iris", never the binary). 
+ writeFile(t, filepath.Join(dir, "internal", "x.go"), + "package x\nimport \"os/exec\"\nfunc r() { _ = exec.Command(\"m-ydb\", \"meta\") }\n") + vs, err := CheckDriverMonopoly(dir, "m-cli") + if err != nil { + t.Fatalf("CheckDriverMonopoly: %v", err) + } + if len(vs) != 1 || vs[0].Gate != "G3" || vs[0].Kind != "driver-ref" { + t.Fatalf("expected 1 G3 driver-ref, got %v", vs) + } +} + +func TestCheckDriverMonopolyAllowsNameWithoutExec(t *testing.T) { + dir := t.TempDir() + // Naming a driver binary without exec'ing it is fine — this is what makes + // the gate self-hosting (its own deny-list var names both binaries). + writeFile(t, filepath.Join(dir, "x.go"), + "package x\nvar driverBinaries = []string{\"m-ydb\", \"m-iris\"}\n") + vs, err := CheckDriverMonopoly(dir, "m-cli") + if err != nil { + t.Fatalf("CheckDriverMonopoly: %v", err) + } + if len(vs) != 0 { + t.Errorf("naming a driver without exec is allowed, got %v", vs) + } +} + +func TestCheckDriverMonopolyAllowsSelfExec(t *testing.T) { + dir := t.TempDir() + // The m-ydb driver may exec itself. + writeFile(t, filepath.Join(dir, "main.go"), + "package main\nimport \"os/exec\"\nfunc r() { _ = exec.Command(\"m-ydb\") }\n") + vs, err := CheckDriverMonopoly(dir, "m-ydb") + if err != nil { + t.Fatalf("CheckDriverMonopoly: %v", err) + } + if len(vs) != 0 { + t.Errorf("a driver exec'ing itself is allowed, got %v", vs) + } +} + +func TestCheckDriverMonopolyIgnoresComment(t *testing.T) { + dir := t.TempDir() + // Even an exec.Command named in a comment is not a real exec. 
+ writeFile(t, filepath.Join(dir, "x.go"), + "package x\n// once did exec.Command(\"m-iris\", ...)\nvar y = 1\n") + vs, err := CheckDriverMonopoly(dir, "m-cli") + if err != nil { + t.Fatalf("CheckDriverMonopoly: %v", err) + } + if len(vs) != 0 { + t.Errorf("a driver exec named only in a comment is not a reference, got %v", vs) + } +} + +// --- G4: CheckSeamPin (seam pin) -------------------------------------------- + +func TestCheckSeamPinFlagsReplace(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "go.mod"), + "module example.com/c\n\ngo 1.26\n\nrequire github.com/vista-cloud-dev/m-driver-sdk v0.3.0\n\nreplace github.com/vista-cloud-dev/m-driver-sdk => ../m-driver-sdk\n") + vs, err := CheckSeamPin(dir) + if err != nil { + t.Fatalf("CheckSeamPin: %v", err) + } + if len(vs) != 1 || vs[0].Gate != "G4" || vs[0].Kind != "seam-replace" { + t.Fatalf("expected 1 G4 seam-replace, got %v", vs) + } +} + +func TestCheckSeamPinFlagsPseudoVersion(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "go.mod"), + "module example.com/c\n\ngo 1.26\n\nrequire github.com/vista-cloud-dev/m-driver-sdk v0.0.0-20260101000000-abcdef123456\n") + vs, err := CheckSeamPin(dir) + if err != nil { + t.Fatalf("CheckSeamPin: %v", err) + } + if len(vs) != 1 || vs[0].Gate != "G4" || vs[0].Kind != "seam-untagged" { + t.Fatalf("expected 1 G4 seam-untagged, got %v", vs) + } +} + +func TestCheckSeamPinCleanTagInBlock(t *testing.T) { + dir := t.TempDir() + // A tagged require inside a require ( ... ) block, no replace — clean. 
+ writeFile(t, filepath.Join(dir, "go.mod"), + "module example.com/c\n\ngo 1.26\n\nrequire (\n\tgithub.com/alecthomas/kong v1.0.0\n\tgithub.com/vista-cloud-dev/m-driver-sdk v0.3.0\n)\n") + vs, err := CheckSeamPin(dir) + if err != nil { + t.Fatalf("CheckSeamPin: %v", err) + } + if len(vs) != 0 { + t.Errorf("a tagged require with no replace is clean, got %v", vs) + } +} + +func TestCheckSeamPinNoSdkDep(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "go.mod"), "module example.com/c\n\ngo 1.26\n") + vs, err := CheckSeamPin(dir) + if err != nil { + t.Fatalf("CheckSeamPin: %v", err) + } + if len(vs) != 0 { + t.Errorf("a repo not depending on the SDK passes G4, got %v", vs) + } +} + +// --- Check integration: G3/G4 are layer-agnostic ---------------------------- + +func TestCheckVLayerRunsSeamPin(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "dist", "v-contract.json"), `{"layer":"v"}`) + writeFile(t, filepath.Join(dir, "go.mod"), + "module example.com/v\n\ngo 1.26\n\nrequire github.com/vista-cloud-dev/m-driver-sdk v0.3.0\n\nreplace github.com/vista-cloud-dev/m-driver-sdk => ../x\n") + rep, err := Check(dir, "") + if err != nil { + t.Fatalf("Check: %v", err) + } + var g4 int + for _, v := range rep.Violations { + if v.Gate == "G4" { + g4++ + } + } + if g4 != 1 { + t.Errorf("a v-layer repo must still run G4 seam-pin, got %v", rep.Violations) + } +} + +func TestCheckSdkExemptFromG3(t *testing.T) { + if _, err := exec.LookPath("go"); err != nil { + t.Skip("go toolchain not on PATH") + } + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "go.mod"), + "module github.com/vista-cloud-dev/m-driver-sdk\n\ngo 1.26\n") + writeFile(t, filepath.Join(dir, "dist", "repo.meta.json"), `{"layer":"m"}`) + // The SDK legitimately execs every driver binary. 
+ writeFile(t, filepath.Join(dir, "client.go"), + "package mdriver\n\nimport \"os/exec\"\n\nvar _ = exec.Command(\"m-ydb\")\n") + rep, err := Check(dir, "") + if err != nil { + t.Fatalf("Check: %v", err) + } + for _, v := range rep.Violations { + if v.Gate == "G3" { + t.Errorf("m-driver-sdk is exempt from G3, got %v", v) + } + } +} + // --- helpers ----------------------------------------------------------------- func writeFile(t *testing.T, path, body string) { diff --git a/main.go b/main.go index 2b98691..488fdde 100644 --- a/main.go +++ b/main.go @@ -1027,14 +1027,15 @@ func (lspCmd) Run(_ *clikit.Context) error { // --- arch (the m/v waterline gates) ------------------------------------------ // // The m/v waterline (docs/background/m-v-waterline-adr.md). The repo declares -// its layer in a committed meta artifact; `m arch check` runs, for an m-layer -// repo: G1 dependency-direction (Go closure carries no vista-cloud-dev/v-* -// module; M source references no VSL* routine) and G2 forbidden-symbol (M code -// references no VistA-only symbol — FileMan/Kernel/KIDS). A v-layer repo passes -// trivially (v → m, and VistA above the line, are allowed). +// its layer in a committed meta artifact; `m arch check` runs G1 dependency- +// direction (Go closure carries no vista-cloud-dev/v-* module; M source +// references no VSL* routine) and G2 forbidden-symbol (M code references no +// VistA-only symbol) for an m-layer repo, plus G3 transport-monopoly (only +// m-driver-sdk names a driver binary) and G4 seam-pin (go.mod pins a tagged +// m-driver-sdk, no replace) for every repo. 
type archCmd struct { - Check archCheckCmd `cmd:"" help:"Run the m/v waterline gates (G1 dependency-direction, G2 forbidden-symbol) for this repo."` + Check archCheckCmd `cmd:"" help:"Run the m/v waterline gates (G1 dependency-direction, G2 forbidden-symbol, G3 transport-monopoly, G4 seam-pin) for this repo."` } type archCheckCmd struct { @@ -1062,12 +1063,15 @@ func (c *archCheckCmd) Run(cc *clikit.Context) error { if rep.CheckedM { checks = append(checks, "m-source") } - if len(checks) == 0 { - checks = append(checks, "none (v-layer)") + if rep.CheckedG3 { + checks = append(checks, "driver-refs") + } + if rep.CheckedG4 { + checks = append(checks, "seam-pin") } cc.KV( [2]string{"layer", cc.Accent(string(rep.Layer))}, - [2]string{"gates", "G1 dependency-direction, G2 forbidden-symbol"}, + [2]string{"gates", "G1 dependency-direction · G2 forbidden-symbol · G3 transport-monopoly · G4 seam-pin"}, [2]string{"checked", strings.Join(checks, ", ")}, [2]string{"violations", fmt.Sprintf("%d", len(rep.Violations))}, ) From 5aad0fffb20994fb0cc46f6a760bf95d08ed8d85 Mon Sep 17 00:00:00 2001 From: Rafael Richards Date: Mon, 15 Jun 2026 06:13:47 -0400 Subject: [PATCH 3/3] feat(arch): validate the repo meta artifact (Phase B item 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `m arch check` now reads the meta root-first (repo.meta.json, then dist/repo.meta.json, then dist/v-contract.json) and validates its shape: a repo.meta.json (root preferred) must carry id, layer, language, and verification_commands; layer must be m|v. consumes/exposes are optional. Problems surface as Gate:"META" violations; Report.CheckedMeta. Validation runs only when a repo.meta.json is found — a repo declaring its layer only via dist/v-contract.json or --layer has nothing to validate and is skipped (back-compat while m-stdlib/v-stdlib migrate to a root meta). 
The Meta struct holds only the four required fields: the live metas carry consumes/exposes as objects (not arrays), so typing them as []string made json.Unmarshal hard-error on v-pkg/v-cli/m-stdlib — caught by the all-8 sweep. Optional/descriptive fields are unknown-and-ignored. Regression test added. Verified: all 8 ecosystem repos clean under G1-G4 + meta; arch 87.3% cover; golangci-lint + gofmt + self `arch check` clean. Owed: migrate m-stdlib/v-stdlib off dist/ to a root repo.meta.json (tooling is now root-first). Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/memory/MEMORY.md | 2 +- docs/memory/arch-g2-forbidden-symbol.md | 28 +++++- internal/arch/arch.go | 124 ++++++++++++++++++++---- internal/arch/arch_test.go | 111 ++++++++++++++++++++- main.go | 15 ++- 5 files changed, 253 insertions(+), 27 deletions(-) diff --git a/docs/memory/MEMORY.md b/docs/memory/MEMORY.md index 6c45713..91075af 100644 --- a/docs/memory/MEMORY.md +++ b/docs/memory/MEMORY.md @@ -1,4 +1,4 @@ # m-cli memory index - [chset byte mode](chset-byte-mode.md) — `--chset m|utf-8` on test/coverage/watch; m-stdlib byte suites need `m` on YDB -- [arch waterline gates G2/G3/G4](arch-g2-forbidden-symbol.md) — `m arch check` gained the full suite: **G2** (no VistA symbols below the line; comment-aware deny-list, RE2 trailing-guard avoids `^DIETST`), **G3** (transport-monopoly — flags a non-SDK repo *exec*-ing `"m-ydb"/"m-iris"`; co-occurrence with `exec.Command` makes it self-hosting), **G4** (seam-pin — go.mod text-parse flags `replace`/pseudo-version of m-driver-sdk). G1/G2 = m-layer; G3/G4 = all repos. All 8 repos clean. Item-1 meta-schema + meta-gate + m-ci.yml + tag/pin still owed. 
+- [arch waterline gates G2/G3/G4](arch-g2-forbidden-symbol.md) — `m arch check` gained the full suite: **G2** (no VistA symbols below the line; comment-aware deny-list, RE2 trailing-guard avoids `^DIETST`), **G3** (transport-monopoly — flags a non-SDK repo *exec*-ing `"m-ydb"/"m-iris"`; co-occurrence with `exec.Command` makes it self-hosting), **G4** (seam-pin — go.mod text-parse flags `replace`/pseudo-version of m-driver-sdk). G1/G2 = m-layer; G3/G4 = all repos. **Item 1 (meta-schema validation) also done:** root-first `metaCandidates`, `LoadMeta`/`ValidateMeta` require id/layer/language/verification_commands; `Gate:"META"`. Gotcha: consumes/exposes are objects not arrays → `Meta` struct holds only the 4 required fields. All 8 repos clean. Owed: migrate m-stdlib/v-stdlib off `dist/` to root meta; meta-gate; m-ci.yml; tag/pin. diff --git a/docs/memory/arch-g2-forbidden-symbol.md b/docs/memory/arch-g2-forbidden-symbol.md index 8067b85..07c257a 100644 --- a/docs/memory/arch-g2-forbidden-symbol.md +++ b/docs/memory/arch-g2-forbidden-symbol.md @@ -61,7 +61,31 @@ resolution feeds the new `goModulePath(root)`. exec + pseudo-version + replace fixtures red (unit tests). arch 86.7% cover, golangci-lint + gofmt clean, m-cli self-`arch check` clean. -**Still owed in Phase B:** the root-`repo.meta.json` schema validation (item 1) -+ migrate m-stdlib/v-stdlib off `dist/` (the only two not on root meta), the +## Item 1 — meta-schema validation (added same branch, 2026-06-15) + +`m arch check` now also validates the repo's meta artifact. `metaCandidates` +(layer resolution) flipped to **root-first**: `repo.meta.json`, then +`dist/repo.meta.json`, then `dist/v-contract.json`. New `LoadMeta(root)` reads +the repo.meta.json-shaped artifact (root preferred, then `dist/`; **not** +`v-contract.json` — different shape) → `(Meta, path, found, err)`. `ValidateMeta` +requires **id, layer, language, verification_commands** (layer ∈ m|v); consumes/ +exposes optional. 
`Check` validates **only when a repo.meta.json is found** (a +v-contract-only / `--layer` repo is skipped — avoids breaking the v-contract test +fixtures and the pre-migration dist-only repos). Problems surface as +`Gate:"META", Kind:"meta-shape"` violations; `Report.CheckedMeta`. + +**GOTCHA (real bug the all-8 sweep caught):** `consumes`/`exposes` in the live +metas are **objects, not `[]string`** (e.g. v-pkg/v-cli `exposes`, m-stdlib +`consumes`/`exposes`). Typing them as `[]string` in the `Meta` struct made +`json.Unmarshal` hard-error on 3 repos. Fix: the `Meta` struct carries **only the +4 required fields**; optional/descriptive fields (consumes, exposes, repo, role, +license, …) are unknown-and-ignored. Regression test +`TestLoadMetaIgnoresObjectOptionalFields`. + +**Verified:** all 8 repos clean under G1–G4 + meta. arch 87.3% cover. + +**Still owed in Phase B:** **migrate m-stdlib + v-stdlib off `dist/repo.meta.json` +to root `repo.meta.json`** (the only two not on root meta — tooling is now +root-first, so they can migrate cleanly; per-repo increments in those repos), the scheduled meta-gate, the reusable `m-ci.yml`, and pinning `m-cli-ref` to a tag. See the org docs-repo `docs/vsl-msl/vsl-implementation-tracker.md` Phase B row. diff --git a/internal/arch/arch.go b/internal/arch/arch.go index 6f0f9ba..8dd190b 100644 --- a/internal/arch/arch.go +++ b/internal/arch/arch.go @@ -20,10 +20,14 @@ // - G4 — seam-pin: a repo requiring m-driver-sdk must pin a tagged release in // go.mod — no `replace` to it, no pseudo-version (untagged commit). // -// G1 and G2 apply to the m layer; G3 and G4 are layer-agnostic (a v consumer -// also must not hand-roll transport and must seam-pin). A repo declares its -// layer in a committed meta artifact ("layer": "m"|"v"); a `v`-layer repo passes -// G1/G2 trivially. 
+// It also validates the repo's standardized meta artifact (Phase B item 1): +// root repo.meta.json (preferred, then dist/repo.meta.json) must carry id, +// layer, language, and verification_commands; layer must be "m" or "v". +// +// G1 and G2 apply to the m layer; G3, G4, and meta-validation are layer-agnostic +// (a v consumer also must not hand-roll transport and must seam-pin). A repo +// declares its layer in a committed meta artifact ("layer": "m"|"v"); a `v`-layer +// repo passes G1/G2 trivially. package arch import ( @@ -88,31 +92,59 @@ var vistaSymbols = []struct { {"^XPD* (KIDS)", regexp.MustCompile(`\^XPD[A-Za-z0-9]*`)}, } -// Violation is one G1 finding — a dependency that crosses the waterline the -// wrong way (m → v). +// Violation is one gate finding — a waterline breach (G1–G4) or a meta-shape +// problem (META). type Violation struct { - Gate string `json:"gate"` // "G1" - Kind string `json:"kind"` // "go-dep" | "m-ref" + Gate string `json:"gate"` // "G1" | "G2" | "G3" | "G4" | "META" + Kind string `json:"kind"` // "go-dep" | "m-ref" | "vista-symbol" | "driver-ref" | "seam-replace" | "seam-untagged" | "meta-shape" Source string `json:"source"` // offending module path or file:line Detail string `json:"detail"` // human-readable explanation } // Report is the full waterline-gate result for one repo. 
type Report struct { - Layer Layer `json:"layer"` - CheckedGo bool `json:"checkedGo"` // G1 Go dependency closure - CheckedM bool `json:"checkedM"` // G1 m-ref + G2 forbidden-symbol - CheckedG3 bool `json:"checkedG3"` // G3 transport-monopoly (driver refs) - CheckedG4 bool `json:"checkedG4"` // G4 seam-pin (go.mod) - Violations []Violation `json:"violations"` + Layer Layer `json:"layer"` + CheckedGo bool `json:"checkedGo"` // G1 Go dependency closure + CheckedM bool `json:"checkedM"` // G1 m-ref + G2 forbidden-symbol + CheckedG3 bool `json:"checkedG3"` // G3 transport-monopoly (driver refs) + CheckedG4 bool `json:"checkedG4"` // G4 seam-pin (go.mod) + CheckedMeta bool `json:"checkedMeta"` // meta-artifact shape + Violations []Violation `json:"violations"` +} + +// Meta is the standardized repo meta artifact (the schema item 1 validates). +// Required: id, layer, language, verification_commands. Optional fields +// (consumes, exposes — repo-defined object shapes) and descriptive fields +// (repo, role, license, …) are allowed and ignored here. +type Meta struct { + ID string `json:"id"` + Layer string `json:"layer"` + Language []string `json:"language"` + VerificationCommands []string `json:"verification_commands"` +} + +// MetaProblem is one meta-shape finding (a missing or malformed field). +type MetaProblem struct { + Field string `json:"field"` + Detail string `json:"detail"` } // metaCandidates are the committed meta artifacts, in priority order, that may -// carry the repo's "layer" declaration (ADR §3.1). +// carry the repo's "layer" declaration (ADR §3.1). Root repo.meta.json is the +// standard location and is read first; the dist/ forms are read for back-compat +// while repos migrate to root (Phase B item 1). var metaCandidates = []string{ + "repo.meta.json", filepath.Join("dist", "repo.meta.json"), filepath.Join("dist", "v-contract.json"), - "repo.meta.json", // repos whose dist/ is gitignored (e.g. 
m-cli) +} + +// metaFileCandidates are the repo.meta.json-shaped artifacts (root preferred, +// then dist/) that LoadMeta validates. v-contract.json is a differently-shaped +// per-domain artifact and is not validated here. +var metaFileCandidates = []string{ + "repo.meta.json", + filepath.Join("dist", "repo.meta.json"), } // ResolveLayer determines the repo's declared layer. An explicit override @@ -148,7 +180,48 @@ func ResolveLayer(root, override string) (Layer, error) { return "", fmt.Errorf(`%s: invalid "layer" %q (want m or v)`, rel, meta.Layer) } } - return "", fmt.Errorf(`no "layer" declared — add it to dist/repo.meta.json or dist/v-contract.json, or pass --layer`) + return "", fmt.Errorf(`no "layer" declared — add it to repo.meta.json (root, preferred), dist/repo.meta.json, or dist/v-contract.json, or pass --layer`) +} + +// LoadMeta reads the repo's standardized meta artifact — root repo.meta.json +// preferred, then dist/repo.meta.json. found is false when neither exists (a +// repo that declares its layer only via dist/v-contract.json, or via --layer). +// A malformed JSON meta returns an error. +func LoadMeta(root string) (meta Meta, path string, found bool, err error) { + for _, rel := range metaFileCandidates { + body, readErr := os.ReadFile(filepath.Join(root, rel)) + if readErr != nil { + continue + } + if err := json.Unmarshal(body, &meta); err != nil { + return Meta{}, rel, true, fmt.Errorf("%s: %w", rel, err) + } + return meta, rel, true, nil + } + return Meta{}, "", false, nil +} + +// ValidateMeta checks the meta against the standardized schema (Phase B item 1): +// id, layer, language, and verification_commands are required; layer must be +// "m" or "v"; consumes and exposes are optional. Returns one MetaProblem per +// violation (empty when clean). 
+func ValidateMeta(meta Meta) []MetaProblem { + var ps []MetaProblem + if strings.TrimSpace(meta.ID) == "" { + ps = append(ps, MetaProblem{Field: "id", Detail: `required field "id" is missing or empty`}) + } + switch Layer(meta.Layer) { + case LayerM, LayerV: + default: + ps = append(ps, MetaProblem{Field: "layer", Detail: fmt.Sprintf(`"layer" must be "m" or "v" (got %q)`, meta.Layer)}) + } + if len(meta.Language) == 0 { + ps = append(ps, MetaProblem{Field: "language", Detail: `required field "language" is missing or empty`}) + } + if len(meta.VerificationCommands) == 0 { + ps = append(ps, MetaProblem{Field: "verification_commands", Detail: `required field "verification_commands" is missing or empty`}) + } + return ps } // parseGoListDeps extracts the distinct module import paths from the streamed @@ -445,6 +518,23 @@ func Check(root, override string) (Report, error) { rep := Report{Layer: layer} selfMod, hasMod := goModulePath(root) + // Meta-artifact shape (Phase B item 1). Validate when a repo.meta.json is + // present (root preferred, then dist/); a malformed meta is a hard error. + // A repo declaring its layer only via dist/v-contract.json or --layer has no + // repo.meta.json to validate and is skipped. + if meta, metaPath, found, mErr := LoadMeta(root); mErr != nil { + return rep, mErr + } else if found { + rep.CheckedMeta = true + for _, p := range ValidateMeta(meta) { + rep.Violations = append(rep.Violations, Violation{ + Gate: "META", Kind: "meta-shape", + Source: fmt.Sprintf("%s:%s", metaPath, p.Field), + Detail: p.Detail, + }) + } + } + // G1 + G2 apply to the m layer only (v → m, and VistA above the line, are // allowed). 
if layer == LayerM { diff --git a/internal/arch/arch_test.go b/internal/arch/arch_test.go index c146778..9cdb545 100644 --- a/internal/arch/arch_test.go +++ b/internal/arch/arch_test.go @@ -191,7 +191,8 @@ func TestCheckVLayerPassesTrivially(t *testing.T) { func TestCheckMLayerScansM(t *testing.T) { dir := t.TempDir() - writeFile(t, filepath.Join(dir, "dist", "repo.meta.json"), `{"layer":"m"}`) + writeFile(t, filepath.Join(dir, "dist", "repo.meta.json"), + `{"id":"tool:x","layer":"m","language":["m"],"verification_commands":["m test"]}`) writeFile(t, filepath.Join(dir, "src", "STDX.m"), " set x=$$cfg^VSLCFG(1)\n") rep, err := Check(dir, "") if err != nil { @@ -215,7 +216,8 @@ func TestCheckMLayerGoArmClean(t *testing.T) { writeFile(t, filepath.Join(dir, "go.mod"), "module example.com/clean\n\ngo 1.26\n") writeFile(t, filepath.Join(dir, "main.go"), "package main\n\nimport \"fmt\"\n\nfunc main() { fmt.Println(\"hi\") }\n") - writeFile(t, filepath.Join(dir, "dist", "repo.meta.json"), `{"layer":"m"}`) + writeFile(t, filepath.Join(dir, "dist", "repo.meta.json"), + `{"id":"tool:clean","layer":"m","language":["go"],"verification_commands":["go test ./..."]}`) rep, err := Check(dir, "") if err != nil { t.Fatalf("Check: %v", err) @@ -496,6 +498,111 @@ func TestCheckSdkExemptFromG3(t *testing.T) { } } +// --- Item 1: meta-schema validation ----------------------------------------- + +func TestValidateMetaClean(t *testing.T) { + m := Meta{ID: "tool:x", Layer: "m", Language: []string{"go"}, VerificationCommands: []string{"make test"}} + if p := ValidateMeta(m); len(p) != 0 { + t.Errorf("a complete meta has no problems, got %v", p) + } +} + +func TestValidateMetaMissingRequired(t *testing.T) { + m := Meta{Layer: "m"} // missing id, language, verification_commands + p := ValidateMeta(m) + if len(p) != 3 { + t.Errorf("expected 3 missing-field problems, got %d: %v", len(p), p) + } +} + +func TestValidateMetaBadLayer(t *testing.T) { + m := Meta{ID: "x", Layer: "z", Language: 
[]string{"go"}, VerificationCommands: []string{"t"}} + p := ValidateMeta(m) + if len(p) != 1 || p[0].Field != "layer" { + t.Errorf("expected 1 layer problem, got %v", p) + } +} + +func TestValidateMetaOptionalFieldsAllowedAbsent(t *testing.T) { + // consumes/exposes are optional — a meta without them is clean. + m := Meta{ID: "x", Layer: "v", Language: []string{"m"}, VerificationCommands: []string{"m test"}} + if p := ValidateMeta(m); len(p) != 0 { + t.Errorf("optional fields may be absent, got %v", p) + } +} + +func TestLoadMetaPrefersRoot(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "repo.meta.json"), + `{"id":"root","layer":"m","language":["go"],"verification_commands":["x"]}`) + writeFile(t, filepath.Join(dir, "dist", "repo.meta.json"), + `{"id":"dist","layer":"v","language":["m"],"verification_commands":["y"]}`) + m, _, found, err := LoadMeta(dir) + if err != nil || !found { + t.Fatalf("LoadMeta: err=%v found=%v", err, found) + } + if m.ID != "root" { + t.Errorf("root repo.meta.json must win, got id=%q", m.ID) + } +} + +func TestLoadMetaFallsToDist(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "dist", "repo.meta.json"), + `{"id":"dist","layer":"m","language":["m"],"verification_commands":["y"]}`) + m, _, found, err := LoadMeta(dir) + if err != nil || !found { + t.Fatalf("LoadMeta: err=%v found=%v", err, found) + } + if m.ID != "dist" { + t.Errorf("got id=%q", m.ID) + } +} + +func TestLoadMetaAbsent(t *testing.T) { + dir := t.TempDir() + if _, _, found, err := LoadMeta(dir); err != nil || found { + t.Errorf("no repo.meta.json → found=false, err=nil; got found=%v err=%v", found, err) + } +} + +func TestLoadMetaIgnoresObjectOptionalFields(t *testing.T) { + dir := t.TempDir() + // Real metas carry consumes/exposes as objects (not arrays); they must be + // ignored, not cause an unmarshal error (regression: v-pkg/m-stdlib metas). 
+ writeFile(t, filepath.Join(dir, "repo.meta.json"), + `{"id":"x","layer":"v","language":["go"],"verification_commands":["t"],"exposes":{"pkg":{"verbs":[]}},"consumes":{"sdk":"v0.3.0"}}`) + m, _, found, err := LoadMeta(dir) + if err != nil || !found { + t.Fatalf("object-valued optional fields must be ignored: err=%v found=%v", err, found) + } + if p := ValidateMeta(m); len(p) != 0 { + t.Errorf("clean meta with object optional fields, got %v", p) + } +} + +func TestCheckReportsMetaProblems(t *testing.T) { + dir := t.TempDir() + // Layer resolves (m) but the meta is missing the other required fields. + writeFile(t, filepath.Join(dir, "repo.meta.json"), `{"layer":"m"}`) + rep, err := Check(dir, "") + if err != nil { + t.Fatalf("Check: %v", err) + } + if !rep.CheckedMeta { + t.Error("expected CheckedMeta=true") + } + var meta int + for _, v := range rep.Violations { + if v.Gate == "META" { + meta++ + } + } + if meta == 0 { + t.Errorf("expected META problems for an incomplete meta, got %v", rep.Violations) + } +} + // --- helpers ----------------------------------------------------------------- func writeFile(t *testing.T, path, body string) { diff --git a/main.go b/main.go index 488fdde..8f26bcd 100644 --- a/main.go +++ b/main.go @@ -1032,10 +1032,12 @@ func (lspCmd) Run(_ *clikit.Context) error { // references no VSL* routine) and G2 forbidden-symbol (M code references no // VistA-only symbol) for an m-layer repo, plus G3 transport-monopoly (only // m-driver-sdk names a driver binary) and G4 seam-pin (go.mod pins a tagged -// m-driver-sdk, no replace) for every repo. +// m-driver-sdk, no replace) for every repo. It also validates the repo's +// standardized meta artifact (root repo.meta.json: id/layer/language/ +// verification_commands). 
type archCmd struct { - Check archCheckCmd `cmd:"" help:"Run the m/v waterline gates (G1 dependency-direction, G2 forbidden-symbol, G3 transport-monopoly, G4 seam-pin) for this repo."` + Check archCheckCmd `cmd:"" help:"Run the m/v waterline gates (G1 dependency-direction, G2 forbidden-symbol, G3 transport-monopoly, G4 seam-pin) + meta-shape validation for this repo."` } type archCheckCmd struct { @@ -1069,9 +1071,12 @@ func (c *archCheckCmd) Run(cc *clikit.Context) error { if rep.CheckedG4 { checks = append(checks, "seam-pin") } + if rep.CheckedMeta { + checks = append(checks, "meta") + } cc.KV( [2]string{"layer", cc.Accent(string(rep.Layer))}, - [2]string{"gates", "G1 dependency-direction · G2 forbidden-symbol · G3 transport-monopoly · G4 seam-pin"}, + [2]string{"gates", "G1 dependency-direction · G2 forbidden-symbol · G3 transport-monopoly · G4 seam-pin · meta-shape"}, [2]string{"checked", strings.Join(checks, ", ")}, [2]string{"violations", fmt.Sprintf("%d", len(rep.Violations))}, ) @@ -1088,8 +1093,8 @@ func (c *archCheckCmd) Run(cc *clikit.Context) error { if len(rep.Violations) > 0 { return clikit.Fail(clikit.ExitCheck, "WATERLINE_VIOLATION", - fmt.Sprintf("%d waterline violation(s)", len(rep.Violations)), - "the m layer must not depend on the v layer (G1) or reference VistA symbols (G2)") + fmt.Sprintf("%d gate violation(s)", len(rep.Violations)), + "fix the flagged waterline (G1–G4) or meta-shape findings above") } return nil }