From 9e50170f9805a65d1d29b703849670fa84351817 Mon Sep 17 00:00:00 2001 From: Rafael Richards Date: Sun, 14 Jun 2026 09:37:12 -0400 Subject: [PATCH 1/2] engine: add --chset flag for byte (M) mode on m test/coverage/watch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Byte-oriented m-stdlib suites (STDCSPRNG, STDB64, STDHEX, STDJSON UTF-8 decode) assume one M character == one byte. On YottaDB the m-test-engine container defaults to ydb_chset=UTF-8, under which byte ops like $ZCHAR(200) raise %YDB-E-BADCHAR — aborting STDCSPRNGTST (0/0) even though the library is correct under byte mode. Add an engine-neutral --chset m|utf-8 flag, threaded through engine.Options.Chset. The YDB-vs-IRIS difference stays in the engine adapter (its documented role): - YDB: prepend `env ydb_chset=<value>` to the invocation argv. Works for both LocalRunner and DockerRunner (overrides the container profile default) without widening the Runner seam. - IRIS: no-op. Byte mode is inherent — a Unicode IRIS instance round-trips all 256 byte values in-memory and has no ydb_chset analog. The flag is accepted only to keep the CLI uniform. Unset (the default) leaves argv untouched, so existing UTF-8 runs are unchanged. Verified (YDB m-test-engine / IRIS vista-iris, via the normal m test path, no hand docker exec): - YDB core suite under --chset m: 44 suites, 2414 assertions, 0 failed; STDCSPRNGTST 406/406 green. - Regression: same suite without the flag → STDCSPRNGTST 0/0 (default behavior unchanged). - IRIS STDHEXTST 49/49 identical with and without --chset m (no-op). Rebased onto post-T0.1 main: internal/engine adapters were unchanged, so the only conflict was the testCmd struct (kept both --resident and --chset fields); all engine.New call sites still thread Chset, and the resident-harness path reuses the same constructed engine. Fixed an errcheck finding in the new IRIS chset test under the current lint config. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/engine/engine.go | 23 ++++++++++++- internal/engine/engine_test.go | 61 ++++++++++++++++++++++++++++++++++ internal/engine/iris.go | 7 ++++ internal/engine/ydb.go | 23 ++++++++++--- main.go | 25 ++++++++------ 5 files changed, 122 insertions(+), 17 deletions(-) diff --git a/internal/engine/engine.go b/internal/engine/engine.go index 69932d8..8f06d90 100644 --- a/internal/engine/engine.go +++ b/internal/engine/engine.go @@ -60,6 +60,25 @@ type Options struct { IrisBin string // default "iris" Instance string // IRIS instance name (default "IRIS") Namespace string // IRIS namespace (default "USER") + // Chset selects the engine charset: "m" (byte mode — one char == one byte, + // required by binary suites like STDCSPRNG/STDB64/STDHEX) or "utf-8". Empty + // means "engine default" (YDB inherits its ambient $ydb_chset). On YDB this + // exports ydb_chset; on IRIS it is a no-op (byte semantics are inherent — + // see IrisEngine). + Chset string +} + +// ydbChset maps the user-facing --chset token to YottaDB's $ydb_chset value. +// Unknown/empty tokens yield "" (leave the engine default untouched). +func ydbChset(tok string) string { + switch tok { + case "m": + return "M" + case "utf-8", "utf8", "UTF-8": + return "UTF-8" + default: + return "" + } } // New builds the Engine for kind with opts (zero values defaulted). @@ -68,6 +87,8 @@ func New(kind Kind, opts Options) Engine { opts.Runner = LocalRunner } if kind == IRIS { + // opts.Chset intentionally ignored: byte mode is inherent on IRIS + // (see IrisEngine doc). Accepting the option keeps the CLI uniform. 
return &IrisEngine{ bin: orDefault(opts.IrisBin, "iris"), instance: orDefault(opts.Instance, "IRIS"), @@ -75,7 +96,7 @@ func New(kind Kind, opts Options) Engine { run: opts.Runner, } } - return &YdbEngine{bin: orDefault(opts.YdbBin, "ydb"), run: opts.Runner} + return &YdbEngine{bin: orDefault(opts.YdbBin, "ydb"), run: opts.Runner, chset: ydbChset(opts.Chset)} } func orDefault(s, def string) string { diff --git a/internal/engine/engine_test.go b/internal/engine/engine_test.go index 0ef90e0..4248acd 100644 --- a/internal/engine/engine_test.go +++ b/internal/engine/engine_test.go @@ -103,6 +103,67 @@ func TestIrisCommands(t *testing.T) { } } +// TestYdbChset verifies that Options.Chset is translated to an `env ydb_chset=…` +// prefix on every YDB invocation (byte mode for binary suites), and that the +// unset default leaves argv untouched (no regression on UTF-8 runs). +func TestYdbChset(t *testing.T) { + ctx := context.Background() + + t.Run("m maps to ydb_chset=M", func(t *testing.T) { + c := &capture{} + e := New(YDB, Options{Runner: c.run, Chset: "m"}) + + _, _ = e.RunRoutine(ctx, "^FOO", "a") + if got := strings.Join(c.argv, " "); got != "env ydb_chset=M ydb -run ^FOO a" { + t.Errorf("RunRoutine argv = %q", got) + } + _, _ = e.RunXCmd(ctx, "set ^X=1") + if got := strings.Join(c.argv, " "); got != "env ydb_chset=M ydb -run %XCMD set ^X=1" { + t.Errorf("RunXCmd argv = %q", got) + } + _, _ = e.RunScript(ctx, "halt\n") + if got := strings.Join(c.argv, " "); got != "env ydb_chset=M ydb -direct" { + t.Errorf("RunScript argv = %q", got) + } + }) + + t.Run("utf-8 maps to ydb_chset=UTF-8", func(t *testing.T) { + c := &capture{} + e := New(YDB, Options{Runner: c.run, Chset: "utf-8"}) + _, _ = e.RunRoutine(ctx, "^FOO") + if got := strings.Join(c.argv, " "); got != "env ydb_chset=UTF-8 ydb -run ^FOO" { + t.Errorf("RunRoutine argv = %q", got) + } + }) + + t.Run("unset leaves argv unchanged", func(t *testing.T) { + c := &capture{} + e := New(YDB, Options{Runner: c.run}) + _, 
_ = e.RunRoutine(ctx, "^FOO") + if got := strings.Join(c.argv, " "); got != "ydb -run ^FOO" { + t.Errorf("RunRoutine argv = %q", got) + } + }) +} + +// TestIrisChset verifies that Chset is a no-op on IRIS: byte semantics are +// inherent (Unicode build round-trips all 256 byte values), and IRIS has no +// ydb_chset analog, so the invocation must be identical with or without it. +func TestIrisChset(t *testing.T) { + ctx := context.Background() + with := &capture{} + without := &capture{} + _, _ = New(IRIS, Options{Runner: with.run, Instance: "VISTA", Namespace: "VISTA", Chset: "m"}).RunRoutine(ctx, "^FOO") + _, _ = New(IRIS, Options{Runner: without.run, Instance: "VISTA", Namespace: "VISTA"}).RunRoutine(ctx, "^FOO") + + if w, wo := strings.Join(with.argv, " "), strings.Join(without.argv, " "); w != wo { + t.Errorf("IRIS argv differs with chset: %q vs %q", w, wo) + } + if with.stdin != without.stdin { + t.Errorf("IRIS stdin differs with chset: %q vs %q", with.stdin, without.stdin) + } +} + func TestLocalRunnerExitCode(t *testing.T) { res, err := LocalRunner(context.Background(), []string{"sh", "-c", "printf hi; exit 3"}, "") if err != nil { diff --git a/internal/engine/iris.go b/internal/engine/iris.go index 2a69e8c..26e7c64 100644 --- a/internal/engine/iris.go +++ b/internal/engine/iris.go @@ -5,6 +5,13 @@ import "context" // IrisEngine runs M on InterSystems IRIS via the `iris` binary (the VA target // engine). Routine source lives in IRIS.DAT, so EnsureLoaded imports a .mac // from the irissync mirror before it can run. +// +// Note on charset: Options.Chset has no effect on IRIS — byte mode is INHERENT +// here. A Unicode IRIS instance round-trips all 256 byte values in-memory +// ($char(200) is one char with $ascii 200), and IRIS has no process-wide +// $ydb_chset analog to export. So `--chset m` is satisfied as a no-op and the +// flag is accepted only to keep the CLI surface uniform across engines. 
(Raw +// binary *device* I/O is a per-OPEN translation concern owned by the routine.) type IrisEngine struct { bin string instance string diff --git a/internal/engine/ydb.go b/internal/engine/ydb.go index 9b4d275..3d5e360 100644 --- a/internal/engine/ydb.go +++ b/internal/engine/ydb.go @@ -4,13 +4,26 @@ import "context" // YdbEngine runs M on YottaDB via the `ydb` binary (the tooling-native engine). type YdbEngine struct { - bin string - run Runner + bin string + run Runner + chset string // "" = inherit ambient $ydb_chset; "M"/"UTF-8" exported per-run } // Kind implements Engine. func (e *YdbEngine) Kind() Kind { return YDB } +// cmd builds the argv for a `ydb` invocation, prepending `env ydb_chset=` +// when a charset is pinned. The `env` prefix sets the variable for the ydb +// process under both LocalRunner (os/exec) and DockerRunner (inside `bash -lc`, +// overriding the container's profile default) without widening the Runner seam. +func (e *YdbEngine) cmd(args ...string) []string { + argv := append([]string{e.bin}, args...) + if e.chset != "" { + argv = append([]string{"env", "ydb_chset=" + e.chset}, argv...) + } + return argv +} + // EnsureLoaded is a no-op on YottaDB: routines compile on first reference // ($ydb_routines auto-compile), so there is nothing to pre-load. func (e *YdbEngine) EnsureLoaded(_ context.Context, _ string) error { return nil } @@ -18,18 +31,18 @@ func (e *YdbEngine) EnsureLoaded(_ context.Context, _ string) error { return nil // RunRoutine runs an entryref via `ydb -run`. Extra args are passed through as // $ZCMDLINE. func (e *YdbEngine) RunRoutine(ctx context.Context, entryref string, args ...string) (Result, error) { - argv := append([]string{e.bin, "-run", entryref}, args...) + argv := append(e.cmd("-run", entryref), args...) return e.run(ctx, argv, "") } // RunXCmd runs a one-off M command line via the %XCMD utility (which XECUTEs its // $ZCMDLINE): `ydb -run %XCMD `. 
func (e *YdbEngine) RunXCmd(ctx context.Context, mcmd string) (Result, error) { - return e.run(ctx, []string{e.bin, "-run", "%XCMD", mcmd}, "") + return e.run(ctx, e.cmd("-run", "%XCMD", mcmd), "") } // RunScript runs a multi-line script in YDB direct mode (`ydb -direct`), feeding // the script on stdin. The script should end with `halt`. func (e *YdbEngine) RunScript(ctx context.Context, script string) (Result, error) { - return e.run(ctx, []string{e.bin, "-direct"}, script) + return e.run(ctx, e.cmd("-direct"), script) } diff --git a/main.go b/main.go index e918d0b..bd0f1a4 100644 --- a/main.go +++ b/main.go @@ -417,6 +417,7 @@ type testCmd struct { Routines []string `help:"Extra source dirs to stage (e.g. m-stdlib/src for ^STDASSERT). Repeatable."` Namespace string `help:"IRIS namespace (default USER)."` Resident bool `help:"Run ';; tier: integration' suites via the resident harness (RUN^STDHARN) and reconcile with file-side pure-logic suites (spec §9)."` + Chset string `default:"" enum:",m,utf-8" help:"Engine charset: m (byte mode) or utf-8. Default: engine default (YDB inherits its ambient ydb_chset). 
Byte suites (STDCSPRNG/STDB64/STDHEX) need m on YDB; inherent on IRIS."` } type suiteResult struct { @@ -484,7 +485,7 @@ func (c *testCmd) Run(cc *clikit.Context) error { } if kind == engine.IRIS { stageDir := fmt.Sprintf("/tmp/m-test-%d", time.Now().UnixNano()) - eng = engine.New(kind, engine.Options{Runner: engine.DockerRunner(c.Docker, ""), Namespace: c.Namespace}) + eng = engine.New(kind, engine.Options{Runner: engine.DockerRunner(c.Docker, ""), Namespace: c.Namespace, Chset: c.Chset}) if err := engine.IrisStageLoad(ctx, eng, c.Docker, stageDir, files); err != nil { return clikit.Fail(clikit.ExitRuntime, "STAGE_FAILED", err.Error(), "") } @@ -495,10 +496,10 @@ func (c *testCmd) Run(cc *clikit.Context) error { return clikit.Fail(clikit.ExitRuntime, "STAGE_FAILED", err.Error(), "") } defer engine.DockerUnstage(ctx, c.Docker, stageDir) - eng = engine.New(kind, engine.Options{Runner: engine.DockerRunner(c.Docker, stageDir)}) + eng = engine.New(kind, engine.Options{Runner: engine.DockerRunner(c.Docker, stageDir), Chset: c.Chset}) } } else { - eng = engine.New(kind, engine.Options{Namespace: c.Namespace}) + eng = engine.New(kind, engine.Options{Namespace: c.Namespace, Chset: c.Chset}) } var rows []suiteResult if c.Resident { @@ -613,6 +614,7 @@ type coverageCmd struct { Namespace string `help:"IRIS namespace (default USER)."` MinPercent float64 `name:"min-percent" help:"Fail (exit 3) if line coverage is below this percent."` Lcov string `help:"Write an LCOV tracefile to this path."` + Chset string `default:"" enum:",m,utf-8" help:"Engine charset: m (byte mode) or utf-8. Default: engine default (YDB inherits its ambient ydb_chset). 
Byte suites (STDCSPRNG/STDB64/STDHEX) need m on YDB; inherent on IRIS."` } type fileCov struct { @@ -673,7 +675,7 @@ func (c *coverageCmd) Run(cc *clikit.Context) error { } if kind == engine.IRIS { stageDir := fmt.Sprintf("/tmp/m-cov-%d", time.Now().UnixNano()) - eng = engine.New(kind, engine.Options{Runner: engine.DockerRunner(c.Docker, ""), Namespace: c.Namespace}) + eng = engine.New(kind, engine.Options{Runner: engine.DockerRunner(c.Docker, ""), Namespace: c.Namespace, Chset: c.Chset}) if err := engine.IrisStageLoad(ctx, eng, c.Docker, stageDir, files); err != nil { return clikit.Fail(clikit.ExitRuntime, "STAGE_FAILED", err.Error(), "") } @@ -684,10 +686,10 @@ func (c *coverageCmd) Run(cc *clikit.Context) error { return clikit.Fail(clikit.ExitRuntime, "STAGE_FAILED", err.Error(), "") } defer engine.DockerUnstage(ctx, c.Docker, stageDir) - eng = engine.New(kind, engine.Options{Runner: engine.DockerRunner(c.Docker, stageDir)}) + eng = engine.New(kind, engine.Options{Runner: engine.DockerRunner(c.Docker, stageDir), Chset: c.Chset}) } } else { - eng = engine.New(kind, engine.Options{Namespace: c.Namespace}) + eng = engine.New(kind, engine.Options{Namespace: c.Namespace, Chset: c.Chset}) } result, err := mcov.Run(ctx, p, eng, routinePaths, suiteEntries) @@ -749,17 +751,17 @@ type stagedEngine struct { cleanup func() } -func newStagedEngine(ctx context.Context, kind engine.Kind, docker, namespace string, initialFiles []string) (*stagedEngine, error) { +func newStagedEngine(ctx context.Context, kind engine.Kind, docker, namespace, chset string, initialFiles []string) (*stagedEngine, error) { if docker == "" { return &stagedEngine{ - eng: engine.New(kind, engine.Options{Namespace: namespace}), + eng: engine.New(kind, engine.Options{Namespace: namespace, Chset: chset}), restage: func([]string) error { return nil }, cleanup: func() {}, }, nil } if kind == engine.IRIS { stageDir := fmt.Sprintf("/tmp/m-eng-%d", time.Now().UnixNano()) - eng := engine.New(kind, 
engine.Options{Runner: engine.DockerRunner(docker, ""), Namespace: namespace}) + eng := engine.New(kind, engine.Options{Runner: engine.DockerRunner(docker, ""), Namespace: namespace, Chset: chset}) restage := func(files []string) error { return engine.IrisStageLoad(ctx, eng, docker, stageDir, files) } if err := restage(initialFiles); err != nil { return nil, err @@ -771,7 +773,7 @@ func newStagedEngine(ctx context.Context, kind engine.Kind, docker, namespace st return nil, err } return &stagedEngine{ - eng: engine.New(kind, engine.Options{Runner: engine.DockerRunner(docker, stageDir)}), + eng: engine.New(kind, engine.Options{Runner: engine.DockerRunner(docker, stageDir), Chset: chset}), restage: func(files []string) error { return engine.DockerStage(ctx, docker, stageDir, files) }, cleanup: func() { engine.DockerUnstage(ctx, docker, stageDir) }, }, nil @@ -793,6 +795,7 @@ type watchCmd struct { Docker string `help:"Run --run suites inside this container via docker exec."` Routines []string `help:"Extra source dirs to stage for --run (e.g. m-stdlib/src). Repeatable."` Namespace string `help:"IRIS namespace for --run (default USER)."` + Chset string `default:"" enum:",m,utf-8" help:"Engine charset for --run: m (byte mode) or utf-8. Default: engine default. Byte suites need m on YDB; inherent on IRIS."` } func (c *watchCmd) Run(cc *clikit.Context) error { @@ -850,7 +853,7 @@ func (c *watchCmd) Run(cc *clikit.Context) error { ms, _ := filepath.Glob(filepath.Join(rdir, "*.m")) files = append(files, ms...) 
} - staged, err = newStagedEngine(ctx, kind, c.Docker, c.Namespace, files) + staged, err = newStagedEngine(ctx, kind, c.Docker, c.Namespace, c.Chset, files) if err != nil { return clikit.Fail(clikit.ExitRuntime, "STAGE_FAILED", err.Error(), "") } From 7ea7edf6b65c5686bc73d09add71a35390b927e3 Mon Sep 17 00:00:00 2001 From: Rafael Richards Date: Sun, 14 Jun 2026 09:39:12 -0400 Subject: [PATCH 2/2] docs(memory): record --chset byte-mode reconciliation (PR #2) Establish m-cli's per-repo docs/memory/ (org memory rule). Capture the durable fact: running m-stdlib byte suites (STDCSPRNG/STDB64/STDHEX) via m test needs --chset m on YDB; default UTF-8 aborts them; IRIS no-op. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/memory/MEMORY.md | 3 +++ docs/memory/chset-byte-mode.md | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 docs/memory/MEMORY.md create mode 100644 docs/memory/chset-byte-mode.md diff --git a/docs/memory/MEMORY.md b/docs/memory/MEMORY.md new file mode 100644 index 0000000..1c33714 --- /dev/null +++ b/docs/memory/MEMORY.md @@ -0,0 +1,3 @@ +# m-cli memory index + +- [chset byte mode](chset-byte-mode.md) — `--chset m|utf-8` on test/coverage/watch; m-stdlib byte suites need `m` on YDB diff --git a/docs/memory/chset-byte-mode.md b/docs/memory/chset-byte-mode.md new file mode 100644 index 0000000..ec3dbf6 --- /dev/null +++ b/docs/memory/chset-byte-mode.md @@ -0,0 +1,27 @@ +--- +name: chset-byte-mode +description: m test/coverage/watch take --chset m|utf-8; m-stdlib byte suites need m on YDB +metadata: + type: reference +--- + +`m test`, `m coverage`, and `m watch --run` accept `--chset m|utf-8` +(threaded through `engine.Options.Chset`). Default is empty = engine default +(YDB inherits ambient `$ydb_chset`, which in the `m-test-engine` container is +UTF-8). 
+ +**Running m-stdlib byte-oriented suites via m-cli requires `--chset m`.** +STDCSPRNG/STDB64/STDHEX (and STDJSON UTF-8 decode) assume one M char == one +byte; under UTF-8, byte values >127 re-encode and the suite aborts (e.g. +`STDCSPRNGTST` reports 0/0). Verified live: `STDCSPRNGTST` is 406/406 under +`--chset m` and fails without it (exit 3) — the default (no flag) behavior is unchanged. + +Mechanics: on YDB the adapter prepends `env ydb_chset=<value>` to the argv +(works for LocalRunner and DockerRunner, no Runner-seam change). On IRIS the +flag is a **no-op** — byte mode is inherent (Unicode IRIS round-trips all 256 +byte values; no `ydb_chset` analog). + +Landed via PR #2 (`engine-chset-byte-mode`), rebased onto post-T0.1 main +2026-06-14. The `internal/engine` adapters were unchanged by T0.1, so the only +conflict was the `testCmd` struct (both `--resident` and `--chset` kept). +Closes "Stage A" of m-stdlib's follow-up tracker (in the m-stdlib repo).