diff --git a/dist/repo.meta.json b/dist/repo.meta.json index 9d9d23e..c18b4c8 100644 --- a/dist/repo.meta.json +++ b/dist/repo.meta.json @@ -7,7 +7,7 @@ "layer": "m", "license": "AGPL-3.0", "agent_instructions": "AGENTS.md", - "verified_on": "2026-06-13", + "verified_on": "2026-06-14", "exposes": { "modules": "dist/stdlib-manifest.json", "errors": "dist/errors.json" diff --git a/dist/stdlib-manifest.json b/dist/stdlib-manifest.json index 24e1708..1cb764c 100644 --- a/dist/stdlib-manifest.json +++ b/dist/stdlib-manifest.json @@ -2783,7 +2783,7 @@ }, "STDCOMPRESS": { "synopsis": "m-stdlib — gzip / deflate / zstd via $&stdcompress callouts.", - "description": "doc: @tier optional\nm-lint: disable-file=M-MOD-024\nm-lint: disable-file=M-MOD-036\nm-lint: disable-file=M-MOD-020\nM-MOD-024 false positives: rc / out are initialised before every\nXECUTE'd $& call but the analyser cannot follow flow through the\nXECUTE indirection.\nM-MOD-036 (XECUTE injection) is intentional: the XECUTE wrapper is\nthe only way to invoke $&pkg.fn from M code that tree-sitter-m can\nstill parse — same trick as STDCRYPTO. The XECUTE source is built\nfrom a literal template plus a `sym` symbol that the M-side public\nsurface controls; no user data flows into the XECUTE string.\nM-MOD-020 (by-ref formal not written) false positives: dispatch\nhelpers write to `out` via the XECUTE'd $& call.\n\nPublic extrinsics (output via .out byref; return 1=ok / 0=fail):\n $$gzip^STDCOMPRESS(data,.out[,level]) — RFC 1952 gzip\n $$gunzip^STDCOMPRESS(data,.out) — RFC 1952 gunzip\n $$deflate^STDCOMPRESS(data,.out[,level]) — RFC 1951 deflate\n $$inflate^STDCOMPRESS(data,.out) — RFC 1951 inflate\n $$zstdCompress^STDCOMPRESS(data,.out[,level]) — RFC 8478 zstd\n $$zstdDecompress^STDCOMPRESS(data,.out) — RFC 8478 zstd\n $$available^STDCOMPRESS() — \"\"=ok, else missing\n\nErrors set $ECODE: ,U-STDCOMPRESS-CALLOUT-MISSING, (.so unloaded);\n,U-STDCOMPRESS-BAD-LEVEL, (level out of range); ,U-STDCOMPRESS-LIBZ-FAIL,\n(libz returned non-Z_STREAM_END); ,U-STDCOMPRESS-LIBZSTD-FAIL, (zstd\nreturned an error frame).\n\nLevels: gzip / deflate accept 1..9 (default 6); zstd accepts 1..22\n(default 3). Level 0 (no compression) is rejected to avoid surprise\npass-through.\n\nOutput cap: 1 MiB per call (YDB's max M-string length on this\nbuild; declared in tools/std_compress.xc). Streaming for larger\npayloads is queued.\n\nBackend: $&stdcompress. → libz (gzip / deflate) + libzstd\n(zstd). Source at src/callouts/stdcompress.c; descriptor at\ntools/std_compress.xc.\n\nDeployment runbook (full detail in docs/modules/stdcompress.md):\n 1. tools/build-callouts.sh ; produce so//stdcompress.so\n 2. export STDLIB_LIB=\n 3. export ydb_xc_stdcompress=/tools/std_compress.xc\n 4. ensure libz.so.1 + libzstd.so.1 are on the loader path", + "description": "doc: @tier optional\nm-lint: disable-file=M-MOD-024\nm-lint: disable-file=M-MOD-036\nm-lint: disable-file=M-MOD-020\nM-MOD-024 false positives: rc / out are initialised before every\nXECUTE'd $& call but the analyser cannot follow flow through the\nXECUTE indirection.\nM-MOD-036 (XECUTE injection) is intentional: the XECUTE wrapper is\nthe only way to invoke $&pkg.fn from M code that tree-sitter-m can\nstill parse — same trick as STDCRYPTO. The XECUTE source is built\nfrom a literal template plus a `sym` symbol that the M-side public\nsurface controls; no user data flows into the XECUTE string.\nM-MOD-020 (by-ref formal not written) false positives: dispatch\nhelpers write to `out` via the XECUTE'd $& call.\n\nPublic extrinsics (output via .out byref; return 1=ok / 0=fail):\n $$gzip^STDCOMPRESS(data,.out[,level]) — RFC 1952 gzip\n $$gunzip^STDCOMPRESS(data,.out) — RFC 1952 gunzip\n $$deflate^STDCOMPRESS(data,.out[,level]) — RFC 1951 deflate\n $$inflate^STDCOMPRESS(data,.out) — RFC 1951 inflate\n $$zstdCompress^STDCOMPRESS(data,.out[,level]) — RFC 8478 zstd\n $$zstdDecompress^STDCOMPRESS(data,.out) — RFC 8478 zstd\n $$available^STDCOMPRESS() — \"\"=ok, else missing\n\nErrors set $ECODE: ,U-STDCOMPRESS-CALLOUT-MISSING, (.so unloaded);\n,U-STDCOMPRESS-BAD-LEVEL, (level out of range); ,U-STDCOMPRESS-LIBZ-FAIL,\n(libz returned non-Z_STREAM_END); ,U-STDCOMPRESS-LIBZSTD-FAIL, (zstd\nreturned an error frame).\n\nLevels: gzip / deflate accept 1..9 (default 6); zstd accepts 1..22\n(default 3). Level 0 (no compression) is rejected to avoid surprise\npass-through.\n\nOutput cap: 1 MiB per call (YDB's max M-string length on this\nbuild; declared in tools/std_compress.xc). Streaming for larger\npayloads is queued.\n\nBackend (engine-branched in dispatchC / dispatchD on $zversion[\"IRIS\"):\n YottaDB: $&stdcompress. → libz (gzip / deflate) + libzstd\n (zstd). Source src/callouts/stdcompress.c; descriptor\n tools/std_compress.xc.\n IRIS: embedded Python — zlib (wbits 31 gzip / -15 raw deflate)\n and libzstd.so.1 via ctypes (no zstd Python module is\n shipped, but the system .so is). M<->Python binary is\n bridged latin-1 (codepoint==byte). Same wire formats\n (RFC 1952 / 1951 / 8478), so the *TST.m vectors hold on\n both engines.\n\nDeployment runbook (full detail in docs/modules/stdcompress.md):\n 1. tools/build-callouts.sh ; produce so//stdcompress.so\n 2. export STDLIB_LIB=\n 3. export ydb_xc_stdcompress=/tools/std_compress.xc\n 4. ensure libz.so.1 + libzstd.so.1 are on the loader path", "errors": [ "U-STDCOMPRESS-BAD-LEVEL", "U-STDCOMPRESS-CALLOUT-MISSING", @@ -2846,7 +2846,7 @@ "description": "", "source": { "file": "src/STDCOMPRESS.m", - "line": 53 + "line": 60 } }, "gunzip": { @@ -2892,7 +2892,7 @@ "description": "", "source": { "file": "src/STDCOMPRESS.m", - "line": 72 + "line": 79 } }, "deflate": { @@ -2950,7 +2950,7 @@ "description": "", "source": { "file": "src/STDCOMPRESS.m", - "line": 87 + "line": 94 } }, "inflate": { @@ -2996,7 +2996,7 @@ "description": "", "source": { "file": "src/STDCOMPRESS.m", - "line": 106 + "line": 113 } }, "zstdCompress": { @@ -3053,7 +3053,7 @@ "description": "", "source": { "file": "src/STDCOMPRESS.m", - "line": 121 + "line": 128 } }, "zstdDecompress": { @@ -3099,7 +3099,7 @@ "description": "", "source": { "file": "src/STDCOMPRESS.m", - "line": 140 + "line": 147 } }, "available": { @@ -3126,7 +3126,7 @@ "description": "Probes by attempting an empty round-trip on each backend.\nNever raises — clears $ECODE on the way out.", "source": { "file": "src/STDCOMPRESS.m", - "line": 155 + "line": 162 } } }, @@ -3138,7 +3138,7 @@ }, "STDCRYPTO": { "synopsis": "m-stdlib — Cryptographic digests via $&stdcrypto → libcrypto.", - "description": "doc: @tier optional\nm-lint: disable-file=M-MOD-024\nm-lint: disable-file=M-MOD-036\nm-lint: disable-file=M-MOD-020\nM-MOD-024 false positives: rc is initialised by every entry to\ndispatch3 / dispatch4 before any read, but the analyser cannot\ntrack flow through the $ETRAP indirection used to recover from\nmissing-callout failures.\nM-MOD-036 (XECUTE injection) is intentional here: the XECUTE\nwrapper is the only way to embed $&stdcrypto.() without\nthe tree-sitter-m grammar tripping on the package-prefixed\nexternal-call syntax (open work in tree-sitter-m). The\nXECUTEd command string is built only from a literal template\nand a `sym` argument that the M-side public surface controls\n— no user data ever flows into the XECUTE source. Same\npattern as STDXFRM's @expr indirection.\nM-MOD-020 (by-ref formal not written) false positives: dispatch3\n/ dispatch4 write to `out` by reference, but the writes happen\nthrough the XECUTE'd command string, which the by-ref analyser\ncan't introspect.\n\nPublic extrinsics:\n $$sha256^STDCRYPTO(data) — 64-char lowercase hex\n $$sha384^STDCRYPTO(data) — 96-char lowercase hex\n $$sha512^STDCRYPTO(data) — 128-char lowercase hex\n $$sha256Bytes^STDCRYPTO(data) — 32 raw bytes\n $$sha384Bytes^STDCRYPTO(data) — 48 raw bytes\n $$sha512Bytes^STDCRYPTO(data) — 64 raw bytes\n $$hmacSha256^STDCRYPTO(key,msg) — 64-char lowercase hex\n $$hmacSha384^STDCRYPTO(key,msg) — 96-char lowercase hex\n $$hmacSha512^STDCRYPTO(key,msg) — 128-char lowercase hex\n $$hmacSha256Bytes^STDCRYPTO(key,msg) — 32 raw bytes\n $$hmacSha384Bytes^STDCRYPTO(key,msg) — 48 raw bytes\n $$hmacSha512Bytes^STDCRYPTO(key,msg) — 64 raw bytes\n $$available^STDCRYPTO() — 1 iff stdcrypto callout\n is loaded\n\nBackend: $&stdcrypto. → libcrypto (OpenSSL EVP_Digest + HMAC).\nThe C source is at src/callouts/std_crypto.c; the YDB call-out\ndescriptor is at tools/std_crypto.xc; the build harness is\ntools/build-callouts.sh.\n\nYottaDB ABI note — argc-prefixed C signatures: YDB's\n$&pkg.fn(args) external-call ABI prepends an `int argc` to\nevery C entry point. The .xc descriptor still describes the\nuser-visible signature (sha256(I:,O:) etc.), but the actual\nC function is `int crypto_sha256(int argc, ydb_string_t* in,\nydb_string_t* out)`. A wrong argc returns -5. The legacy\n$ZF + ydb_ci form was abandoned because YDB r2.02's parser\nrejects the `.var` byref-output syntax for $ZF.\n\nDeployment runbook (full detail in docs/modules/stdcrypto.md):\n 1. tools/build-callouts.sh ; so//std_crypto.so\n 2. export STDLIB_LIB= ; resolved by the .xc\n 3. export ydb_xc_stdcrypto=/tools/std_crypto.xc\n 4. ensure libcrypto.so.3 (or .so.1.1) is on the loader path\n\nImplementation note — XECUTE wrapper:\nM-side calls go through dispatch3 / dispatch4, which build the\n\"set rc=$&stdcrypto.(...)\" command as a STRING and XECUTE\nit. This serves two purposes:\n (a) sidesteps the tree-sitter-m grammar gap for the\n `$&pkg.fn` external-call syntax (literal strings are\n not introspected by the parser);\n (b) sidesteps a pre-existing m fmt longest-prefix bug\n where bare $ZF was rewritten to $zfind / $ZFIND.\nThe XECUTE template is closed over a `sym` argument that the\npublic extrinsics control directly — no caller-supplied data\never appears in the command source.\n\nAll error paths set $ECODE rather than raising directly so callers\ncan wrap with a single $ETRAP — matches STDCSPRNG / STDCSV style.\n\nOut of scope at v1 (queued under T-N follow-ups):\n - AES-128/256-GCM encrypt/decrypt\n - Ed25519 / Ed448 sign/verify\n - X25519 key agreement\n - Streaming digest API (init/update/final tied to a handle)\n - SHA-1, MD5 (deprecated; ship only if a real consumer asks)\n - SHA-3 / SHAKE", + "description": "doc: @tier optional\nm-lint: disable-file=M-MOD-024\nm-lint: disable-file=M-MOD-036\nm-lint: disable-file=M-MOD-020\nM-MOD-024 false positives: rc is initialised by every entry to\ndispatch3 / dispatch4 before any read, but the analyser cannot\ntrack flow through the $ETRAP indirection used to recover from\nmissing-callout failures.\nM-MOD-036 (XECUTE injection) is intentional here: the XECUTE\nwrapper is the only way to embed $&stdcrypto.() without\nthe tree-sitter-m grammar tripping on the package-prefixed\nexternal-call syntax (open work in tree-sitter-m). The\nXECUTEd command string is built only from a literal template\nand a `sym` argument that the M-side public surface controls\n— no user data ever flows into the XECUTE source. Same\npattern as STDXFRM's @expr indirection.\nM-MOD-020 (by-ref formal not written) false positives: dispatch3\n/ dispatch4 write to `out` by reference, but the writes happen\nthrough the XECUTE'd command string, which the by-ref analyser\ncan't introspect.\n\nPublic extrinsics:\n $$sha256^STDCRYPTO(data) — 64-char lowercase hex\n $$sha384^STDCRYPTO(data) — 96-char lowercase hex\n $$sha512^STDCRYPTO(data) — 128-char lowercase hex\n $$sha256Bytes^STDCRYPTO(data) — 32 raw bytes\n $$sha384Bytes^STDCRYPTO(data) — 48 raw bytes\n $$sha512Bytes^STDCRYPTO(data) — 64 raw bytes\n $$hmacSha256^STDCRYPTO(key,msg) — 64-char lowercase hex\n $$hmacSha384^STDCRYPTO(key,msg) — 96-char lowercase hex\n $$hmacSha512^STDCRYPTO(key,msg) — 128-char lowercase hex\n $$hmacSha256Bytes^STDCRYPTO(key,msg) — 32 raw bytes\n $$hmacSha384Bytes^STDCRYPTO(key,msg) — 48 raw bytes\n $$hmacSha512Bytes^STDCRYPTO(key,msg) — 64 raw bytes\n $$available^STDCRYPTO() — 1 iff stdcrypto callout\n is loaded\n\nBackend (engine-branched in dispatch3 / dispatch4 on $zversion[\"IRIS\"):\n YottaDB: $&stdcrypto. → libcrypto (OpenSSL EVP_Digest + HMAC).\n C source src/callouts/std_crypto.c; descriptor\n tools/std_crypto.xc; built by tools/build-callouts.sh.\n IRIS: $SYSTEM.Encryption.SHAHash / .HMACSHA (built-in classes;\n no callout, no .so). Same raw-byte digest output, so the\n public hex/Bytes API and the *TST.m vectors are identical\n on both engines.\n\nYottaDB ABI note — argc-prefixed C signatures: YDB's\n$&pkg.fn(args) external-call ABI prepends an `int argc` to\nevery C entry point. The .xc descriptor still describes the\nuser-visible signature (sha256(I:,O:) etc.), but the actual\nC function is `int crypto_sha256(int argc, ydb_string_t* in,\nydb_string_t* out)`. A wrong argc returns -5. The legacy\n$ZF + ydb_ci form was abandoned because YDB r2.02's parser\nrejects the `.var` byref-output syntax for $ZF.\n\nDeployment runbook (full detail in docs/modules/stdcrypto.md):\n 1. tools/build-callouts.sh ; so//std_crypto.so\n 2. export STDLIB_LIB= ; resolved by the .xc\n 3. export ydb_xc_stdcrypto=/tools/std_crypto.xc\n 4. ensure libcrypto.so.3 (or .so.1.1) is on the loader path\n\nImplementation note — XECUTE wrapper:\nM-side calls go through dispatch3 / dispatch4, which build the\n\"set rc=$&stdcrypto.(...)\" command as a STRING and XECUTE\nit. This serves two purposes:\n (a) sidesteps the tree-sitter-m grammar gap for the\n `$&pkg.fn` external-call syntax (literal strings are\n not introspected by the parser);\n (b) sidesteps a pre-existing m fmt longest-prefix bug\n where bare $ZF was rewritten to $zfind / $ZFIND.\nThe XECUTE template is closed over a `sym` argument that the\npublic extrinsics control directly — no caller-supplied data\never appears in the command source.\n\nAll error paths set $ECODE rather than raising directly so callers\ncan wrap with a single $ETRAP — matches STDCSPRNG / STDCSV style.\n\nOut of scope at v1 (queued under T-N follow-ups):\n - AES-128/256-GCM encrypt/decrypt\n - Ed25519 / Ed448 sign/verify\n - X25519 key agreement\n - Streaming digest API (init/update/final tied to a handle)\n - SHA-1, MD5 (deprecated; ship only if a real consumer asks)\n - SHA-3 / SHAKE", "errors": [ "U-STDCRYPTO-CALLOUT-MISSING", "U-STDCRYPTO-DIGEST-FAIL", @@ -3185,7 +3185,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 87 + "line": 91 } }, "sha384": { @@ -3227,7 +3227,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 98 + "line": 102 } }, "sha512": { @@ -3269,7 +3269,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 109 + "line": 113 } }, "sha256Bytes": { @@ -3311,7 +3311,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 120 + "line": 124 } }, "sha384Bytes": { @@ -3352,7 +3352,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 134 + "line": 138 } }, "sha512Bytes": { @@ -3393,7 +3393,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 148 + "line": 152 } }, "hmacSha256": { @@ -3440,7 +3440,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 164 + "line": 168 } }, "hmacSha384": { @@ -3485,7 +3485,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 176 + "line": 180 } }, "hmacSha512": { @@ -3530,7 +3530,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 187 + "line": 191 } }, "hmacSha256Bytes": { @@ -3574,7 +3574,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 198 + "line": 202 } }, "hmacSha384Bytes": { @@ -3618,7 +3618,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 212 + "line": 216 } }, "hmacSha512Bytes": { @@ -3662,7 +3662,7 @@ "description": "", "source": { "file": "src/STDCRYPTO.m", - "line": 226 + "line": 230 } }, "available": { @@ -3688,7 +3688,7 @@ "description": "Pre-flight probe — never raises.", "source": { "file": "src/STDCRYPTO.m", - "line": 242 + "line": 246 } } }, @@ -6459,7 +6459,7 @@ "$$request^STDHTTP" ], "deprecated": "", - "description": "Never raises — clears $ECODE on the way out.", + "description": "Never raises — clears $ECODE on the way out. On IRIS the HTTP\nbackend is the built-in %Net.HttpRequest class, always present.", "source": { "file": "src/STDHTTP.m", "line": 313 diff --git a/docs/memory/MEMORY.md b/docs/memory/MEMORY.md index 8c5580c..4e7e75d 100644 --- a/docs/memory/MEMORY.md +++ b/docs/memory/MEMORY.md @@ -2,6 +2,7 @@ One line per memory file. Content lives in the files, not here. +- [iris-native-backends](iris-native-backends.md) — PR #1: the 3 optional modules' IRIS dispatch arm uses the **inlined `$zversion["IRIS"` probe** (not a public engine helper — that part of the PR was dropped as superseded); dual-engine local-test runbook (**YDB needs `--chset m`**, rebuild `/tmp/m` for the flag); `m-test-iris` embedded-Python is non-functional so STDCOMPRESS-IRIS is unverifiable locally; how the stale PR was landed without merge/rebase/force-push (forward-commit-to-master-tree). - [waterline-g1-gate](waterline-g1-gate.md) — the m/v waterline **G1 gate** (`m arch check` in m-cli) — dependency-direction (v→m only); how `layer` is declared (dist/ meta vs root `repo.meta.json` for m-cli's gitignored dist/), check-manifest doesn't schema-validate the meta, and the v-cli registry-regen `go mod tidy` dep. Built s12 (loose end C). - [t0b2-msl-kids-base](t0b2-msl-kids-base.md) — VSL T0b.2 (MSL KIDS-install-as-green): **YDB leg GREEN — 15/15 test-in-place** after the m-ydb gbldir (`e5dcf85`) + v-pkg streamed-install (`aa1991f`) fixes. **IRIS leg (s9):** `raises^STDASSERT` **now ported to IRIS** (try/catch `irisRaises` branch; YDB byte-identical) → STDFMT/STDARGS clean + STDASSERTTST 40/40 both engines + STDUUID P2 gone; remaining IRIS crashes are **non-raises**. **(s10):** file I/O made dual-engine — STDFS portable facade (`$$openRead/Write/Append`+`readLn`) + STDOS.env IRIS arm + 5 consumers migrated; **STDFSTST 50/50 both engines, YDB full 2098/0**. **But the consumer SUITES still don't go green on IRIS** — separate non-file blockers (STDJSON **byte-mode** parser, STDCSV **`@cb@` indirection**, **wide-char** descriptions). file-I/O ≠ green suites; see §s10. Full 15/15 needs byte-mode + callback-idiom + wide-char work (out of file-I/O scope). ≤8-char-name decision keeps STDASSERT/STDSEMVER as a rename follow-up. - [vsl-doc-gaps-v0.2](vsl-doc-gaps-v0.2.md) — how the VistA Standard Library architecture doc's §12 VDL gaps resolved at v0.2; the vdocs `XU:XU:UG` over-collapse defect that blocks gold-promotion of the Kernel feature guides. diff --git a/docs/memory/iris-native-backends.md b/docs/memory/iris-native-backends.md new file mode 100644 index 0000000..83451fa --- /dev/null +++ b/docs/memory/iris-native-backends.md @@ -0,0 +1,55 @@ +--- +name: iris-native-backends +description: PR #1 reconciliation — the 3 optional modules' IRIS dispatch arm uses the inlined `$zversion["IRIS"` probe (not a public engine helper); dual-engine local-test runbook; m-test-iris embedded-Python gap; how a stale PR was landed without merge/rebase/force-push. +metadata: + type: project +--- + +**IRIS-native backends for the 3 optional modules** (STDCRYPTO / STDCOMPRESS / +STDHTTP) landed via **PR #1** ("B2"), reconciled onto the s9–s12 IRIS sweep on +2026-06-14. The stale PR predated that sweep and was CONFLICTING + partially +superseded; reconciliation kept only the novel payload. + +**The engine seam is the inlined `$zversion["IRIS"` probe — the house idiom +for runtime modules.** Each module's dispatch helper gets an +`if $zversion["IRIS" quit $$iris…(…)` arm ahead of the YDB `$&pkg.fn` path +(STDCRYPTO → `$SYSTEM.Encryption.SHAHash/.HMACSHA`; STDHTTP → +`%Net.HttpRequest`; STDCOMPRESS → embedded-Python zlib + ctypes/zstd). The +PR's own `$$engine^STDOS()` helper was **dropped as superseded** — master's +STDOS is already IRIS-ported by inlining the same probe, STDASSERT already has +its `irisRaises` try/catch arm (s9), and `$$engine^STDHARN()` (internal) exists +if a helper is ever wanted. **Future IRIS arms in m-stdlib runtime code: inline +`$zversion["IRIS"`, do not add a cross-module engine helper.** See +[[t0b2-msl-kids-base]] for the s9–s12 idioms ($ZTIMESTAMP clock, xecute-built +dispatch, STDFS facade). + +**Dual-engine local-test runbook** (the canonical invocations — the Makefile +targets omit these flags because they target host-YDB CI / the Python m-cli): +- Build a current `m`: the committed `m-cli/dist/m` can be stale; rebuild with + `cd m-cli && GOPROXY=file://$HOME/go/pkg/mod/cache/download GOSUMDB=off GOFLAGS=-mod=mod go build -o /tmp/m .` + (the `--chset` flag was missing from the committed binary). +- **YDB:** `m test tests/X.m --engine=ydb --docker=m-test-engine --routines src --chset m` + — **`--chset m` is mandatory**: the m-test-engine container defaults to + `ydb_chset=UTF-8`, but the byte/binary modules need byte mode (else + `%YDB-E-BADCHAR` on raw digest/compress bytes). +- **IRIS:** `m test tests/X.m --engine=iris --docker=m-test-iris --routines src --namespace USER` + (byte mode is inherent on IRIS). +- Results: STDCRYPTOTST 23/23, STDHTTPTST 68(YDB)/67(IRIS), STDCRYPTODOCTST 1/1, + STDCOMPRESSTST 59/59 (YDB) all green. + +**Gap — `m-test-iris` (iris-community image) has non-functional embedded +Python**: `%SYS.Python` class exists but `Import("sys").version`→0, and the +STDCOMPRESS IRIS path aborts non-trappably (0/0). So **STDCOMPRESS-IRIS can't +be verified locally**; the PR validated it on `vista-iris` (working embedded +Python). The reconciled code is the PR's vista-iris-validated logic with only +the (proven-correct) seam changed. Discoveries register has the detail. + +**Landing a stale PR under this sandbox** (`git merge`, `git rebase`, +`git clean`, `rm`, and force-push are all denied): don't rebase. Make a +**forward commit on the branch** whose tree equals `master + additive +backends` — `git checkout origin/master -- .` (sync whole tree to master), +re-`git rm` master's deletions, `git checkout HEAD -- ` to +restore the wanted changes, regenerate `dist/`. GitHub's 3-way merge is then +clean (master's changes appear identically on both sides; backends are purely +additive in files master never touched) and the PR diff is minimal. Verify +with `git diff --cached --stat origin/master` before committing. diff --git a/docs/tracking/discoveries.md b/docs/tracking/discoveries.md index de0de2f..0983ed4 100644 --- a/docs/tracking/discoveries.md +++ b/docs/tracking/discoveries.md @@ -1,7 +1,7 @@ --- created: 2026-05-10 -last_modified: 2026-06-07 -revisions: 3 +last_modified: 2026-06-14 +revisions: 4 doc_type: [NOTES] --- @@ -71,6 +71,9 @@ requires "no open P0/P1 entries against those subjects." | 2026-06-13 | P2 | m-stdlib | Consumer SUITES stay IRIS-red for **non-file** reasons (byte-mode · `@cb@` indirection · wide-char) | After the file-I/O facade, the consumer suites still 0/0 on IRIS — **the file I/O was only part of why.** Confirmed by isolated probes: **STDJSON** — `$$parse^STDJSON` (no file) crashes on IRIS = the **byte-mode** assumption (STDJSON/STDB64/STDHEX/STDCSPRNG treat 1 char == 1 byte; IRIS strings are 16-bit Unicode) — documented charset constraint; **STDCSV** — `do @callback@(rownum,.fields)` (indirection-with-args) crashes on IRIS even with ASCII (`parseFile` callback dispatch); **STDCSV/STDSEED/STDLOG/STDXML** descriptions also carry non-ASCII (wide-char output, the m-iris GetOut/session-capture lane). STDCSV's *parser core* (`$$parse` of a string) DOES pass on IRIS (2/2), so it's the callback + wide-char, not the parser. | Out of file-I/O scope — separate follow-ups: byte-mode portability (big; affects the byte-oriented modules), an IRIS-portable callback-dispatch idiom for `parseFile^STDCSV` (replace `@cb@(args)` with an `xecute`d call or a fixed dispatch), and the wide-char capture path. Tracked so the next session knows the file-I/O refactor alone does NOT green these suites. | **superseded 2026-06-13 (s11)** — the "STDJSON byte-mode" half was a MISDIAGNOSIS (see s11 row below); the `@cb@` + wide-char halves stand (wide-char now fixed). | | 2026-06-13 | P1 | m-stdlib | **RE-BASELINE (s11): T0b.2 IRIS leg is 10/15; the gap is 4 code fixes, NONE byte-mode** | Rebuilt `m-iris/dist/m-iris` from `m-iris-driver@49a5b00` (the GetOut wide-char fix; the prior dist binary predated it) and ran `kids-test-in-place.sh iris` on foia. **10/15** (was 6/15). **The GetOut wide-char fix works on remote** — STDURL 150/0, STDREGEX 102/0, STDFMT 62/0 all green now. **Crucially STDB64 (55/0) + STDHEX (49/0) — the byte family — PASS on IRIS**, so there is **NO byte-mode blocker**; the s10 "STDJSON byte-mode" claim was wrong (it never isolated the crash). The 5 reds, each a DISTINCT cause: **(1) STDJSON** crash ` parse+12` for ALL inputs = the **unguarded `zgoto`-`$etrap`** idiom (parse + encode) — IRIS rejects YDB `zgoto LEVEL:label`; STDFS/STDHARN/STDASSERT.raises already guard the same idiom with an `if $zversion["IRIS" … quit` arm, STDJSON is the only base module that didn't. **(2) STDXML** crash ` parseElement+20 myNs("")` = **null/empty-string subscripts** (`myNs("")`/`nsMap("")` for the default namespace) — IRIS rejects null subscripts, YDB allows. **(3) STDCSV** won't COMPILE on IRIS: ` #5475 Expected end of line : '@callback@(curRow,.fields)'` — **IRIS has no ARGUMENT indirection** (`do @cb@(args)`), only name-indirection; whole routine fails → 0/0. **(4) STDDATE** 1 fail "year in plausible range": `now()` → `3567-05-6.157218T…` because it reads `$ZHOROLOG` as YDB's 4-comma `d,s,u,t` but **IRIS `$ZHOROLOG` is single elapsed-seconds**. **(5) STDUUID** 2 fails were COLLATERAL from the crashers in the same sequential process — source AND installed both 131/131 in isolation; no fix. | **4 code fixes (this session, TDD dual-engine, YDB byte-identical):** #1 STDJSON IRIS try/catch arm (parse+encode); #2 STDXML null-subscript fix [USER FORK → **sentinel-key in code**, chosen 2026-06-13] **[LANDED — `$$dfltNsKey()`=single space; STDXMLTST 209/209 both engines]**; #3 STDCSV `xecute`-built callback dispatch **[LANDED — + a latent STDFS readLn `$ECODE`-on-EOF bug; STDCSVTST 59/59 & STDFSTST 50/50 both engines]**; #4 STDDATE `$ZTIMESTAMP` IRIS arm in now() **[LANDED — STDDATETST 66/66 both engines]**. #5 STDUUID none. Then re-run on foia → expect 15/15. **#1 COMPLETE — STDJSONTST 209/209 BOTH engines (YDB + foia remote).** Beyond the etrap port: fixed TWO latent UTF-8 OPERATOR-PRECEDENCE bugs in emitUtf8 + the surrogate combine (M has no precedence — `$char(192+cp\\64)` evaluated as `(192+cp)\\64`; garbage on BOTH engines, latent because old tests used literal-byte passthrough); rewrote the 2 byte tests to `\u`-escapes+`$char`; graceful empty-key reject on IRIS (user decision — null local subscript is unconditional on IRIS incl. foia; documented in-code + stdjson.md + users-guide); fixed irisParse `$ECODE`-on-failure pollution (same class as STDFS readLn EOF); migrated STDJSONTST file tests off raw YDB `open:(newversion)` to the STDFS facade — **but STDJSONTST has 2 further IRIS tail issues exposed once the crash clears: (a) byte-exact UTF-8 tests use `$zchar` (unsupported on IRIS) + literal-multibyte source (byte-mode boundary) → needs the byte-mode decision; (b) `tParseObjectEmptyKeyAllowed` stores `root("")` = null subscript → same sentinel-key class as STDXML.** Lesson: each crashing suite stacks several IRIS issues; 10/15 undercounts per-suite work. | **CLOSED (s11): IRIS leg 17/15→17/17 on foia** (`suites=17 pass=1483 fail=0`). All 4 code fixes + STDUUID `unixMs` `$ztimestamp` IRIS arm (v7 time prefix was loosely-monotonic on IRIS via the YDB `$zhorolog` assumption) landed; STDFS+STDOS added to the base (STDOS ported to IRIS dual-engine first). YDB per-suite green; vehu in-place loop re-run owed. | | 2026-06-07 | P2 | vdocs | `consolidate` over-collapses the Kernel-8.0 per-feature User Guides into one anchor | Surfaced fetching the dedicated KIDS / Device Handler / TaskMan guides to fill the [VistA Standard Library architecture §12](../plans/msl-vsl-architecture.md) doc gaps. All ~41 distinct Kernel-8.0 `krn_8_0_{dg,sm}_*_ug` feature guides (KIDS, Device Handler, TaskMan, Alerts, Common Services, …) are assigned the **same `XU:XU:UG` anchor key** by the catalog/identity logic. `consolidate` treats them as one version group, keeps a single "winner", and demotes the other ~40 to `is_latest=0` — so they are fetched/converted/enriched/normalized and present in `index.db`, but **excluded from the FTS gold-search surface** (`vdocs ask` can't see them) and have no `documents/gold/consolidated/.../body.md` anchor. Same defect blocks `VIAB/via_vip_user_guide` (a separate fetch, also stuck at convert/promote). | m-stdlib side: the architecture doc's findings were read **directly from the normalized silver bodies** (`documents/silver/text/03-normalized/XU//body.md`), so the doc does not depend on the fix. **vdocs side (open, upstream):** the `catalog`/identity stage must derive a per-document anchor key for granular feature guides (e.g. include `doc_subject`/slug, not just `doc_code=UG`), then re-run `consolidate`→`index`→`relate`→`manifest`. Until then these guides are cited by their fetched body paths and marked 🟡 gold-promotion-pending in the architecture doc §12/§13. | open (in vdocs) | +| 2026-06-14 | docs | m-stdlib | PR #1 IRIS backends reconciled — engine seam is the inlined `$zversion["IRIS"` probe, not a public `$$engine^STDOS()` | Stale feature PR #1 ("B2: IRIS-native backends") predated the s9–s12 IRIS sweep. It added its own `$$engine^STDOS()` engine-detect helper + a STDASSERT `irisCapture` try/catch arm. **Both were superseded:** master's STDOS is already IRIS-ported (inlines `$zversion["IRIS"` per function, no public engine helper) and STDASSERT already has the `irisRaises` try/catch arm (s9). Reconciliation **dropped** the PR's STDOS + STDASSERT changes and **rewired** the 3 backends' 6 dispatch call-sites from `$$engine^STDOS()="iris"` → `if $zversion["IRIS"` (master's established runtime idiom, used in STDOS + STDASSERT + STDHARN.engine). The `$ZCHAR`→`$CHAR` change in STDCOMPRESSTST was kept (IRIS has no `$ZCHAR`; `$CHAR`≡`$ZCHAR` for 0..255 under `ydb_chset=M`, matching the rest of the byte suites). | Branch made mergeable without merge/rebase/force-push (all sandbox-denied): a forward commit set the branch tree to `master + additive backends` (revert STDOS/STDASSERT to master, regenerate dist), so GitHub's 3-way merge is clean. PR diff is now just the 3 backend modules + STDCOMPRESSTST + their manifest entries. | resolved (PR #1) | +| 2026-06-14 | P3 | m-stdlib | `m-test-iris` (iris-community image) has **non-functional embedded Python** → STDCOMPRESS-IRIS unverifiable locally | The `intersystemsdc/iris-community:latest` container used for `m test --docker=m-test-iris` ships the `%SYS.Python` class (exists=1) but its embedded-Python runtime is not wired up: `##class(%SYS.Python).Import("sys").version` returns `0`, and calling the STDCOMPRESS IRIS path (`irisInit`→`b.exec`→helper) aborts the suite non-trappably (0/0). STDCRYPTO-IRIS (23/23) and STDHTTP-IRIS (67/67) verify fine — they use built-in classes (`$SYSTEM.Encryption` / `%Net.HttpRequest`), no Python. STDCOMPRESS-YDB is green (59/59 via libz/libzstd callout). The PR's original STDCOMPRESS-IRIS 59/59 was on `vista-iris` (working embedded Python). | The reconciled STDCOMPRESS IRIS logic is the PR's vista-iris-validated code, unchanged except the proven-correct seam — so the gap is the local engine image, not the code. Verify STDCOMPRESS-IRIS on a `vista-iris`-class instance (or once embedded Python is enabled in m-test-iris). Local-test runbook: `--docker=m-test-engine --routines src --chset m` (YDB byte mode — the container defaults to UTF-8) / `--docker=m-test-iris --routines src --namespace USER` (IRIS). Robustness follow-up (out of reconciliation scope): make the STDCOMPRESS IRIS path fail gracefully (return "FAIL"/0) when embedded Python is absent instead of a non-trappable abort. | open (env / follow-up) | +| 2026-06-14 | P2 | tree-sitter-m | pinned grammar (CI's `m-dev-tools/tree-sitter-m`) can't parse `^||` process-private globals | STDCOMPRESS's IRIS backend memoized its embedded-Python init with a `^||STDCOMPRESS("py")` process-private global. The local Go `m` (newer bundled grammar) parses it fine, but CI's Python m-cli + pinned tree-sitter-m emits **2 ERROR nodes** on the two `^||` references → `m fmt --check` fails (`source did not parse cleanly`), red-gating the YDB job. (master uses no `^||`, so master is green.) | m-stdlib side: **dropped the `^||` memo** — `irisInit^STDCOMPRESS` now re-execs the Python defs per call (idempotent; `ctypes.CDLL` refcounts the same .so, redefining helpers is free). tree-sitter-m side (upstream, open): add `^||` process-private-global syntax to the grammar so the memo idiom is available to M code. | resolved (m-stdlib); open (tree-sitter-m lane) | ## Cross-references diff --git a/docs/tracking/module-tracker.md b/docs/tracking/module-tracker.md index 97b0ffa..669c665 100644 --- a/docs/tracking/module-tracker.md +++ b/docs/tracking/module-tracker.md @@ -114,16 +114,25 @@ current state. | [x] | P4 | L25 | 27 | [`STDXML`](../modules/stdxml.md) | `v0.4.0` | 14d | none (completed) | none | XML 1.0 + Namespaces 1.0 + XPath 1.0 + DTD envelope | n/a | | [x] | P4 | L26 | 28 | [`STDMATH`](../modules/stdmath.md) | `v0.4.0` | 1d | none (completed) | none | Numeric helpers — clamp / min / max / sum / count / mean | n/a | | [x] | P4 | L27 | 29 | [`STDXFRM`](../modules/stdxfrm.md) | `v0.4.0` | 1d | none (completed) | none | Higher-order array transforms — map / filter / reduce | n/a | -| [x] | P3 | H1 | 30 | [`STDCRYPTO`](../modules/stdcrypto.md) | `v0.4.0` | 2d | none (completed) | `$&stdcrypto.fn → libcrypto`; A6 | SHA-256/384/512 + HMAC-SHA-256/384/512 | 🟡 C12 | -| [x] | P3 | H2 | 31 | [`STDCOMPRESS`](../modules/stdcompress.md) | `v0.4.0` | 6d | none (completed) | `$&stdcompress.fn → libz + libzstd`; A6 | gzip / gunzip / deflate / inflate / zstdCompress / zstdDecompress | 🟡 C13 | -| [x] | P3 | H3 | 32 | [`STDHTTP`](../modules/stdhttp.md) | `v0.4.0` | 4d | none (options) | STDURL; `$&stdhttp.fn → libcurl`; A6 | HTTP/1.1 client + pure-M wire-format helpers | 🟡 C14 | +| [x] | P3 | H1 | 30 | [`STDCRYPTO`](../modules/stdcrypto.md) | `v0.4.0` | 2d | none (completed) | `$&stdcrypto.fn → libcrypto`; A6 | SHA-256/384/512 + HMAC-SHA-256/384/512 (+ IRIS-native arm: `$SYSTEM.Encryption`) | 🟡 C12 | +| [x] | P3 | H2 | 31 | [`STDCOMPRESS`](../modules/stdcompress.md) | `v0.4.0` | 6d | none (completed) | `$&stdcompress.fn → libz + libzstd`; A6 | gzip / gunzip / deflate / inflate / zstdCompress / zstdDecompress (+ IRIS-native arm: embedded Python zlib+ctypes/zstd) | 🟡 C13 | +| [x] | P3 | H3 | 32 | [`STDHTTP`](../modules/stdhttp.md) | `v0.4.0` | 4d | none (options) | STDURL; `$&stdhttp.fn → libcurl`; A6 | HTTP/1.1 client + pure-M wire-format helpers (+ IRIS-native arm: `%Net.HttpRequest`) | 🟡 C14 | | [ ] | — | T1 | 33 | [`STDHARN`](../modules/stdharn.md) | — | 3d | P2 `^%MONLBL` coverage (STDCOV) · P4 STDWATCH hooks | STDASSERT (no-halt orchestration mode) | Resident pure-M test/coverage harness — frames `^STDASSERT` suites for m-cli 5.1 (server-side delegation) | ✅ `internal/harness` (P0–P1) | **Aggregate.** ~108d shipped across all 32 landed modules (sum of the Effort column above). **Full engine suite green on `main` 2026-05-08: 32 suites, 2483/2483 assertions.** All three Phase 3 modules engine-green: STDCRYPTO H1 (23/23), STDCOMPRESS H2 (59/59), -STDHTTP H3 (68/68). **All numbered tickets T1–T30 closed.** Optional +STDHTTP H3 (68/68). **IRIS-native backends landed (PR #1, 2026-06-14, +reconciled onto the s9–s12 IRIS sweep):** each optional module gained +an `if $zversion["IRIS"` arm in its dispatch helper (STDCRYPTO → +`$SYSTEM.Encryption`, STDHTTP → `%Net.HttpRequest`, STDCOMPRESS → +embedded-Python zlib+zstd) — the engine seam is the inlined `$zversion` +probe (master's idiom), **not** a public `$$engine^STDOS()` helper (that +part of the PR was dropped as superseded). Dual-engine verified locally: +STDCRYPTO 23/23 and STDHTTP 67/67 green on IRIS (m-test-iris) as well as +YDB; STDCOMPRESS-IRIS needs working embedded Python (the iris-community +image lacks it — see discoveries 2026-06-14). **All numbered tickets T1–T30 closed.** Optional add-ons (rows tagged `none (options)`: T15 / T16 / T17 / T18 / T19 / T22 / STDHTTP iter 3) sit behind concrete-consumer drivers and are not gating any release. Per-module deep history (scaffolding, migrations, diff --git a/src/STDCOMPRESS.m b/src/STDCOMPRESS.m index a1ae590..8556923 100644 --- a/src/STDCOMPRESS.m +++ b/src/STDCOMPRESS.m @@ -36,9 +36,16 @@ ; build; declared in tools/std_compress.xc). Streaming for larger ; payloads is queued. ; - ; Backend: $&stdcompress. → libz (gzip / deflate) + libzstd - ; (zstd). Source at src/callouts/stdcompress.c; descriptor at - ; tools/std_compress.xc. + ; Backend (engine-branched in dispatchC / dispatchD on $zversion["IRIS"): + ; YottaDB: $&stdcompress. → libz (gzip / deflate) + libzstd + ; (zstd). Source src/callouts/stdcompress.c; descriptor + ; tools/std_compress.xc. + ; IRIS: embedded Python — zlib (wbits 31 gzip / -15 raw deflate) + ; and libzstd.so.1 via ctypes (no zstd Python module is + ; shipped, but the system .so is). M<->Python binary is + ; bridged latin-1 (codepoint==byte). Same wire formats + ; (RFC 1952 / 1951 / 8478), so the *TST.m vectors hold on + ; both engines. ; ; Deployment runbook (full detail in docs/modules/stdcompress.md): ; 1. tools/build-callouts.sh ; produce so//stdcompress.so @@ -198,7 +205,9 @@ ; doc: @internal ; doc: XECUTE-wraps $&stdcompress.(data,.out,lvl). ; doc: Returns "" on success, "MISSING" if .so unloaded, - ; doc: "FAIL" if libz/libzstd returned non-success. + ; doc: "FAIL" if libz/libzstd returned non-success. On IRIS, branches + ; doc: to the embedded-Python backend (irisC). + if $zversion["IRIS" quit $$irisC($$irisFn(sym),data,.out,lvl) new $etrap,rc,cmd set $etrap="set $ecode="""" set rc=-1 quit ""MISSING""" set rc=0 @@ -211,7 +220,9 @@ ; dispatchD(sym,data,out) ; Decompress dispatch — 2-arg $&. Returns status. ; doc: @internal - ; doc: Same XECUTE-wrap rationale as dispatchC. + ; doc: Same XECUTE-wrap rationale as dispatchC. On IRIS, branches to + ; doc: the embedded-Python backend (irisD). + if $zversion["IRIS" quit $$irisD($$irisFn(sym),data,.out) new $etrap,rc,cmd set $etrap="set $ecode="""" set rc=-1 quit ""MISSING""" set rc=0 @@ -222,3 +233,64 @@ if 'rc quit "FAIL" quit "" ; + ; ---------- IRIS-native backend (embedded Python: zlib + ctypes/zstd) - + ; IRIS has no $&pkg.fn ABI and ships no string-level gzip/zstd class, so + ; the IRIS arm drives embedded Python: zlib (wbits 31 gzip / -15 raw + ; deflate) and libzstd.so.1 via ctypes. M<->Python binary is bridged by + ; latin-1 (codepoint==byte for 0..255). The helpers are defined once per + ; process in __main__; every IRIS call is XECUTE-wrapped so tree-sitter-m + ; never sees ##class / OREF-dot syntax (same rationale as the $& wrap). + ; +irisFn(sym) ; Map a YDB callout symbol to its Python helper name. + ; doc: @internal + quit $select(sym="gzip":"gz",sym="deflate":"df",sym="zstdCompress":"zc",sym="gunzip":"gunz",sym="inflate":"inf",sym="zstdDecompress":"zd",1:"") + ; +irisInit() ; Define the zlib/zstd Python helpers in __main__. + ; doc: @internal + ; doc: Re-run per call (idempotent): redefining the helpers in + ; doc: __main__ is cheap and ctypes.CDLL of an already-loaded .so + ; doc: refcounts the same handle rather than reloading. A per-process + ; doc: memo via a `^||` process-private global was dropped — the + ; doc: pinned tree-sitter-m grammar can't parse `^||` (discoveries + ; doc: 2026-06-14), and the re-exec cost is negligible vs the codec. + new c,b,main + set c="import zlib,ctypes"_$char(10) + set c=c_"def gz(s,l):"_$char(10)_" o=zlib.compressobj(l,zlib.DEFLATED,31);return (o.compress(s.encode('latin-1'))+o.flush()).decode('latin-1')"_$char(10) + set c=c_"def gunz(s):"_$char(10)_" return zlib.decompress(s.encode('latin-1'),31).decode('latin-1')"_$char(10) + set c=c_"def df(s,l):"_$char(10)_" o=zlib.compressobj(l,zlib.DEFLATED,-15);return (o.compress(s.encode('latin-1'))+o.flush()).decode('latin-1')"_$char(10) + set c=c_"def inf(s):"_$char(10)_" return zlib.decompress(s.encode('latin-1'),-15).decode('latin-1')"_$char(10) + set c=c_"_z=ctypes.CDLL('libzstd.so.1')"_$char(10) + set c=c_"_z.ZSTD_compressBound.restype=ctypes.c_size_t"_$char(10) + set c=c_"_z.ZSTD_compress.restype=ctypes.c_size_t"_$char(10) + set c=c_"_z.ZSTD_decompress.restype=ctypes.c_size_t"_$char(10) + set c=c_"_z.ZSTD_isError.restype=ctypes.c_uint"_$char(10) + set c=c_"_z.ZSTD_getFrameContentSize.restype=ctypes.c_ulonglong"_$char(10) + set c=c_"def zc(s,l):"_$char(10)_" src=s.encode('latin-1');cap=_z.ZSTD_compressBound(len(src));d=ctypes.create_string_buffer(cap);n=_z.ZSTD_compress(d,cap,src,len(src),l);return d.raw[:n].decode('latin-1')"_$char(10) + set c=c_"def zd(s):"_$char(10)_" src=s.encode('latin-1')"_$char(10)_" cs=_z.ZSTD_getFrameContentSize(src,len(src))"_$char(10)_" if cs>=2**64-2:"_$char(10)_" raise ValueError('bad zstd frame')"_$char(10)_" d=ctypes.create_string_buffer(int(cs) if cs>0 else 1)"_$char(10)_" n=_z.ZSTD_decompress(d,int(cs),src,len(src))"_$char(10)_" if _z.ZSTD_isError(n):"_$char(10)_" raise ValueError('zstd error')"_$char(10)_" return d.raw[:n].decode('latin-1')"_$char(10) + xecute "set main=##class(%SYS.Python).Import(""__main__"")" + xecute "set b=##class(%SYS.Python).Builtins()" + xecute "do b.exec(c,main.""__dict__"")" + quit + ; +irisC(fn,data,out,lvl) ; IRIS compress via the Python helper fn(data,lvl). + ; doc: @internal + ; doc: "" on success, "FAIL" if Python raised. The call is wrapped in an + ; doc: ObjectScript try/catch, NOT an M $ETRAP: a Python does not + ; doc: unwind cleanly through $ETRAP (it hangs), but try/catch catches it + ; doc: and the init+compress run as one guarded block. + new ok + if fn="" quit "FAIL" + set ok=0 + xecute "try { do irisInit^STDCOMPRESS() set out=##class(%SYS.Python).Import(""__main__"")."_fn_"(data,lvl) set ok=1 } catch ex { set ok=0 }" + quit $select(ok:"",1:"FAIL") + ; +irisD(fn,data,out) ; IRIS decompress via the Python helper fn(data). + ; doc: @internal + ; doc: "" on success, "FAIL" if Python raised (corrupt input). Same + ; doc: ObjectScript try/catch rationale as irisC. + new ok + if fn="" quit "FAIL" + set ok=0 + xecute "try { do irisInit^STDCOMPRESS() set out=##class(%SYS.Python).Import(""__main__"")."_fn_"(data) set ok=1 } catch ex { set ok=0 }" + quit $select(ok:"",1:"FAIL") + ; diff --git a/src/STDCRYPTO.m b/src/STDCRYPTO.m index 6e25d67..1b46bf5 100644 --- a/src/STDCRYPTO.m +++ b/src/STDCRYPTO.m @@ -36,10 +36,14 @@ ; $$available^STDCRYPTO() — 1 iff stdcrypto callout ; is loaded ; - ; Backend: $&stdcrypto. → libcrypto (OpenSSL EVP_Digest + HMAC). - ; The C source is at src/callouts/std_crypto.c; the YDB call-out - ; descriptor is at tools/std_crypto.xc; the build harness is - ; tools/build-callouts.sh. + ; Backend (engine-branched in dispatch3 / dispatch4 on $zversion["IRIS"): + ; YottaDB: $&stdcrypto. → libcrypto (OpenSSL EVP_Digest + HMAC). + ; C source src/callouts/std_crypto.c; descriptor + ; tools/std_crypto.xc; built by tools/build-callouts.sh. + ; IRIS: $SYSTEM.Encryption.SHAHash / .HMACSHA (built-in classes; + ; no callout, no .so). Same raw-byte digest output, so the + ; public hex/Bytes API and the *TST.m vectors are identical + ; on both engines. ; ; YottaDB ABI note — argc-prefixed C signatures: YDB's ; $&pkg.fn(args) external-call ABI prepends an `int argc` to @@ -267,7 +271,9 @@ dispatch3(sym,inp,out,isDigest) ; Invoke $&stdcrypto.(inp,.out). ; doc: @internal ; doc: Wraps $& in an XECUTE'd command string. Returns 1 on - ; doc: success, 0 on failure with $ECODE set. + ; doc: success, 0 on failure with $ECODE set. On IRIS, branches to + ; doc: irisDigest ($SYSTEM.Encryption.SHAHash) instead of the YDB callout. + if $zversion["IRIS" quit $$irisDigest(sym,inp,.out) new $etrap,rc,cmd set $etrap="set $ecode="""" set rc=-1 quit -1" set rc=0 @@ -281,7 +287,9 @@ ; dispatch4(sym,key,msg,out) ; Invoke $&stdcrypto.(key,msg,.out). ; doc: @internal - ; doc: Same XECUTE-wrap rationale as dispatch3. + ; doc: Same XECUTE-wrap rationale as dispatch3. On IRIS, branches to + ; doc: irisHmac ($SYSTEM.Encryption.HMACSHA) instead of the YDB callout. + if $zversion["IRIS" quit $$irisHmac(sym,key,msg,.out) new $etrap,rc,cmd set $etrap="set $ecode="""" set rc=-1 quit -1" set rc=0 @@ -292,6 +300,40 @@ set $ecode=",U-STDCRYPTO-HMAC-FAIL," quit 0 ; + ; ---------- IRIS-native backend ($SYSTEM.Encryption) ---------- + ; IRIS has no $&pkg.fn callout ABI; it ships SHA / HMAC as built-in + ; ObjectScript class methods. The calls are XECUTE-wrapped for the + ; same reason as the YDB arm — $SYSTEM.. is not M the + ; tree-sitter-m grammar parses, so it must live inside a string. + ; Both produce raw digest bytes, identical to libcrypto's output. + ; +digestBits(sym) ; SHA bit-width implied by sym name (256 / 384 / 512). + ; doc: @internal + quit $select(sym["512":512,sym["384":384,1:256) + ; +irisDigest(sym,inp,out) ; IRIS SHA digest into out via $SYSTEM.Encryption.SHAHash. + ; doc: @internal + ; doc: Returns 1 on success, 0 (with $ECODE) on failure. + new $etrap,cmd,bits + set $etrap="set $ecode="""" quit 0" + set bits=$$digestBits(sym) + set cmd="set out=$system.Encryption.SHAHash("_bits_",inp)" + xecute cmd + if $length($get(out))'=(bits\8) set $ecode=",U-STDCRYPTO-DIGEST-FAIL," quit 0 + quit 1 + ; +irisHmac(sym,key,msg,out) ; IRIS HMAC into out via $SYSTEM.Encryption.HMACSHA. + ; doc: @internal + ; doc: Returns 1 on success, 0 (with $ECODE) on failure. Arg order is + ; doc: HMACSHA(bits,data,key) — verified against RFC 4231 vectors. + new $etrap,cmd,bits + set $etrap="set $ecode="""" quit 0" + set bits=$$digestBits(sym) + set cmd="set out=$system.Encryption.HMACSHA("_bits_",msg,key)" + xecute cmd + if $length($get(out))'=(bits\8) set $ecode=",U-STDCRYPTO-HMAC-FAIL," quit 0 + quit 1 + ; zeros(n) ; n NUL bytes — pre-allocates the O:ydb_string_t* output. ; doc: @internal ; doc: Pre-allocation for YDB callout output buffers. diff --git a/src/STDHTTP.m b/src/STDHTTP.m index f6e2e2e..a42893e 100644 --- a/src/STDHTTP.m +++ b/src/STDHTTP.m @@ -316,7 +316,9 @@ set resp("body")=respBody ; doc: @since v0.4.0 ; doc: @stable stable ; doc: @see $$request^STDHTTP - ; doc: Never raises — clears $ECODE on the way out. + ; doc: Never raises — clears $ECODE on the way out. On IRIS the HTTP + ; doc: backend is the built-in %Net.HttpRequest class, always present. + if $zversion["IRIS" quit 1 new $etrap,rc,cmd if $$env^STDOS("ydb_xc_stdhttp")="" quit 0 set $etrap="set $ecode="""" set rc=0 quit 0" @@ -368,7 +370,9 @@ set resp("body")=respBody dispatchPerform(method,url,headerBlock,body,timeoutMs,follow,verify,statusCode,respHeaders,respBody,errMsg) ; Invoke $&stdhttp.http_perform(...). ; doc: @internal ; doc: XECUTE-wraps the namespaced $&pkg.fn call. Returns the - ; doc: C-side rc on success, -99 if the callout is unavailable. + ; doc: C-side rc on success, -99 if the callout is unavailable. On IRIS + ; doc: it dispatches to irisPerform (%Net.HttpRequest) instead. + if $zversion["IRIS" quit $$irisPerform(method,url,headerBlock,body,timeoutMs,follow,verify,.statusCode,.respHeaders,.respBody,.errMsg) new $etrap,rc,cmd if $$env^STDOS("ydb_xc_stdhttp")="" quit -99 set $etrap="set $ecode="""" set rc=-99 quit -99" @@ -378,3 +382,49 @@ set resp("body")=respBody set $ecode="" quit rc ; + ; ---------- IRIS-native backend (%Net.HttpRequest) ---------- + ; IRIS has no $&pkg.fn / libcurl callout; it ships a built-in HTTP/1.1 + ; client. The OREF / ##class syntax is XECUTE-wrapped (same rationale as + ; the $& wrap) so tree-sitter-m never parses it. The request object REQ + ; and send-status HSC persist across the XECUTE'd statements via the + ; shared symbol table. Output (statusCode/respHeaders/respBody/errMsg) + ; mirrors the libcurl contract so request^STDHTTP parses both engines + ; identically: respHeaders is rebuilt as a "StatusLine CRLF headers + ; CRLFCRLF" block for parseHeaderStream. + ; +irisPerform(method,url,headerBlock,body,timeoutMs,follow,verify,statusCode,respHeaders,respBody,errMsg) ; IRIS HTTP via %Net.HttpRequest. + ; doc: @internal + new parts,host,port,https,target,ok,i,line,nm,vl,nl,hk,REQ,HSC + set statusCode=0,respHeaders="",respBody="",errMsg="" + do parse^STDURL(url,.parts) + set host=$get(parts("host")),port=+$get(parts("port")),target=$$requestTarget(.parts) + set https=$select($get(parts("scheme"))="https":1,1:0) + if 'port set port=$select(https:443,1:80) + ; Create + configure the request object (REQ persists across XECUTEs). + set ok=0 + xecute "try { set REQ=##class(%Net.HttpRequest).%New() set REQ.Server=host,REQ.Port=port,REQ.Https=https,REQ.Timeout=$select(timeoutMs>999:timeoutMs\1000,1:1) set ok=1 } catch e { set errMsg=$zerror set ok=0 }" + if 'ok set:errMsg="" errMsg="STDHTTP-IRIS-INIT-FAIL" quit 7 + ; Request headers — split the CRLF block into "Name: value" lines. + set nl=$char(13,10) + for i=1:1:$length(headerBlock,nl) do + . set line=$piece(headerBlock,nl,i) quit:line="" + . set nm=$piece(line,":",1),vl=$piece(line,":",2,$length(line,":")) + . if $extract(vl)=" " set vl=$extract(vl,2,$length(vl)) + . xecute "do REQ.SetHeader(nm,vl)" + ; Request body. + if $length(body) xecute "do REQ.EntityBody.Write(body)" + ; Send — Send() returns an error %Status (no throw) on DNS/TCP/TLS fail. + set ok=0 + xecute "try { set HSC=REQ.Send(method,target) set ok=##class(%SYSTEM.Status).IsOK(HSC) } catch e { set ok=0,HSC="""",errMsg=$zerror }" + if 'ok do quit 7 + . if $get(HSC)'="" xecute "set errMsg=$piece(##class(%SYSTEM.Status).GetErrorText(HSC),$char(13,10),1)" + . set:$get(errMsg)="" errMsg="STDHTTP-IRIS-SEND-FAIL" + ; Success — status, body, and a reconstructed header block. + xecute "set statusCode=REQ.HttpResponse.StatusCode" + xecute "set respBody=REQ.HttpResponse.Data.Read($select(REQ.HttpResponse.Data.Size>0:REQ.HttpResponse.Data.Size,1:1))" + xecute "set respHeaders=REQ.HttpResponse.StatusLine_nl" + set hk="" + for xecute "set hk=$order(REQ.HttpResponse.Headers(hk))" quit:hk="" xecute "set respHeaders=respHeaders_hk_"": ""_REQ.HttpResponse.GetHeader(hk)_nl" + set respHeaders=respHeaders_nl + quit 0 + ; diff --git a/tests/STDCOMPRESSTST.m b/tests/STDCOMPRESSTST.m index 9847730..5706008 100644 --- a/tests/STDCOMPRESSTST.m +++ b/tests/STDCOMPRESSTST.m @@ -39,10 +39,13 @@ ; ---- helpers ---- ; mkBinary(n) ; Build an n-byte string of bytes 0..255 cycling. - ; doc: $ZCHAR for byte semantics regardless of $ZCHSET. + ; doc: $CHAR is byte-exact under YDB byte mode (ydb_chset=M, the + ; doc: contract for these suites) and on IRIS (codepoint==byte for + ; doc: 0..255), so the suite is engine-portable. $ZCHAR would be + ; doc: YDB-only and breaks IRIS parsing (). new s,i set s="" - for i=0:1:n-1 set s=s_$zchar(i#256) + for i=0:1:n-1 set s=s_$char(i#256) quit s ; mkRepeated(unit,times) ; Build unit repeated `times` times — highly compressible. @@ -63,8 +66,8 @@ new buf,ok set ok=$$gzip^STDCOMPRESS("hello",.buf) do true^STDASSERT(.pass,.fail,ok,"gzip succeeded") - do eq^STDASSERT(.pass,.fail,$zascii($extract(buf,1)),31,"byte 1 = 0x1F") - do eq^STDASSERT(.pass,.fail,$zascii($extract(buf,2)),139,"byte 2 = 0x8B") + do eq^STDASSERT(.pass,.fail,$ascii($extract(buf,1)),31,"byte 1 = 0x1F") + do eq^STDASSERT(.pass,.fail,$ascii($extract(buf,2)),139,"byte 2 = 0x8B") quit ; tGzipRoundTripAscii(pass,fail) ;@TEST "gzip -> gunzip round-trips an ASCII string" @@ -149,7 +152,7 @@ quit ; tInflateRejectsGarbage(pass,fail) ;@TEST "inflate() of garbage bytes raises LIBZ-FAIL" - do raises^STDASSERT(.pass,.fail,"new raw set raw=$$inflate^STDCOMPRESS($zchar(0,0,0,0,0),.raw)","LIBZ-FAIL","garbage raises LIBZ-FAIL") + do raises^STDASSERT(.pass,.fail,"new raw set raw=$$inflate^STDCOMPRESS($char(0,0,0,0,0),.raw)","LIBZ-FAIL","garbage raises LIBZ-FAIL") quit ; ; ---- zstd ---- @@ -158,10 +161,10 @@ new buf,ok set ok=$$zstdCompress^STDCOMPRESS("hello",.buf) do true^STDASSERT(.pass,.fail,ok,"zstd compress succeeded") - do eq^STDASSERT(.pass,.fail,$zascii($extract(buf,1)),40,"byte 1 = 0x28") - do eq^STDASSERT(.pass,.fail,$zascii($extract(buf,2)),181,"byte 2 = 0xB5") - do eq^STDASSERT(.pass,.fail,$zascii($extract(buf,3)),47,"byte 3 = 0x2F") - do eq^STDASSERT(.pass,.fail,$zascii($extract(buf,4)),253,"byte 4 = 0xFD") + do eq^STDASSERT(.pass,.fail,$ascii($extract(buf,1)),40,"byte 1 = 0x28") + do eq^STDASSERT(.pass,.fail,$ascii($extract(buf,2)),181,"byte 2 = 0xB5") + do eq^STDASSERT(.pass,.fail,$ascii($extract(buf,3)),47,"byte 3 = 0x2F") + do eq^STDASSERT(.pass,.fail,$ascii($extract(buf,4)),253,"byte 4 = 0xFD") quit ; tZstdRoundTripAscii(pass,fail) ;@TEST "zstdCompress -> zstdDecompress round-trips ASCII"