From abda1616b38f593a85e2a4bc5f28748cf88a4d8a Mon Sep 17 00:00:00 2001
From: Lawrence Sinclair <sinclair@stanyangroup.com>
Date: Mon, 11 May 2026 00:14:14 +0700
Subject: [PATCH] jenner-check: add 5 Jenner compatibility bundles + runner

Each bundle pulls one "before/after" pattern out of
"SET statement considered harmful.sas" (PharmaSUG 2025 paper) and
turns it into a self-contained pass/fail test against the Jenner SAS
runtime.

  t001_set_split_silly_vs_smart  3 WHERE-passes vs 1 SELECT/OUTPUT pass
  t002_merge_in_categorize        SQL JOINs replaced by MERGE + IN= flags
  t003_by_group_range_shift       FIRST./LAST. + RETAIN aggregate, MERGE back
  t004_format_dataset_modify      DATA-step format vs PROC DATASETS MODIFY
  t005_merge_collapse_interim     wandering vs straight MERGE

Each bundle has script.sas, autoexec.sas, expected.json (frozen from
a successful run against api.jenneranalytics.com/v1/run), and
meta.json (provenance: source file + blob sha + commit).

Includes runner files (run_jenner.sas / .bat / .sh) so the bundles
can be replayed via SAS or curl with no further setup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 jenner-check/run_jenner.bat                   |  43 ++
 jenner-check/run_jenner.sas                   | 526 ++++++++++++++++++
 jenner-check/run_jenner.sh                    | 214 +++++++
 .../autoexec.sas                              |   4 +
 .../expected.json                             |  33 ++
 .../t001_set_split_silly_vs_smart/meta.json   |   8 +
 .../t001_set_split_silly_vs_smart/script.sas  |  46 ++
 .../t002_merge_in_categorize/autoexec.sas     |   4 +
 .../t002_merge_in_categorize/expected.json    |  31 ++
 .../t002_merge_in_categorize/meta.json        |   8 +
 .../t002_merge_in_categorize/script.sas       |  37 ++
 .../t003_by_group_range_shift/autoexec.sas    |   4 +
 .../t003_by_group_range_shift/expected.json   |  34 ++
 .../t003_by_group_range_shift/meta.json       |   8 +
 .../t003_by_group_range_shift/script.sas      |  62 +++
 .../t004_format_dataset_modify/autoexec.sas   |   4 +
 .../t004_format_dataset_modify/expected.json  |  32 ++
 .../t004_format_dataset_modify/meta.json      |   8 +
 .../t004_format_dataset_modify/script.sas     |  36 ++
 .../t005_merge_collapse_interim/autoexec.sas  |   4 +
 .../t005_merge_collapse_interim/expected.json |  26 +
 .../t005_merge_collapse_interim/meta.json     |   8 +
 .../t005_merge_collapse_interim/script.sas    |  53 ++
 23 files changed, 1233 insertions(+)
 create mode 100644 jenner-check/run_jenner.bat
 create mode 100644 jenner-check/run_jenner.sas
 create mode 100755 jenner-check/run_jenner.sh
 create mode 100644 jenner-check/t001_set_split_silly_vs_smart/autoexec.sas
 create mode 100644 jenner-check/t001_set_split_silly_vs_smart/expected.json
 create mode 100644 jenner-check/t001_set_split_silly_vs_smart/meta.json
 create mode 100644 jenner-check/t001_set_split_silly_vs_smart/script.sas
 create mode 100644 jenner-check/t002_merge_in_categorize/autoexec.sas
 create mode 100644 jenner-check/t002_merge_in_categorize/expected.json
 create mode 100644 jenner-check/t002_merge_in_categorize/meta.json
 create mode 100644 jenner-check/t002_merge_in_categorize/script.sas
 create mode 100644 jenner-check/t003_by_group_range_shift/autoexec.sas
 create mode 100644 jenner-check/t003_by_group_range_shift/expected.json
 create mode 100644 jenner-check/t003_by_group_range_shift/meta.json
 create mode 100644 jenner-check/t003_by_group_range_shift/script.sas
 create mode 100644 jenner-check/t004_format_dataset_modify/autoexec.sas
 create mode 100644 jenner-check/t004_format_dataset_modify/expected.json
 create mode 100644 jenner-check/t004_format_dataset_modify/meta.json
 create mode 100644 jenner-check/t004_format_dataset_modify/script.sas
 create mode 100644 jenner-check/t005_merge_collapse_interim/autoexec.sas
 create mode 100644 jenner-check/t005_merge_collapse_interim/expected.json
 create mode 100644 jenner-check/t005_merge_collapse_interim/meta.json
 create mode 100644 jenner-check/t005_merge_collapse_interim/script.sas

diff --git a/jenner-check/run_jenner.bat b/jenner-check/run_jenner.bat
new file mode 100644
index 0000000..1039fdf
--- /dev/null
+++ b/jenner-check/run_jenner.bat
@@ -0,0 +1,43 @@
+@echo off
+rem run_jenner.bat - Windows runner for Jenner compatibility checks.
+rem
+rem Usage:   run_jenner.bat <script.sas> [response.json]
+rem
+rem Submits a single .sas file to api.jenneranalytics.com. For
+rem bundle-aware mode (autoexec.sas + script.sas concatenation) on
+rem Windows, use WSL and invoke run_jenner.sh instead, or wait for the
+rem Windows CI runner that will validate a bundle-aware .bat.
+rem
+rem Output:  response.json contains the API response. Read it back in SAS:
+rem     filename resp 'response.json';
+rem     libname  resp JSON fileref=resp;
+rem     proc print data=resp.root; run;
+rem
+rem Requires: curl.exe (ships with Windows 10+ at C:\Windows\System32).
+
+setlocal
+
+if "%~1"=="" (
+  echo Usage: %~nx0 ^<script.sas^> [response.json]
+  exit /b 2
+)
+rem Quoted SET keeps special characters and trailing spaces out of the values.
+set "SCRIPT=%~1"
+set "OUT=%~2"
+if "%OUT%"=="" set "OUT=response.json"
+
+set "HOST=api.jenneranalytics.com"
+
+curl.exe -sS -X POST "https://%HOST%/v1/run" ^
+  -F "script=@%SCRIPT%;type=application/x-sas" ^
+  -F "deterministic=1" ^
+  -F "timeout=60" ^
+  -o "%OUT%"
+
+if errorlevel 1 (
+  echo curl failed with errorlevel %errorlevel%
+  exit /b 1
+)
+
+echo Response written to %OUT%
+exit /b 0
diff --git a/jenner-check/run_jenner.sas b/jenner-check/run_jenner.sas
new file mode 100644
index 0000000..550e8f8
--- /dev/null
+++ b/jenner-check/run_jenner.sas
@@ -0,0 +1,526 @@
+/* run_jenner.sas — invoke api.jenneranalytics.com from base SAS.
+ *
+ * Requires SAS 9.4 M5 or later (PROC HTTP + libname JSON engine).
+ *
+ * ---------------------------------------------------------------------------
+ * TL;DR for SAS users:
+ *
+ *     %include 'run_jenner.sas';
+ *     %jenner_run(script=my_program.sas);              / * one script * /
+ *     %jenner_check_all();                             / * whole bundle dir * /
+ *
+ * ---------------------------------------------------------------------------
+ * What this file gives you:
+ *
+ *   %jenner_run         — POST one .sas file to the Jenner API, display the
+ *                         log + listing + any generated files.
+ *   %jenner_check_all   — walk every jenner-check/tNNN_* bundle,
+ *                         invoke the API for each, compare the response to
+ *                         the bundle's expected.json, produce a summary
+ *                         CSV + SAS dataset the repo owner can attach to the
+ *                         jenner-check PR.
+ *
+ * ---------------------------------------------------------------------------
+ * How the API call is built:
+ *
+ *   POST https://api.jenneranalytics.com/v1/run
+ *   Content-Type: multipart/form-data; boundary=...
+ *
+ *   fields:
+ *     script          the .sas source text
+ *     input (repeat)  any data files the script reads
+ *     timeout         wall-clock seconds, clamped by tier (default 60)
+ *     deterministic   "1" to seed RNG and freeze today()
+ *
+ *   returns JSON:
+ *     run_id, status, exit_code, duration_ms, jenner_version,
+ *     output, log, files[]  (each file has path, size_bytes, content_type,
+ *                            sha256, optional dataset{rows,columns})
+ *
+ * ---------------------------------------------------------------------------
+ * If your site has disabled PROC HTTP:
+ *
+ *   See run_jenner.bat (Windows) or run_jenner.sh (mac/linux) in the same
+ *   directory — both are small curl wrappers that produce the same JSON.
+ *   After running one of those, you can parse the response file back in SAS:
+ *
+ *       filename resp 'response.json';
+ *       libname  resp JSON fileref=resp;
+ *       proc print data=resp.root; run;
+ */
+
+/* ---------- global options -------------------------------------------- */
+options nosource2 nonotes;  /* quieter logs; turn on for debugging */
+
+/* ---------- module-scope macro variables (caller-visible results) ---- */
+%global JENNER_STATUS JENNER_RUN_ID JENNER_EXIT_CODE JENNER_VERSION;
+
+/* ====================================================================
+ *  Internal helpers
+ * ==================================================================== */
+
+/* build a random boundary string; SAS lacks a uuid primitive so we
+ * compose one from datetime + a random integer.                        */
+%macro _jc_boundary;  /* returns jc_<datetime>_<6 hex digits>; the draw is scaled to an integer because HEX6. truncates a fraction to 0 */
+  jc_%sysfunc(compress(%sysfunc(datetime(), b8601dt.), -:.))_%sysfunc(ceil(%sysevalf(%sysfunc(ranuni(0)) * 16777215)), hex6.)
+%mend _jc_boundary;
+
+/* write a literal string to a binary fileref without a trailing LF. */
+%macro _jc_put(fref, text);  /* fref: target fileref; text: PUT argument(s), passed through verbatim */
+  data _null_;
+    file &fref mod recfm=n;  /* MOD appends to whatever the fileref already holds */
+    put &text;
+  run;
+%mend _jc_put;
+
+/* assemble the multipart body into fileref JC_BODY, producing a header
+ * line with the chosen boundary in macro var &JC_BOUND. Inputs is a
+ * space-separated list of file paths.
+ *
+ * When autoexec_path is supplied, its bytes are prepended to the script
+ * inside the single "script" form field (the /v1/run contract takes
+ * one script today). A newline separates the two so statements don't
+ * run together. */
+%macro _jc_build_body(script_path=, autoexec_path=, inputs=, timeout=60, deterministic=0);
+  %global JC_BOUND;  /* scale the draw to an integer first: HEX10. on a 0-1 fraction always prints 0000000000 */
+  %let JC_BOUND = --jenner-%sysfunc(ceil(%sysevalf(%sysfunc(ranuni(0)) * 1099511627775)), hex10.)--;
+
+  filename jc_body temp recfm=n;  /* NOTE(review): confirm "/" under RECFM=N emits the CRLF that multipart expects */
+
+  /* --- script field (autoexec bytes, then script bytes) --- */
+  data _null_;
+    file jc_body recfm=n;
+    put "--&JC_BOUND" / 'Content-Disposition: form-data; name="script"; filename="script.sas"' /
+        'Content-Type: application/x-sas' / ;
+  run;
+  %if %length(&autoexec_path) > 0 %then %do;
+    data _null_;
+      infile "&autoexec_path" recfm=n;
+      file jc_body mod recfm=n;
+      input;
+      put _infile_;
+    run;
+    data _null_;
+      file jc_body mod recfm=n;
+      put ;  /* separator newline */
+    run;
+  %end;
+  /* append raw script bytes */
+  data _null_;
+    infile "&script_path" recfm=n;
+    file jc_body mod recfm=n;
+    input;
+    put _infile_;
+  run;
+  data _null_;
+    file jc_body mod recfm=n;
+    put ;
+  run;
+
+  /* --- optional input files: one "input" part per space-separated path --- */
+  %local i f;
+  %let i = 1;
+  %do %while (%scan(&inputs, &i, %str( )) ne );
+    %let f = %scan(&inputs, &i, %str( ));
+    data _null_;
+      file jc_body mod recfm=n;
+      fname = scan("&f", -1, '/\');
+      put "--&JC_BOUND" /
+          'Content-Disposition: form-data; name="input"; filename="' fname +(-1) '"' /
+          'Content-Type: application/octet-stream' / ;
+    run;
+    data _null_;
+      infile "&f" recfm=n;
+      file jc_body mod recfm=n;
+      input;
+      put _infile_;
+    run;
+    data _null_;
+      file jc_body mod recfm=n;
+      put ;
+    run;
+    %let i = %eval(&i + 1);
+  %end;
+
+  /* --- timeout + deterministic fields, then the closing delimiter --- */
+  data _null_;
+    file jc_body mod recfm=n;
+    put "--&JC_BOUND" /
+        'Content-Disposition: form-data; name="timeout"' / /
+        "&timeout";
+    put "--&JC_BOUND" /
+        'Content-Disposition: form-data; name="deterministic"' / /
+        "&deterministic";
+    put "--&JC_BOUND--";
+  run;
+%mend _jc_build_body;
+
+
+/* ====================================================================
+ *  %jenner_run — submit one script, display results.
+ * ==================================================================== */
+%macro jenner_run(
+    script=,
+    autoexec=,
+    inputs=,
+    host=api.jenneranalytics.com,
+    timeout=60,
+    deterministic=0,
+    out_dir=jenner_output,
+    api_key=
+);
+  /* POST &script (optionally prefixed by &autoexec) to &host, echo the reply, fetch returned files into &out_dir */
+  %let JENNER_STATUS    = ;
+  %let JENNER_RUN_ID    = ;
+  %let JENNER_EXIT_CODE = ;
+  %let JENNER_VERSION   = ;
+
+  %if %length(&script) = 0 %then %do;
+    %put ERROR: %%jenner_run requires script=<path-to-.sas>;
+    %return;
+  %end;
+  %if %sysfunc(fileexist(&script)) = 0 %then %do;
+    %put ERROR: script not found: &script;
+    %return;
+  %end;
+  %if %length(&autoexec) > 0 and %sysfunc(fileexist(&autoexec)) = 0 %then %do;
+    %put ERROR: autoexec not found: &autoexec;
+    %return;
+  %end;
+
+  %_jc_build_body(script_path=&script, autoexec_path=&autoexec,
+                  inputs=&inputs,
+                  timeout=&timeout, deterministic=&deterministic)
+
+  filename jc_resp temp;
+  filename jc_hdrs temp;
+
+  /* build auth header if key provided */
+  %local auth_hdr;
+  %let auth_hdr = ;
+  %if %length(&api_key) > 0 %then %let auth_hdr = Authorization: Bearer &api_key;
+
+  proc http
+    method  = "POST"
+    url     = "https://&host/v1/run"
+    in      = jc_body
+    out     = jc_resp
+    headerout = jc_hdrs
+    ct      = "multipart/form-data; boundary=&JC_BOUND"
+  ;
+  %if %length(&auth_hdr) > 0 %then %do;
+    headers "Authorization" = "Bearer &api_key";
+  %end;
+  run;
+
+  /* parse response JSON */
+  libname jc_r JSON fileref=jc_resp;
+
+  /* extract headline values, then echo listing + log; all skipped when the reply has no ROOT */
+  %if %sysfunc(exist(jc_r.root)) %then %do;
+    data _null_;
+      set jc_r.root(obs=1);
+      call symputx('JENNER_RUN_ID',    run_id,          'G');
+      call symputx('JENNER_STATUS',    status,          'G');
+      call symputx('JENNER_EXIT_CODE', exit_code,       'G');
+      call symputx('JENNER_VERSION',   jenner_version,  'G');
+    run;
+
+    /* write the listing (stdout) and the log to the SAS log, one PUT per line */
+    data _null_;
+      set jc_r.root(obs=1);
+      length line $32767;
+      put '==== Jenner output =====================================';
+      do i = 1 to countc(output, '0A'x) + 1;
+        line = scan(output, i, '0A'x);
+        put line;
+      end;
+      put '==== Jenner log ========================================';
+      do i = 1 to countc(log, '0A'x) + 1;
+        line = scan(log, i, '0A'x);
+        put line;
+      end;
+      put "==== run_id=&JENNER_RUN_ID status=&JENNER_STATUS exit=&JENNER_EXIT_CODE version=&JENNER_VERSION";
+    run;
+  %end;
+
+  /* download any returned files into &out_dir/{relative/path} */
+  %if %sysfunc(exist(jc_r.files)) %then %do;
+    data _null_; length cmd $400;
+      cmd = catx(' ', 'mkdir -p', quote("&out_dir"));  /* CATX keeps the separating blank that CATS would strip */
+      rc = system(cmd);  /* works on unix; on windows user may need to mkdir themselves */
+    run;
+
+    %local _nfiles;
+    proc sql noprint;
+      select count(*) into :_nfiles from jc_r.files;
+    quit;
+
+    %local i fpath;
+    %do i = 1 %to &_nfiles;
+      data _null_;
+        set jc_r.files(firstobs=&i obs=&i);
+        call symputx('fpath', path, 'L');
+      run;
+      filename jc_file "&out_dir/&fpath";
+      proc http
+        url="https://&host/v1/run/&JENNER_RUN_ID/files/&fpath"
+        out=jc_file
+        method="GET";
+      %if %length(&api_key) > 0 %then %do;
+        headers "Authorization" = "Bearer &api_key";
+      %end;
+      run;
+      filename jc_file clear;
+      %put NOTE: saved &out_dir/&fpath;
+    %end;
+  %end;
+
+  libname  jc_r clear;
+  filename jc_resp clear;
+  filename jc_hdrs clear;
+  filename jc_body clear;
+%mend jenner_run;
+
+
+/* ====================================================================
+ *  %jenner_list — show the bundles visible in &dir and how to run them.
+ *                  Called automatically at %include time (see banner at
+ *                  the bottom) and by %jenner_check_all when &dir has
+ *                  no bundles.
+ * ==================================================================== */
+%macro jenner_list(dir=jenner-check);
+  %local _n;  /* bundle count; -1 flags "directory could not be opened" */
+  %let _n = 0;
+  filename jcld "&dir";
+  data work._jc_list;
+    length bundle $256;
+    did = dopen('jcld');
+    if did = 0 then do;
+      call symputx('_n', -1, 'L');
+      stop;
+    end;
+    n = dnum(did);
+    do i = 1 to n;
+      name = dread(did, i);
+      /* a bundle is a tN..._ entry that holds script.sas (same rule as run_jenner.sh); skips stray t* files */
+      if prxmatch('/^t\d.*_/', name) and fileexist(cats("&dir", '/', name, '/script.sas')) then do;
+        bundle = name;
+        output;
+      end;
+    end;
+    rc = dclose(did);
+    keep bundle;
+  run;
+  filename jcld clear;
+  %if &_n = -1 %then %do;
+    %put NOTE: No directory "&dir" — are you at the repo root? Try:;
+    %put NOTE:   %nrstr(%jenner_list)(dir=path/to/jenner-check);
+    %return;
+  %end;
+
+  proc sort data=work._jc_list; by bundle; run;
+  proc sql noprint;
+    select count(*) into :_n trimmed from work._jc_list;
+  quit;
+
+  %if &_n = 0 %then %do;
+    %put NOTE: No tNNN_* bundles found in "&dir".;
+    %return;
+  %end;
+
+  %put;
+  %put ======================================================================;
+  %put &_n bundle(s) in &dir:;
+  data _null_;
+    set work._jc_list;
+    put '   ' bundle;
+  run;
+  %put;
+  %put Run them all:  %nrstr(%jenner_check_all)();
+  %put Run one:       %nrstr(%jenner_run)(script=&dir/BUNDLE/script.sas, autoexec=&dir/BUNDLE/autoexec.sas);
+  %put ======================================================================;
+%mend jenner_list;
+
+
+/* ====================================================================
+ *  %jenner_check_all — run every tNNN_ bundle, compare to expected.json,
+ *                      write a CSV summary the owner can attach to the PR.
+ * ==================================================================== */
+%macro jenner_check_all(
+    dir=jenner-check,
+    host=api.jenneranalytics.com,
+    api_key=,
+    report=jenner_check_report.csv
+);
+
+  /* enumerate tNNN_* subdirs that contain a script.sas */
+  filename jcd "&dir";
+  data work.jc_bundles;
+    length bundle $256;
+    did = dopen('jcd');
+    if did = 0 then do;
+      /* the macro name sits in single quotes so it is printed, not executed mid-step */
+      put "ERROR: cannot open &dir — are you at the repo root? Try " '%jenner_list(dir=path/to/jenner-check);';
+      stop;
+    end;
+    n = dnum(did);
+    do i = 1 to n;
+      name = dread(did, i);
+      if prxmatch('/^t\d.*_/', name) and fileexist(cats("&dir", '/', name, '/script.sas')) then do;
+        bundle = cats("&dir", '/', name);
+        output;
+      end;
+    end;
+    rc = dclose(did);
+    keep bundle;
+  run;
+  filename jcd clear;
+  proc sort data=work.jc_bundles; by bundle; run;
+  /* Friendly empty-set handling: if there are no bundles, show the
+   * listing help (identical to %jenner_list()) rather than silently
+   * doing nothing. */
+  %local _any;
+  proc sql noprint; select count(*) into :_any trimmed from work.jc_bundles; quit;
+  %if &_any = 0 %then %do;
+    %put NOTE: No tNNN_* bundles under "&dir". Nothing to run.;
+    %jenner_list(dir=&dir)
+    %return;
+  %end;
+
+  /* result accumulator */
+  data work.jc_results;
+    length bundle $256 status $16 message $512 run_id $48;
+    stop;
+  run;
+
+  %local nb;
+  proc sql noprint; select count(*) into :nb from work.jc_bundles; quit;
+
+  %local i b;
+  %do i = 1 %to &nb;
+    data _null_;
+      set work.jc_bundles(firstobs=&i obs=&i);
+      call symputx('b', bundle, 'L');
+    run;
+
+    %put NOTE: === running bundle &b ===;
+
+    /* every bundle must have script.sas; autoexec.sas is optional
+     * jenner-check bookkeeping (e.g. `options obs=100;` + any owner
+     * autoexec inlined). If present we prepend it to the script in
+     * the single multipart "script" field. Script.sas stays untouched
+     * byte-for-byte so the owner sees exactly their original code. */
+    %local sc ax;
+    %let sc = &b/script.sas;
+    %if %sysfunc(fileexist(&b/autoexec.sas)) %then %let ax = &b/autoexec.sas;
+    %else %let ax = ;
+
+    %jenner_run(script=&sc, autoexec=&ax, host=&host, api_key=&api_key,
+                deterministic=1, out_dir=&b/actual)
+
+    /* compare to expected.json — minimal: the verdict is pass when the
+     * run's exit_code is 0 and fail otherwise; expected.json only has to
+     * exist and parse. A richer validator can live alongside it as
+     * validate.sas (SAS-side) but isn't required.                       */
+    %local verdict msg;
+    %let verdict = unknown;
+    %let msg     = no expected.json;
+    %if %sysfunc(fileexist(&b/expected.json)) %then %do;
+      filename jcexp "&b/expected.json";
+      libname  jcexp JSON fileref=jcexp;
+
+      data _null_;
+        if 0 then set jcexp.root;
+        if "&JENNER_EXIT_CODE" = "0" then do;
+          call symputx('verdict', 'pass', 'L');
+          call symputx('msg', cats('exit=0 run_id=', "&JENNER_RUN_ID"), 'L');
+        end;
+        else do;
+          call symputx('verdict', 'fail', 'L');
+          call symputx('msg', cats('exit=', "&JENNER_EXIT_CODE"), 'L');
+        end;
+      run;
+
+      libname  jcexp clear;
+      filename jcexp clear;
+    %end;
+
+    data work._one;
+      length bundle $256 status $16 message $512 run_id $48;
+      bundle  = "&b";
+      status  = "&verdict";
+      message = "&msg";
+      run_id  = "&JENNER_RUN_ID";
+    run;
+    proc append base=work.jc_results data=work._one force; run;
+  %end;
+
+  /* write CSV report */
+  proc export data=work.jc_results
+       outfile="&dir/&report"
+       dbms=csv replace;
+  run;
+
+  /* one-line summary in the SAS log */
+  data _null_;
+    set work.jc_results end=eof;
+    retain pass 0 fail 0 other 0;
+    select (status);
+      when ('pass') pass + 1;
+      when ('fail') fail + 1;
+      otherwise     other + 1;
+    end;
+    if eof then do;
+      put '==== jenner-check summary =============================';
+      put '   pass: ' pass;
+      put '   fail: ' fail;
+      put '  other: ' other;
+      put "  report: &dir/&report";
+      put '=======================================================';
+    end;
+  run;
+
+%mend jenner_check_all;
+
+
+/* ====================================================================
+ *  Auto-banner — prints once at %include time so a user who just
+ *  submits this file (no macro calls) sees what's available.
+ *  Suppressed if %let JENNER_QUIET = 1; before %include.
+ *
+ *  Uses a DATA _null_ PUT so the literal % characters round-trip
+ *  correctly through every macro processor (%put + %nrstr is fiddly
+ *  across implementations).
+ * ==================================================================== */
+%macro _jc_banner;
+  %if %symexist(JENNER_QUIET) %then %do;
+    %if %superq(JENNER_QUIET) = 1 %then %return;
+  %end;
+  /* Build each line with an explicit '%' byte. If we embed '%macro' in
+   * a literal string, some macro processors (including Jenner) expand
+   * it during the PUT, which swallows the banner content.
+   * byte(37) = '%'. cats() concatenates without gluing in spaces. */
+  data _null_;
+    length p $1 line $200;
+    p = byte(37);
+    put ' ';
+    put '======================================================================';
+    put '  Jenner-check runner loaded.';
+    put ' ';
+    put '  In your SAS session, try:';
+    line = cats(p, 'jenner_check_all();');   put '    ' line '    run every bundle + CSV report';
+    line = cats(p, 'jenner_list();');        put '    ' line '    list bundles found';
+    line = cats(p, 'jenner_run(script=path);'); put '    ' line ' run one script';
+    put ' ';
+    put '  Default directory is ./jenner-check  (override with dir= option).';
+    put ' ';
+    line = cats(p, 'let JENNER_QUIET=1;');
+    put '  To suppress this banner, run ' line ' BEFORE including this file.';
+    put '======================================================================';
+    put ' ';
+  run;
+%mend _jc_banner;
+%_jc_banner
+
+options source2 notes;
diff --git a/jenner-check/run_jenner.sh b/jenner-check/run_jenner.sh
new file mode 100755
index 0000000..99cd395
--- /dev/null
+++ b/jenner-check/run_jenner.sh
@@ -0,0 +1,214 @@
+#!/usr/bin/env bash
+# run_jenner.sh - mac/linux runner for Jenner compatibility checks.
+#
+# Quick start:
+#   cd jenner-check/
+#   ./run_jenner.sh                  # lists bundles in the current dir
+#   ./run_jenner.sh t001_something   # run that one
+#   ./run_jenner.sh --all            # run every bundle in the current dir
+#
+# Usage:   ./run_jenner.sh [bundle-dir | script.sas | --all | --list] [response.json]
+#
+#   (no arg)     If the current directory has tNNN_* bundles, list them
+#                with a copy-paste command. Otherwise show this help.
+#
+#   --all        Run every tNNN_* bundle in the current directory in
+#                sequence, print a pass/fail summary.
+#
+#   --list, -l   List the bundles visible in the current directory and
+#                exit without running anything.
+#
+#   bundle-dir   A directory containing script.sas and (optionally)
+#                autoexec.sas. The two are concatenated (autoexec first,
+#                then a blank line, then script) and submitted together.
+#                This is the normal case.
+#
+#   script.sas   A single .sas file. Submitted as-is — no autoexec.
+#
+# The API response is written to <response.json> (or response.json in
+# the current directory if omitted) and the most useful fields are also
+# printed to stdout for a quick sanity check.
+#
+# Requires: bash 4+ (for mapfile) and curl. macOS ships bash 3.2, so install
+# a newer bash there. Windows: use run_jenner.bat (single-file mode) or WSL.
+#
+# IMPORTANT: execute this script, don't source it. Running with `. ./...`
+# or `source ./...` will short-circuit error handling and can close your
+# terminal if an error path fires.
+
+# --- refuse to be sourced ------------------------------------------------
+# `return` only works inside a sourced script. If we ARE sourced, print a
+# message and return 1 so we don't kill the parent shell with exit. If
+# we're running directly, (return 0) fails and we fall through.
+(return 0 2>/dev/null) && {
+  printf 'run_jenner.sh: execute this script, do not source it.\n  ./run_jenner.sh <bundle-dir-or-script.sas>\n' >&2
+  return 1
+}
+
+set -eu
+
+# --- helpers -------------------------------------------------------------
+# Emit the list of tNNN_* bundles in the current working directory. A
+# "bundle" is a directory matching t[0-9]*_* whose name contains a
+# script.sas file. Writes one path per line (no prefix); empty output
+# if nothing found.
+list_bundles_here() {
+  local candidate
+  # Print each matching directory that holds a script.sas, minus its trailing slash.
+  for candidate in ./t[0-9]*_*/ ; do
+    if [[ -d "$candidate" && -f "$candidate/script.sas" ]]; then printf '%s\n' "${candidate%/}"; fi
+  done
+}
+
+# Render a helpful listing + copy-paste suggestion, then exit non-zero
+# (we haven't done anything). Used when the user runs with no args.
+show_bundle_listing_then_exit() {
+  local found entry suffix=s
+  mapfile -t found < <(list_bundles_here)
+  if [[ ${#found[@]} -eq 1 ]]; then suffix=; fi
+  printf 'This directory has %d bundle%s:\n' "${#found[@]}" "$suffix"
+  for entry in "${found[@]}"; do
+    printf '  %s\n' "${entry#./}"
+  done
+  # Copy-paste hints; exit 2 because nothing was actually run.
+  printf '\nRun one:        ./run_jenner.sh %s\n' "${found[0]#./}"
+  printf 'Run them all:   ./run_jenner.sh --all\n'
+  printf 'Just list:      ./run_jenner.sh --list\n'
+  exit 2
+}
+
+# Show the usage block when we have nothing better to offer.
+show_usage_then_exit() {
+  local rc=${1:-2} me
+  me=$(basename "$0")
+  printf 'Usage: %s [bundle-dir | script.sas | --all | --list] [response.json]\n\n' "$me" >&2
+  printf 'Examples:\n' >&2
+  printf '  %s t001_my_bundle         # run one bundle\n' "$me" >&2
+  printf '  %s --all                  # run every tNNN_* bundle in this dir\n' "$me" >&2
+  printf '  %s path/to/script.sas     # run a single file, no autoexec\n' "$me" >&2
+  # Everything above goes to stderr; exit with the requested status (default 2).
+  exit "$rc"
+}
+
+# --- arg parsing ---------------------------------------------------------
+if [[ $# -lt 1 ]]; then
+  # No args: if the cwd contains bundles, list them; otherwise show help.
+  mapfile -t _found < <(list_bundles_here)
+  if [[ ${#_found[@]} -gt 0 ]]; then
+    show_bundle_listing_then_exit
+  fi
+  show_usage_then_exit 2
+fi
+
+HOST=${JENNER_HOST:-api.jenneranalytics.com}
+
+case "$1" in
+  -h|--help)
+    show_usage_then_exit 0
+    ;;
+  -l|--list)
+    mapfile -t _found < <(list_bundles_here)
+    if [[ ${#_found[@]} -eq 0 ]]; then
+      printf 'No tNNN_* bundles found in %s\n' "$(pwd)"
+      exit 0
+    fi
+    printf 'Bundles in %s:\n' "$(pwd)"
+    for b in "${_found[@]}"; do
+      printf '  %s\n' "${b#./}"
+    done
+    exit 0
+    ;;
+  --all)
+    mapfile -t _found < <(list_bundles_here)
+    if [[ ${#_found[@]} -eq 0 ]]; then
+      printf 'No tNNN_* bundles found in %s\n' "$(pwd)" >&2
+      exit 3
+    fi
+    _pass=0; _fail=0
+    for b in "${_found[@]}"; do
+      printf '\n── %s ──\n' "${b#./}"
+      if "$BASH" "$0" "$b" "${b#./}_response.json"; then  # via $BASH: a bare "$0" fails when started as `bash run_jenner.sh`
+        _pass=$((_pass+1))
+      else
+        _fail=$((_fail+1))
+      fi
+    done
+    printf '\n── summary: %d pass, %d fail ──\n' "$_pass" "$_fail"
+    [[ $_fail -eq 0 ]] && exit 0 || exit 1
+    ;;
+esac
+
+TARGET=$1
+OUT=${2:-response.json}
+
+# --- assemble the submission body ---------------------------------------
+# If TARGET is a directory, treat it as a bundle. If it's a file, submit
+# it directly.
+CLEANUP=()   # temp files to delete when the script exits
+cleanup() {  # the ${arr[@]+...} guard keeps `set -u` quiet on bash < 4.4 when the list is empty
+  local f; for f in ${CLEANUP[@]+"${CLEANUP[@]}"}; do rm -f -- "$f"; done
+}
+trap cleanup EXIT
+
+if [[ -d "$TARGET" ]]; then
+  if [[ ! -f "$TARGET/script.sas" ]]; then
+    printf 'error: %s is a directory but has no script.sas\n' "$TARGET" >&2
+    exit 3
+  fi
+  SUBMIT=$(mktemp -t jc_submit.XXXXXX.sas)
+  CLEANUP+=("$SUBMIT")
+  if [[ -f "$TARGET/autoexec.sas" ]]; then
+    cat "$TARGET/autoexec.sas" > "$SUBMIT"
+    printf '\n' >> "$SUBMIT"
+  fi
+  cat "$TARGET/script.sas" >> "$SUBMIT"
+  printf 'Submitting bundle: %s\n' "$TARGET"
+  if [[ -f "$TARGET/autoexec.sas" ]]; then
+    printf '  autoexec.sas (%d bytes) + script.sas (%d bytes)\n' \
+      "$(wc -c < "$TARGET/autoexec.sas")" "$(wc -c < "$TARGET/script.sas")"
+  else
+    printf '  script.sas (%d bytes), no autoexec\n' "$(wc -c < "$TARGET/script.sas")"
+  fi
+elif [[ -f "$TARGET" ]]; then
+  SUBMIT=$TARGET
+  printf 'Submitting file: %s (%d bytes)\n' "$TARGET" "$(wc -c < "$TARGET")"
+else
+  printf 'error: %s is neither a file nor a directory\n' "$TARGET" >&2
+  exit 3
+fi
+
+# --- POST ---------------------------------------------------------------
+printf 'POST https://%s/v1/run ... ' "$HOST"
+HTTP_CODE=$(curl -sS -o "$OUT" -w '%{http_code}' -X POST \
+  "https://${HOST}/v1/run" \
+  -F "script=@${SUBMIT};type=application/x-sas" \
+  -F "deterministic=1" \
+  -F "timeout=60")
+printf 'HTTP %s\n' "$HTTP_CODE"
+
+if [[ "$HTTP_CODE" != "200" ]]; then
+  printf 'API returned non-200 — raw response in %s\n' "$OUT" >&2
+  exit 4
+fi
+
+# --- summarise (python3 if present; exits 5 when the run's exit_code is not 0) ---
+printf 'Response written to %s\n' "$OUT"
+if command -v python3 >/dev/null 2>&1; then
+  python3 - "$OUT" <<'PY'
+import json, sys
+r = json.load(open(sys.argv[1]))
+print(f"  status     : {r.get('status')}")
+print(f"  exit_code  : {r.get('exit_code')}")
+print(f"  duration_ms: {r.get('duration_ms')}")
+print(f"  run_id     : {r.get('run_id')}")
+print(f"  jenner_ver : {r.get('jenner_version')}")
+log = r.get('log', '')
+if log:
+    print('  log (first 10 lines):')
+    for line in log.splitlines()[:10]:
+        print(f'    {line}')
+sys.exit(0 if r.get('exit_code') in (0, '0') else 5)  # set -e propagates this, so --all counts a failed run as a fail
+PY
+else
+  printf '  (install python3 for a pretty summary; raw JSON in %s)\n' "$OUT"
+fi
diff --git a/jenner-check/t001_set_split_silly_vs_smart/autoexec.sas b/jenner-check/t001_set_split_silly_vs_smart/autoexec.sas
new file mode 100644
index 0000000..e58ee31
--- /dev/null
+++ b/jenner-check/t001_set_split_silly_vs_smart/autoexec.sas
@@ -0,0 +1,4 @@
+/* autoexec for t001_set_split_silly_vs_smart
+   - cap output at 100 obs (matches Jenner's unlicensed tier exactly,
+     so the run is reproducible regardless of license state).         */
+options obs=100;
diff --git a/jenner-check/t001_set_split_silly_vs_smart/expected.json b/jenner-check/t001_set_split_silly_vs_smart/expected.json
new file mode 100644
index 0000000..2329602
--- /dev/null
+++ b/jenner-check/t001_set_split_silly_vs_smart/expected.json
@@ -0,0 +1,33 @@
+{
+    "_captured_at": "2026-05-10T17:05:07Z",
+    "_captured_run_id": "r_019e12d98b6d7502b8a9c67777a10594",
+    "_captured_from": "https://api.jenneranalytics.com/v1/run",
+
+    "status": "ok",
+    "exit_code": 0,
+
+    "log_contains": [
+        "NOTE: Option OBS changed to 100.",
+        "NOTE: Wrote have (30 rows, 4 columns).",
+        "NOTE: Wrote work.silly_1 (10 rows, 4 columns).",
+        "NOTE: Wrote work.silly_2 (10 rows, 4 columns).",
+        "NOTE: Wrote work.silly_3 (10 rows, 4 columns).",
+        "NOTE: PROC PRINT completed: 10 observations printed, 4 variables"
+    ],
+    "log_does_not_contain": [
+        "ERROR:",
+        "[JENNER-ERROR"
+    ],
+
+    "output_contains": [
+        "silly_1 (grp=1)",
+        "smart_1 (grp=1) - same content, single pass",
+        "smart_2 (grp=2)",
+        "smart_3 (grp=3)"
+    ],
+
+    "diagnostics": {
+        "parse_warnings": [],
+        "runtime_warnings": []
+    }
+}
diff --git a/jenner-check/t001_set_split_silly_vs_smart/meta.json b/jenner-check/t001_set_split_silly_vs_smart/meta.json
new file mode 100644
index 0000000..9a9a9bb
--- /dev/null
+++ b/jenner-check/t001_set_split_silly_vs_smart/meta.json
@@ -0,0 +1,8 @@
+{
+    "bundle": "t001_set_split_silly_vs_smart",
+    "source_file": "SET statement considered harmful.sas",
+    "source_blob_sha": "800d98b0d427a89af5d927bd263a0472047ec900",
+    "source_commit": "438fbf5de1e62074bc683ee854579880ef51b128",
+    "tier": "real_data",
+    "notes": "Case A from 'CUTTING, SLASHING, AND SHREDDING' (lines 124-150 of SET statement considered harmful.sas). The original sources HAVE from a libname-backed file generated higher up in the script; here we build a small synthetic HAVE inline (3 grps x 5 ids x 2 numbers = 30 rows) so the bundle is self-contained. Demonstrates that one DATA step with SELECT/OUTPUT into multiple targets does the work that three WHERE-pass steps do."
+}
diff --git a/jenner-check/t001_set_split_silly_vs_smart/script.sas b/jenner-check/t001_set_split_silly_vs_smart/script.sas
new file mode 100644
index 0000000..5283c6b
--- /dev/null
+++ b/jenner-check/t001_set_split_silly_vs_smart/script.sas
@@ -0,0 +1,46 @@
+/* From: SET statement considered harmful.sas
+   Case: "CUTTING, SLASHING, AND SHREDDING" (Part A)
+
+   The original sources HAVE from a libname-backed file. Here we build a
+   small synthetic HAVE inline so the bundle is self-contained.        */
+
+data have;                      /* 3 grp x 5 id x 2 number = 30 rows */
+  do grp = 1 to 3;
+    do id = "A","B","C","D","E";
+      do number = 1 to 2;
+        obs+1;                  /* sum statement: retained row counter, 1..30 */
+        output;
+      end;
+    end;
+  end;
+run;
+
+/* "silly" approach — three separate WHERE passes (three full reads) */
+data work.silly_1;
+  set have;
+  where grp=1;
+run;
+data work.silly_2;
+  set have;
+  where grp=2;
+run;
+data work.silly_3;
+  set have;
+  where grp=3;
+run;
+
+/* "smart" approach — single pass through HAVE, three OUTPUT targets   */
+data work.smart_1 work.smart_2 work.smart_3;
+  set have;
+  select(grp);                  /* route each row by its grp value */
+    when(1) output work.smart_1;
+    when(2) output work.smart_2;
+    when(3) output work.smart_3;
+    otherwise;                  /* unmatched grp: write nothing (a no-match SELECT without OTHERWISE is an error) */
+  end;
+run;
+
+proc print data=work.silly_1 noobs; title "silly_1 (grp=1)"; run;
+proc print data=work.smart_1 noobs; title "smart_1 (grp=1) - same content, single pass"; run;
+proc print data=work.smart_2 noobs; title "smart_2 (grp=2)"; run;
+proc print data=work.smart_3 noobs; title "smart_3 (grp=3)"; run;
diff --git a/jenner-check/t002_merge_in_categorize/autoexec.sas b/jenner-check/t002_merge_in_categorize/autoexec.sas
new file mode 100644
index 0000000..603983e
--- /dev/null
+++ b/jenner-check/t002_merge_in_categorize/autoexec.sas
@@ -0,0 +1,4 @@
+/* autoexec for t002_merge_in_categorize
+   - OBS=100: process at most 100 obs per input (matches Jenner's unlicensed
+     tier exactly, so the run is reproducible regardless of license state). */
+options obs=100;
diff --git a/jenner-check/t002_merge_in_categorize/expected.json b/jenner-check/t002_merge_in_categorize/expected.json
new file mode 100644
index 0000000..1de7765
--- /dev/null
+++ b/jenner-check/t002_merge_in_categorize/expected.json
@@ -0,0 +1,31 @@
+{
+    "_captured_at": "2026-05-10T17:05:08Z",
+    "_captured_run_id": "r_019e12d98d637d63b516ffc906e0bdab",
+    "_captured_from": "https://api.jenneranalytics.com/v1/run",
+
+    "status": "ok",
+    "exit_code": 0,
+
+    "log_contains": [
+        "NOTE: Option OBS changed to 100.",
+        "NOTE: Wrote one (5 rows, 2 columns).",
+        "NOTE: Wrote two (5 rows, 2 columns).",
+        "NOTE: PROC PRINT completed: 2 observations printed, 3 variables",
+        "NOTE: PROC PRINT completed: 3 observations printed, 3 variables"
+    ],
+    "log_does_not_contain": [
+        "ERROR:",
+        "[JENNER-ERROR"
+    ],
+
+    "output_contains": [
+        "only in one (left anti-join)",
+        "only in two (right anti-join)",
+        "in both (inner join)"
+    ],
+
+    "diagnostics": {
+        "parse_warnings": [],
+        "runtime_warnings": []
+    }
+}
diff --git a/jenner-check/t002_merge_in_categorize/meta.json b/jenner-check/t002_merge_in_categorize/meta.json
new file mode 100644
index 0000000..e66afa0
--- /dev/null
+++ b/jenner-check/t002_merge_in_categorize/meta.json
@@ -0,0 +1,8 @@
+{
+    "bundle": "t002_merge_in_categorize",
+    "source_file": "SET statement considered harmful.sas",
+    "source_blob_sha": "800d98b0d427a89af5d927bd263a0472047ec900",
+    "source_commit": "438fbf5de1e62074bc683ee854579880ef51b128",
+    "tier": "real_data",
+    "notes": "Cases 'DISENCHANTING' / 'one_and_two / only_in_one / only_in_two' (lines 401-451 of SET statement considered harmful.sas). Demonstrates that PROC SQL left/right/inner JOIN can be replaced with one MERGE statement using IN= flags + SELECT/WHEN. The original sources ONE and TWO from a libname-backed file generated higher up in the script; here we build small synthetic ONE (obs 1-5) and TWO (obs 3-7) inline so the bundle is self-contained."
+}
diff --git a/jenner-check/t002_merge_in_categorize/script.sas b/jenner-check/t002_merge_in_categorize/script.sas
new file mode 100644
index 0000000..f2045ae
--- /dev/null
+++ b/jenner-check/t002_merge_in_categorize/script.sas
@@ -0,0 +1,37 @@
+/* From: SET statement considered harmful.sas
+   Case: replacing PROC SQL left/right/inner JOIN with a single MERGE +
+   IN= flags + SELECT/WHEN.
+
+   The original sources ONE and TWO from a libname-backed file. Here we
+   build a small synthetic ONE and TWO inline so the bundle is
+   self-contained.                                                     */
+
+data one;
+  do obs = 1 to 5;              /* keys 1-5 */
+    x = obs * 10;
+    output;
+  end;
+run;
+
+data two;
+  do obs = 3 to 7;              /* keys 3-7: overlap with ONE on 3, 4, 5 */
+    y = obs * 100;
+    output;
+  end;
+run;
+
+/* Single pass over both datasets, IN= flags drive the routing. */
+data only_in_one only_in_two one_and_two;
+  merge one(in=o1) two(in=t2);  /* o1/t2 = 1 when that input contributed to the current obs */
+  by obs;                       /* both inputs were built in ascending obs order */
+  select;                       /* no select-expression: the first true WHEN runs */
+    when (o1 and not t2) output only_in_one;
+    when (not o1 and t2) output only_in_two;
+    when (o1 and t2)     output one_and_two;
+    otherwise;                  /* never reached: a merged row always has o1 or t2 set */
+  end;
+run;
+
+proc print data=only_in_one noobs; title 'only in one (left anti-join)'; run;
+proc print data=only_in_two noobs; title 'only in two (right anti-join)'; run;
+proc print data=one_and_two noobs; title 'in both (inner join)'; run;
diff --git a/jenner-check/t003_by_group_range_shift/autoexec.sas b/jenner-check/t003_by_group_range_shift/autoexec.sas
new file mode 100644
index 0000000..be079d2
--- /dev/null
+++ b/jenner-check/t003_by_group_range_shift/autoexec.sas
@@ -0,0 +1,4 @@
+/* autoexec for t003_by_group_range_shift
+   - OBS=100: process at most 100 obs per input (matches Jenner's unlicensed
+     tier exactly, so the run is reproducible regardless of license state). */
+options obs=100;
diff --git a/jenner-check/t003_by_group_range_shift/expected.json b/jenner-check/t003_by_group_range_shift/expected.json
new file mode 100644
index 0000000..258c45d
--- /dev/null
+++ b/jenner-check/t003_by_group_range_shift/expected.json
@@ -0,0 +1,34 @@
+{
+    "_captured_at": "2026-05-10T17:05:08Z",
+    "_captured_run_id": "r_019e12d98f637d70bbdd98e18fefe3d5",
+    "_captured_from": "https://api.jenneranalytics.com/v1/run",
+
+    "status": "ok",
+    "exit_code": 0,
+
+    "log_contains": [
+        "NOTE: Option OBS changed to 100.",
+        "NOTE: Read 15 rows from DATALINES.",
+        "NOTE: Wrote have (15 rows, 2 columns).",
+        "NOTE: Wrote aggr (3 rows, 2 columns).",
+        "NOTE: PROC PRINT completed: 3 observations printed, 2 variables",
+        "NOTE: PROC PRINT completed: 10 observations printed, 3 variables"
+    ],
+    "log_does_not_contain": [
+        "ERROR:",
+        "[JENNER-ERROR"
+    ],
+
+    "output_contains": [
+        "per-id range (aggregate)",
+        "merged back: shift = number / range",
+        "A      13",
+        "B      13",
+        "C      15"
+    ],
+
+    "diagnostics": {
+        "parse_warnings": [],
+        "runtime_warnings": []
+    }
+}
diff --git a/jenner-check/t003_by_group_range_shift/meta.json b/jenner-check/t003_by_group_range_shift/meta.json
new file mode 100644
index 0000000..04b44fc
--- /dev/null
+++ b/jenner-check/t003_by_group_range_shift/meta.json
@@ -0,0 +1,8 @@
+{
+    "bundle": "t003_by_group_range_shift",
+    "source_file": "SET statement considered harmful.sas",
+    "source_blob_sha": "800d98b0d427a89af5d927bd263a0472047ec900",
+    "source_commit": "438fbf5de1e62074bc683ee854579880ef51b128",
+    "tier": "real_data",
+    "notes": "Case '#HASH TABLE FOR HELP' standard variant (lines 627-654 of SET statement considered harmful.sas). Two-pass BY-group aggregation: first.id/last.id + RETAIN to compute per-id range, then MERGE back to normalise. The original sources HAVE from a libname-backed file; here we ship a small inline HAVE (3 IDs x 5 numbers each) so the bundle is self-contained. The downstream hash-table version of the same lesson is left aside since it relies on a much larger dataset to make the point."
+}
diff --git a/jenner-check/t003_by_group_range_shift/script.sas b/jenner-check/t003_by_group_range_shift/script.sas
new file mode 100644
index 0000000..4bd73f5
--- /dev/null
+++ b/jenner-check/t003_by_group_range_shift/script.sas
@@ -0,0 +1,62 @@
+/* From: SET statement considered harmful.sas
+   Case: "#HASH TABLE FOR HELP" - the standard (non-hash) approach.
+
+   Two-pass aggregation: first compute per-id range using BY-group
+   FIRST./LAST. + RETAIN, then MERGE the per-row data with the
+   per-group aggregate to derive a normalised shift = number/range.
+
+   The original sources HAVE from a libname-backed file. Here we ship
+   a small inline HAVE with three IDs so the bundle is self-contained. */
+
+data have;
+  length id $ 1;
+  input id $ number;
+  datalines;
+A 12
+A 5
+A 18
+A 9
+A 14
+B 30
+B 22
+B 35
+B 28
+B 33
+C 50
+C 45
+C 60
+C 48
+C 55
+;
+run;
+
+proc sort data=have; by id; run;   /* the BY-group steps below need HAVE ordered by id */
+
+/* Pass 1 — per-id min/max/range, one output per BY-group */
+data aggr;
+  set have;
+  by id;
+  retain maxN minN;             /* running extremes persist across rows */
+  if first.id then do;          /* new id: restart both extremes from this row */
+    maxN = number;
+    minN = number;
+  end;
+  maxN = max(maxN, number);
+  minN = min(minN, number);
+  if last.id then do;           /* group complete: emit its single summary row */
+    range = maxN - minN;
+    output;
+  end;
+  keep id range;
+run;
+
+/* Pass 2 — merge per-row data with per-group aggregate */
+data want;
+  merge have aggr;              /* AGGR has one row per id; its range carries across that id's HAVE rows */
+  by id;
+  if range > 0 then shift = number / range;   /* shift stays missing unless range is positive */
+  drop range;
+run;
+
+proc print data=aggr noobs; title 'per-id range (aggregate)'; run;
+proc print data=want(obs=10) noobs; title 'merged back: shift = number / range'; run;   /* first 10 of WANT's 15 rows */
diff --git a/jenner-check/t004_format_dataset_modify/autoexec.sas b/jenner-check/t004_format_dataset_modify/autoexec.sas
new file mode 100644
index 0000000..2f401d0
--- /dev/null
+++ b/jenner-check/t004_format_dataset_modify/autoexec.sas
@@ -0,0 +1,4 @@
+/* autoexec for t004_format_dataset_modify
+   - OBS=100: process at most 100 obs per input (matches Jenner's unlicensed
+     tier exactly, so the run is reproducible regardless of license state). */
+options obs=100;
diff --git a/jenner-check/t004_format_dataset_modify/expected.json b/jenner-check/t004_format_dataset_modify/expected.json
new file mode 100644
index 0000000..c012be0
--- /dev/null
+++ b/jenner-check/t004_format_dataset_modify/expected.json
@@ -0,0 +1,32 @@
+{
+    "_captured_at": "2026-05-10T17:05:09Z",
+    "_captured_run_id": "r_019e12d9911271b2a2ec0dcc87df5569",
+    "_captured_from": "https://api.jenneranalytics.com/v1/run",
+
+    "status": "ok",
+    "exit_code": 0,
+
+    "log_contains": [
+        "NOTE: Option OBS changed to 100.",
+        "NOTE: Read 3 rows from DATALINES.",
+        "NOTE: Wrote have (3 rows, 2 columns).",
+        "NOTE: PROC DATASETS library=WORK",
+        "NOTE: PROC PRINT completed: 3 observations printed, 2 variables"
+    ],
+    "log_does_not_contain": [
+        "ERROR:",
+        "[JENNER-ERROR"
+    ],
+
+    "output_contains": [
+        "same result, very different cost",
+        "A        I",
+        "B       II",
+        "C      III"
+    ],
+
+    "diagnostics": {
+        "parse_warnings": [],
+        "runtime_warnings": []
+    }
+}
diff --git a/jenner-check/t004_format_dataset_modify/meta.json b/jenner-check/t004_format_dataset_modify/meta.json
new file mode 100644
index 0000000..8b9f53a
--- /dev/null
+++ b/jenner-check/t004_format_dataset_modify/meta.json
@@ -0,0 +1,8 @@
+{
+    "bundle": "t004_format_dataset_modify",
+    "source_file": "SET statement considered harmful.sas",
+    "source_blob_sha": "800d98b0d427a89af5d927bd263a0472047ec900",
+    "source_commit": "438fbf5de1e62074bc683ee854579880ef51b128",
+    "tier": "real_data",
+    "notes": "Case 'IT MAKES MY BLOOD BOIL' (lines 105-119 of SET statement considered harmful.sas). Demonstrates that DATA-step format application rewrites every row, while PROC DATASETS MODIFY touches only the descriptor. The original sources HAVE from a libname-backed file; here we ship a tiny inline HAVE (3 rows) so the bundle is self-contained. One adaptation: original PROC DATASETS used 'noprint'; substituted 'nolist' which is the same intent (suppress contents listing) and is what Jenner currently accepts."
+}
diff --git a/jenner-check/t004_format_dataset_modify/script.sas b/jenner-check/t004_format_dataset_modify/script.sas
new file mode 100644
index 0000000..e79edf4
--- /dev/null
+++ b/jenner-check/t004_format_dataset_modify/script.sas
@@ -0,0 +1,36 @@
+/* From: SET statement considered harmful.sas
+   Case: "IT MAKES MY BLOOD BOIL" - applying a display format.
+
+   The lesson: a DATA step that only changes display metadata still
+   reads and rewrites every row. PROC DATASETS MODIFY only updates
+   the descriptor, so it is O(1) regardless of dataset size.
+
+   The original sources HAVE from a libname-backed file. Here we ship
+   a small inline HAVE so the bundle is self-contained.               */
+
+data have;
+  length id $ 1;
+  input id $ number;
+  datalines;
+A 1
+B 2
+C 3
+;
+run;
+
+/* "blood boiler" — full DATA-step pass that only adds a display format.
+   Writes HAVE_BOILED so the PRINT below proves MODIFY alone formats HAVE. */
+data have_boiled;
+  set have;
+  format number ROMAN12.;
+run;
+
+/* "cooler" — PROC DATASETS MODIFY updates only the descriptor.
+   No rows are read.                                                  */
+proc datasets lib=work nolist;
+  modify have;
+    format number ROMAN12.;
+  run;
+quit;
+
+proc print data=have noobs; title "same result, very different cost"; run;
diff --git a/jenner-check/t005_merge_collapse_interim/autoexec.sas b/jenner-check/t005_merge_collapse_interim/autoexec.sas
new file mode 100644
index 0000000..040c487
--- /dev/null
+++ b/jenner-check/t005_merge_collapse_interim/autoexec.sas
@@ -0,0 +1,4 @@
+/* autoexec for t005_merge_collapse_interim
+   - OBS=100: process at most 100 obs per input (matches Jenner's unlicensed
+     tier exactly, so the run is reproducible regardless of license state). */
+options obs=100;
diff --git a/jenner-check/t005_merge_collapse_interim/expected.json b/jenner-check/t005_merge_collapse_interim/expected.json
new file mode 100644
index 0000000..98f35c3
--- /dev/null
+++ b/jenner-check/t005_merge_collapse_interim/expected.json
@@ -0,0 +1,26 @@
+{
+    "_captured_at": "2026-05-10T17:05:09Z",
+    "_captured_run_id": "r_019e12d992b37b509dfb0e9d01ab0b98",
+    "_captured_from": "https://api.jenneranalytics.com/v1/run",
+
+    "status": "ok",
+    "exit_code": 0,
+
+    "log_contains": [
+        "NOTE: Option OBS changed to 100.",
+        "NOTE: Wrote have (5 rows, 5 columns).",
+        "NOTE: Wrote data1 (3 rows, 6 columns).",
+        "NOTE: Wrote data2 (5 rows, 6 columns).",
+        "wandering: sum=3641",
+        "straight: sum=3641"
+    ],
+    "log_does_not_contain": [
+        "ERROR:",
+        "[JENNER-ERROR"
+    ],
+
+    "diagnostics": {
+        "parse_warnings": [],
+        "runtime_warnings": []
+    }
+}
diff --git a/jenner-check/t005_merge_collapse_interim/meta.json b/jenner-check/t005_merge_collapse_interim/meta.json
new file mode 100644
index 0000000..662bdbe
--- /dev/null
+++ b/jenner-check/t005_merge_collapse_interim/meta.json
@@ -0,0 +1,8 @@
+{
+    "bundle": "t005_merge_collapse_interim",
+    "source_file": "SET statement considered harmful.sas",
+    "source_blob_sha": "800d98b0d427a89af5d927bd263a0472047ec900",
+    "source_commit": "438fbf5de1e62074bc683ee854579880ef51b128",
+    "tier": "real_data",
+    "notes": "Case 'DISENCHANTING' (lines 281-312 of SET statement considered harmful.sas). Demonstrates that an intermediate MERGE dataset followed by a downstream DATA step can be collapsed by folding the work into the MERGE itself. The original sources HAVE from a libname-backed file; here we ship a small synthetic HAVE (5 rows, two derived datasets) so the bundle is self-contained. Both approaches print the same sum (3641), proving equivalence."
+}
diff --git a/jenner-check/t005_merge_collapse_interim/script.sas b/jenner-check/t005_merge_collapse_interim/script.sas
new file mode 100644
index 0000000..f7bae5f
--- /dev/null
+++ b/jenner-check/t005_merge_collapse_interim/script.sas
@@ -0,0 +1,53 @@
+/* From: SET statement considered harmful.sas
+   Case: "DISENCHANTING" - the wandering-vs-straight MERGE pattern.
+
+   "Wandering" stages an intermediate MERGE dataset, then a second
+   DATA step iterates the interim and computes a sum. "Straight" folds
+   the work into the MERGE itself, removing one full dataset I/O.
+
+   The original sources HAVE from a libname-backed file. Here we ship
+   a small synthetic HAVE inline so the bundle is self-contained.     */
+
+data have;                      /* 5 rows; variables obs, grp, id, number */
+  do obs = 1 to 5;
+    grp = obs;
+    length id $ 1;              /* declarative: applied at compile time, not per iteration */
+    if obs in (1,3,5) then id = "A";
+    else id = "B";
+    number = obs * 10;
+    output;
+  end;
+run;
+
+data data1;
+  set have;
+  where id ne "B";              /* keeps obs 1, 3, 5 */
+  number2 = number * number;
+run;
+
+data data2;
+  set have;
+  where grp ne 17;              /* grp is 1-5 here, so all 5 rows pass */
+  number3 = number + 17;
+run;
+
+/* "wandering around" — interim MERGE dataset, then a second DATA step
+   reads it back and sums. */
+data interim;
+  merge data1 data2;            /* both inputs keep HAVE's ascending obs order */
+  by obs;
+run;
+data _null_;
+  set interim end=eof;
+  sum + (number2 + number3);    /* obs 2, 4 lack number2 -> missing operand, which the sum statement skips */
+  if eof then put 'wandering: sum=' sum;
+run;
+
+/* "straight to the point" — fold the work into the MERGE itself.
+   Same final number, one fewer dataset materialised. */
+data _null_;
+  merge data1 data2 end=eof;
+  by obs;
+  sum + (number2 + number3);    /* same accumulation as above: 127 + 947 + 2567 */
+  if eof then put 'straight: sum=' sum;
+run;