nano-step · luohui1 · Jun 13, 2026 · gemini-code-assist · Jun 13, 2026
diff --git a/README.md b/README.md
@@ -242,14 +242,14 @@ EVAL_SKIP_AUTH_CHECK=1 eval-harness run --skill=<your-skill> --dry-run
 # Layer 1 — single check kind in isolation
 bash scripts/eval/lib/score.sh check <one-check.yaml> <workdir> <transcript>
 
-# Full test suite — 20 suites covering every primitive
+# Full test suite — 21 suites covering every primitive
 for t in scripts/eval/tests/*.sh; do bash "$t"; done
 # → all should print PASS
 ```
 
 If you need to know whether a specific factor is being checked, point at the case YAML — `.checks[]` is the complete list of factors that case enforces. There is no hidden scoring.
 
-### Verified test suites (20/20 green on `main`)
+### Verified test suites (21/21 green on `main`)
 
 | Suite | Covers |
 |---|---|
@@ -269,6 +269,7 @@ If you need to know whether a specific factor is being checked, point at the cas
 | `fix_proposal_render.sh`      | `fix_proposal` renders in `diff.md` (closes BLK-5) |
 | `bypass.sh`                   | `EVAL_BYPASS=1` exits 0 + writes bypass event (closes BLK-1) |
 | `shell_safety.sh`             | `score_shell` filter accepts jq/pipes/wc; rejects rm/curl/`$()`/backtick/`>`; honors `unsafe_shell:` opt-in (closes BLK-2) |
+| `shell_no_expectation.sh`     | `score_shell` treats missing `expect_*` fields as harness errors, not ordinary FAILs |
 | `fixture_path_traversal.sh`   | Fixture copy rejects absolute paths + `..` segments (closes BLK-3) |
 | `attribution_portable.sh`     | Attribution works under GNU + BSD grep (closes BLK-4) |
 | `transcript_empty_guard.sh`   | Missing/empty transcript → harness error not vacuous PASS (closes BLK-7) |

diff --git a/scripts/eval/lib/score.sh b/scripts/eval/lib/score.sh
@@ -68,6 +68,21 @@ score_shell() {
   local expect_exact; expect_exact="$(yq -r '.expect_exact // empty' "$check_file")"
   local unsafe_opt_in; unsafe_opt_in="$(yq -r '.unsafe_shell // false' "$check_file" 2>/dev/null || echo false)"
 
+  if ! grep -qE '^[[:space:]]*expect_(regex|min|exact)[[:space:]]*:' "$check_file"; then
-  if ! grep -qE '^[[:space:]]*expect_(regex|min|exact)[[:space:]]*:' "$check_file"; then
+  if [[ -z "$expect_regex" && -z "$expect_min" && -z "$expect_exact" ]]; then
-  if ! grep -qE '^[[:space:]]*expect_(regex|min|exact)[[:space:]]*:' "$check_file"; then
+  if [[ -z "$expect_regex" && -z "$expect_min" && -z "$expect_exact" ]]; then
+    jq -n \
+      --arg cmd "$cmd" \
+      '{
+        kind: "shell",
+        passed: false,
+        failed_check_id: ("shell:" + $cmd),
+        expected: "at least one expect_* field (expect_regex / expect_min / expect_exact)",
+        actual: "none set - check YAML for typo like expected_*",
+        diff_hint: "this check is misconfigured; treating as harness error",
+        error: true
+      }'
+    return 0
+  fi
+
   if [[ "$unsafe_opt_in" != "true" && "${EVAL_ALLOW_UNSAFE_SHELL:-0}" != "1" ]] && score_shell_is_unsafe "$cmd"; then
     jq -n \
       --arg cmd "$cmd" \

diff --git a/scripts/eval/tests/shell_no_expectation.sh b/scripts/eval/tests/shell_no_expectation.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Regression test: score_shell must flag a check with no expect_* field as a harness error.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../lib/yq-shim.sh"
+source "$SCRIPT_DIR/../lib/llm_judge.sh"
+source "$SCRIPT_DIR/../lib/autofix.sh"
+source "$SCRIPT_DIR/../lib/score.sh"
+
+WORK="$(mktemp -d -t eval-harness-shell-no-expect.XXXXXX)"
+trap 'rm -rf "$WORK"' EXIT
+
+cat > "$WORK/no-expectation.yaml" <<'YAML'
+kind: shell
+cmd: "printf ok"
+YAML
+
+# Guard explicitly: under set -e a non-zero return here would abort the script without any FAIL line.
+out="$(score_shell "$WORK/no-expectation.yaml" "$WORK")" || { echo "FAIL: score_shell exited non-zero for a check with no expect_* fields" >&2; echo "$out" >&2; exit 1; }
+passed="$(jq -r '.passed' <<<"$out")"
+err="$(jq -r '.error // false' <<<"$out")"
+expected="$(jq -r '.expected' <<<"$out")"
+actual="$(jq -r '.actual' <<<"$out")"
+hint="$(jq -r '.diff_hint' <<<"$out")"
+
+[[ "$passed" == "false" ]] || { echo "FAIL: missing expectations should not pass" >&2; echo "$out" >&2; exit 1; }
+[[ "$err" == "true" ]] || { echo "FAIL: missing expectations should set error=true, got $err" >&2; echo "$out" >&2; exit 1; }
+[[ "$expected" == *"expect_regex"* ]] || { echo "FAIL: expected should mention expect_* fields, got: $expected" >&2; exit 1; }
+[[ "$actual" == *"none set"* ]] || { echo "FAIL: actual should mention no expectations, got: $actual" >&2; exit 1; }
+[[ "$hint" == *"misconfigured"* ]] || { echo "FAIL: hint should identify a misconfigured check, got: $hint" >&2; exit 1; }
+
+echo "PASS: shell checks with no expect_* fields surface as harness errors"