databricks · sreekanth-db · Jul 3, 2026 · Jul 3, 2026
@@ -113,6 +113,9 @@ jobs:
           COMPARATOR_WAREHOUSE: ${{ vars.COMPARATOR_WAREHOUSE }}
           PRO_WAREHOUSE_ID: ${{ vars.PRO_WAREHOUSE_ID }}
           RUN_NAME: ci-weekly-${{ github.run_id }}
+          # Deep error comparison runs in shadow: every Thrift-vs-SEA error divergence is recorded
+          # in the report/CSV, but the run stays green.
+          ERROR_COMPARISON_MODE: shadow
           # Scoping knobs — workflow_dispatch.inputs populate these when
           # manually triggered. For schedule events, inputs are empty,
           # which the script treats as "run everything".

diff --git a/src/test/java/com/databricks/jdbc/comparator/COMPARATOR_README.md b/src/test/java/com/databricks/jdbc/comparator/COMPARATOR_README.md
@@ -268,13 +268,30 @@ If both are present for a method, **runOnly takes precedence**: argument combina
 | `NEGATIVE_ASYNC` | Databricks async extension — getExecutionResult before/after execute, executeAsync on invalid SQL |
 
 Negative suites compare each endpoint's **error behavior** (exception class, SQLState, vendor code,
-message) via the `ERROR_COMPARISON_MODE` gate (default `shadow`). See
-[`error/`](error/) for the comparison engine.
+message) via the `ERROR_COMPARISON_MODE` gate. See [`error/`](error/) for the comparison engine.
+
+### `ERROR_COMPARISON_MODE`
+
+| Mode | Behavior |
+|---|---|
+| `off` | Legacy: only the exception class is checked (`isAssignableFrom`). |
+| `shadow` | Deep error comparison runs and is recorded as `DIFF` rows, but **never fails** the run. |
+
+The default (both `run-comparator.sh` and the weekly CI) is **shadow**: the comparison runs and
+every Thrift-vs-SEA error divergence is recorded in the report/CSV, but the run stays green. Diffs
+are reviewed from the report; baseline noisy/accepted ones with
+`-DSKIP_DIFF_PATTERNS='<substring>|<substring>'`.
+
+Exception-class comparison is intentionally **strict** (exact match), not the legacy subclass
+tolerance. Message text is compared **raw** (no normalization) — a deliberate choice; message-only
+divergences show up as `DIFF` rows.
+
+> An enforcement mode that *fails* the run on divergences is intentionally not included yet — it
+> can be added later from observed shadow-run data (which divergences are real vs. accepted).
 
 Suites whose cases mutate or destroy connection/session state (`NEGATIVE_CONNECTION_STATE`,
-`NEGATIVE_TRANSACTION`, and — later — connection/cancel/volume cases) open their **own dedicated,
-uncached connections** via `ConnectionFactory.openFresh(side)` and close them in a `finally`, so
-they never poison the shared connections the other suites reuse. Providers request this by
-overriding the `execute(conn1, conn2, ConnectionFactory, testCase, label)` overload of
-`SuiteProvider`.
+`NEGATIVE_TRANSACTION`, `NEGATIVE_CONNECTION`, `NEGATIVE_CANCEL_TIMEOUT`) open their **own dedicated,
+uncached connections** via `ConnectionFactory` and close them in a `finally`, so they never poison
+the shared connections the other suites reuse. Providers request this by overriding the
+`execute(conn1, conn2, ConnectionFactory, testCase, label)` overload of `SuiteProvider`.
 
diff --git a/src/test/java/com/databricks/jdbc/comparator/error/ErrorPolicy.java b/src/test/java/com/databricks/jdbc/comparator/error/ErrorPolicy.java
@@ -3,30 +3,25 @@
 /**
  * Controls whether errors are compared deeply, via the {@code ERROR_COMPARISON_MODE} gate.
  *
- * <p>Modes advance with the rollout: {@code off} (legacy class-only check) → {@code shadow}
- * (compare and report, never fail CI) → {@code authoritative} (un-baselined error diffs fail the
- * build). The default is {@code shadow}: deep comparison runs and records DIFF rows, but a DIFF row
- * does not fail the build (only a re-thrown Throwable does), so the default is CI-safe. Read the
- * active policy via {@link #active()}.
+ * <p>Modes: {@code off} (legacy class-only check) and {@code shadow} (deep comparison — compare and
+ * report). The default is {@code shadow}: deep comparison runs and records DIFF rows in the
+ * report/CSV, but a DIFF never fails the run (only a re-thrown Throwable does). Read the active
+ * policy via {@link #active()}.
  *
- * <p>Message normalization and tolerance/baseline handling are intentionally omitted for now — we
- * compare the raw fields (class, SQLState, vendor code, message) and will add tolerance later, from
- * observed shadow-run data, only if it proves necessary.
+ * <p>Message normalization and tolerance/baseline handling are intentionally omitted — we compare
+ * the raw fields (class, SQLState, vendor code, message). (An enforcement mode that fails the run
+ * on divergences is out of scope here; it can be added later, from observed shadow-run data.)
  */
 public final class ErrorPolicy {
 
   public enum Mode {
     OFF,
-    SHADOW,
-    AUTHORITATIVE
+    SHADOW
   }
 
   private static final String MODE_PROPERTY = "ERROR_COMPARISON_MODE";
 
-  /**
-   * Default when the flag is unset, empty, or unrecognized. Shadow is CI-safe (records, never
-   * fails). Advanced to {@code authoritative} in the final rollout PR.
-   */
+  /** Default when the flag is unset, empty, or unrecognized. Shadow reports but never fails. */
   private static final Mode DEFAULT_MODE = Mode.SHADOW;
 
   private final Mode mode;
@@ -55,8 +50,6 @@ private static Mode parseMode(String raw) {
     switch (raw.trim().toLowerCase()) {
       case "shadow":
         return Mode.SHADOW;
-      case "authoritative":
-        return Mode.AUTHORITATIVE;
       case "off":
         return Mode.OFF;
       default:
@@ -65,7 +58,7 @@ private static Mode parseMode(String raw) {
         System.err.println(
             "[comparator] Unknown ERROR_COMPARISON_MODE '"
                 + raw
-                + "' (expected off|shadow|authoritative); defaulting to "
+                + "' (expected off|shadow); defaulting to "
                 + DEFAULT_MODE.name().toLowerCase()
                 + ".");
         return DEFAULT_MODE;
@@ -76,7 +69,7 @@ public Mode mode() {
     return mode;
   }
 
-  /** True when deep error comparison should run at all (shadow or authoritative). */
+  /** True when deep error comparison should run at all (shadow). */
   public boolean isDeepComparisonEnabled() {
     return mode != Mode.OFF;
   }

diff --git a/src/test/java/com/databricks/jdbc/comparator/run-comparator.sh b/src/test/java/com/databricks/jdbc/comparator/run-comparator.sh
@@ -104,6 +104,12 @@ METADATA_SKIP_SCHEMAS="${METADATA_SKIP_SCHEMAS:-information_schema}"
 METADATA_PARALLEL_THREADS="${METADATA_PARALLEL_THREADS:-50}"
 SKIP_DIFF_PATTERNS="${SKIP_DIFF_PATTERNS:-}"  # geo and variant fixes merged — no filtering needed
 
+# Error comparison mode: off | shadow.
+#   off    — legacy: only the exception class is checked
+#   shadow — deep error comparison (class/SQLState/code/message/one-sided) recorded as DIFF rows in
+#            the report/CSV; never fails the run (DEFAULT)
+ERROR_COMPARISON_MODE="${ERROR_COMPARISON_MODE:-shadow}"
+
 # Workspace setup (empty = skip, "recreate" = drop + create all test data)
 WORKSPACE_SETUP="${WORKSPACE_SETUP:-}"
 
@@ -306,6 +312,9 @@ fi
 if [ -n "${WORKSPACE_SETUP}" ]; then
   MVN_ARGS="${MVN_ARGS} -DWORKSPACE_SETUP=${WORKSPACE_SETUP}"
 fi
+if [ -n "${ERROR_COMPARISON_MODE}" ]; then
+  MVN_ARGS="${MVN_ARGS} -DERROR_COMPARISON_MODE=${ERROR_COMPARISON_MODE}"
+fi
 
 MVN_ARGS="${MVN_ARGS} -DMETADATA_FILTER_CONFIG=${FILTER_FILE}"