From af4f586258c9bab345e5e20e016229db03415aae Mon Sep 17 00:00:00 2001 From: Kevin Elliott Date: Tue, 9 Jun 2026 22:58:52 -0700 Subject: [PATCH 1/2] Corpus parity: formatter-enum expansion, try/catch for binary chains, success-conditional descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new TS corpus runner (acars-decoder-typescript, 289 samples) failed 41 samples on first run, splitting into three root-cause classes. This commit fixes all three at the spec/emitter level: Class B — formatter enum too narrow (10 samples, 44-family): The v1 enum forced specs to map month/day/eta/off/on/in/fuel-remaining through generic `timestamp`/`fuel`, flattening distinct legacy items (MSG_MON, MSG_DAY, ON "Landing Time", ' FUEL_REM') into TIMESTAMP/FOB items. The unit tests never caught it; the corpus deep-equal did. - schema: formatter_call.type adds eta/out/off/on/in/day/month/ departure_day/arrival_day/fuel_remaining/remaining_fields - ir.ts: FormatterCall union extended to match - emit-typescript: methodMap additions + `remaining_fields` emission (`if (data.length > N) ResultFormatter.unknownArr(result, data.slice(N))` — the Label_44_Base.addRemainingFields pattern, closing the bulk-port agent's documented trailing-fields caveat) - specs 44/{ON,IN,OFF,ETA,POS}: items now use the precise types; 44/POS drops its fuel item entirely (legacy sets raw.fuel_in_tons with NO formatted item — the field's auto-raw emit covers it) Class A — success-conditional descriptions (30 samples; arinc_702, 16/AUTPOS, H1/Paren): Legacy plugins built on defaultResult() leave description "Unknown" on failed decodes and set the real description only on success (in-body or via Arinc702Helper). The generated wrapper set the spec description unconditionally. Fixed by setting those three specs' descriptions to "Unknown"; the existing hatches already set the real description on their success paths (verified), so successful samples are unaffected. Class C — uncaught throw in binary chains (1 sample, H1/OHMA): Legacy wrapped base64→inflate→decode chains in try/catch and failed gracefully; the generated chain threw. emit-typescript now wraps the decode body in try/catch (→ failUnknown) whenever the parse steps include a binary kind (ascii85/base64/deflate/text_decode/hex_decode). Rust/C are unaffected: their helpers return empty buffers rather than throwing. Runtime (TS): remainingFuel now tolerates undefined/NaN (mirrors the legacy parseFuel isNaN guard and the currentFuel precedent) so when-gated spec fields can call it unconditionally. All 68 specs validate; TS target regenerates clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- codegen/src/emit-typescript.ts | 51 ++++++++++++++++--- codegen/src/ir.ts | 11 ++++ runtimes/typescript/utils/result_formatter.ts | 6 ++- schema/ads-v1.schema.json | 11 ++++ spec/labels/16/AUTPOS.yaml | 4 +- spec/labels/44/ETA.yaml | 11 ++-- spec/labels/44/IN.yaml | 9 ++-- spec/labels/44/OFF.yaml | 11 ++-- spec/labels/44/ON.yaml | 9 ++-- spec/labels/44/POS.yaml | 11 ++-- spec/labels/H1/Paren.yaml | 4 +- spec/wildcards/arinc_702.yaml | 4 +- 12 files changed, 109 insertions(+), 33 deletions(-) diff --git a/codegen/src/emit-typescript.ts b/codegen/src/emit-typescript.ts index 9183a44..2db5a67 100644 --- a/codegen/src/emit-typescript.ts +++ b/codegen/src/emit-typescript.ts @@ -75,29 +75,48 @@ export function emitTypeScript(spec: SpecIR): string { // the hand-written plugins (which only have raw.position). const consumedByFormatter = collectFormatterRefs(spec.formatted); + // Binary decode chains (ascii85 / base64 / deflate / text_decode / + // hex_decode) throw on malformed input. The hand-written plugins wrapped + // those chains in try/catch and failed gracefully; mirror that. + const binaryKinds = new Set([ + "decode_ascii85", + "base64", + "deflate", + "text_decode", + "hex_decode", + ]); + const needsTryCatch = spec.parse.steps.some((s) => binaryKinds.has(s.kind)); + const bodyIndent = needsTryCatch ? " " : " "; + if (needsTryCatch) out.push(` try {`); + for (const step of spec.parse.steps) { - emitParseStep(step, out, " "); + emitParseStep(step, out, bodyIndent); } // Fields or Variants. if (spec.variants) { - emitVariants(spec.variants, out, " ", consumedByFormatter); + emitVariants(spec.variants, out, bodyIndent, consumedByFormatter); } else if (spec.fields) { for (const field of spec.fields) { - emitField(field, out, " ", consumedByFormatter); + emitField(field, out, bodyIndent, consumedByFormatter); } } // Formatter. - emitFormatted(spec.formatted, out, " "); + emitFormatted(spec.formatted, out, bodyIndent); // Success path. if (!hasExplicitDecodeLevelSetting(spec)) { const level = spec.plugin.decodeLevel.toLowerCase(); const tsLevel = level === "full" ? "'full'" : "'partial'"; - out.push(` this.setDecodeLevel(result, true, ${tsLevel});`); + out.push(`${bodyIndent}this.setDecodeLevel(result, true, ${tsLevel});`); + } + out.push(`${bodyIndent}return result;`); + if (needsTryCatch) { + out.push(` } catch {`); + out.push(` return this.failUnknown(result, message.text, options);`); + out.push(` }`); } - out.push(` return result;`); out.push(` }`); out.push(`}`); return out.join("\n") + "\n"; @@ -294,6 +313,16 @@ function emitFormatterCall(item: FormatterCall, out: string[], indent: string): out.push(`${indent}hatches.${item.customName}(result);`); return; } + // remaining_fields: trailing CSV fields → remaining.text (mirrors the + // Label_44_Base.addRemainingFields pattern). + if (item.type === "remaining_fields") { + const from = renderArg(item.args["from"]); + const start = Number(item.args["start"] ?? 0); + out.push(`${indent}if (${from}.length > ${start}) {`); + out.push(`${indent} ResultFormatter.unknownArr(result, ${from}.slice(${start}));`); + out.push(`${indent}}`); + return; + } // Map IR formatter type → ResultFormatter method. const methodMap: Record = { position: "position", @@ -301,12 +330,22 @@ function emitFormatterCall(item: FormatterCall, out: string[], indent: string): speed: "speed", heading: "heading", timestamp: "timestamp", + eta: "eta", + out: "out", + off: "off", + on: "on", + in: "in", + day: "day", + month: "month", + departure_day: "departureDay", + arrival_day: "arrivalDay", callsign: "callsign", flight_number: "flightNumber", tail_number: "tail", airport_origin: "departureAirport", airport_destination: "arrivalAirport", fuel: "currentFuel", + fuel_remaining: "remainingFuel", free_text: "unknownArr", }; const method = methodMap[item.type]; diff --git a/codegen/src/ir.ts b/codegen/src/ir.ts index 71d4a2b..f345445 100644 --- a/codegen/src/ir.ts +++ b/codegen/src/ir.ts @@ -126,12 +126,23 @@ export interface FormatterCall { | "speed" | "heading" | "timestamp" + | "eta" + | "out" + | "off" + | "on" + | "in" + | "day" + | "month" + | "departure_day" + | "arrival_day" | "callsign" | "flight_number" | "tail_number" | "airport_origin" | "airport_destination" | "fuel" + | "fuel_remaining" + | "remaining_fields" | "free_text" | "custom"; customName?: string; diff --git a/runtimes/typescript/utils/result_formatter.ts b/runtimes/typescript/utils/result_formatter.ts index 341229c..94c8b39 100644 --- a/runtimes/typescript/utils/result_formatter.ts +++ b/runtimes/typescript/utils/result_formatter.ts @@ -223,7 +223,11 @@ export class ResultFormatter { }); } - static remainingFuel(decodeResult: DecodeResult, value: number) { + static remainingFuel(decodeResult: DecodeResult, value: number | undefined) { + // Tolerate undefined/NaN — mirrors the legacy parseFuel() isNaN guard + // (and the currentFuel precedent) so when-gated spec fields can call + // this unconditionally. + if (value === undefined || value === null || Number.isNaN(value)) return; decodeResult.raw.fuel_remaining = value; decodeResult.formatted.items.push({ type: 'fuel_remaining', diff --git a/schema/ads-v1.schema.json b/schema/ads-v1.schema.json index 7211a3c..44ddf76 100644 --- a/schema/ads-v1.schema.json +++ b/schema/ads-v1.schema.json @@ -468,6 +468,17 @@ "airport_origin", "airport_destination", "fuel", + "fuel_remaining", + "eta", + "out", + "off", + "on", + "in", + "day", + "month", + "departure_day", + "arrival_day", + "remaining_fields", "free_text", "custom" ] diff --git a/spec/labels/16/AUTPOS.yaml b/spec/labels/16/AUTPOS.yaml index 7bacf3c..8b7216d 100644 --- a/spec/labels/16/AUTPOS.yaml +++ b/spec/labels/16/AUTPOS.yaml @@ -13,5 +13,7 @@ parse: # representable in current DSL. custom: label_16_autpos_decode formatted: - description: "Position Report" + # Legacy plugin used defaultResult() — description stays "Unknown" on + # failed decodes; the hatch sets "Position Report" on its success path. + description: "Unknown" custom: label_16_autpos_format diff --git a/spec/labels/44/ETA.yaml b/spec/labels/44/ETA.yaml index b0d262a..09ccc47 100644 --- a/spec/labels/44/ETA.yaml +++ b/spec/labels/44/ETA.yaml @@ -47,8 +47,9 @@ formatted: - { type: altitude, value: $altitude } - { type: airport_origin, value: $departure_icao } - { type: airport_destination, value: $arrival_icao } - - { type: timestamp, kind: month, value: $month } - - { type: timestamp, kind: day, value: $day } - - { type: timestamp, kind: current, value: $timestamp } - - { type: timestamp, kind: eta, value: $eta_time } - - { type: fuel, kind: remaining, value: $fuel_remaining, when_present: true } + - { type: month, value: $month } + - { type: day, value: $day } + - { type: timestamp, value: $timestamp } + - { type: eta, value: $eta_time } + - { type: fuel_remaining, value: $fuel_remaining } + - { type: remaining_fields, from: $data, start: 9 } diff --git a/spec/labels/44/IN.yaml b/spec/labels/44/IN.yaml index 8af0805..143ea85 100644 --- a/spec/labels/44/IN.yaml +++ b/spec/labels/44/IN.yaml @@ -40,7 +40,8 @@ formatted: - { type: position, value: $position } - { type: airport_origin, value: $departure_icao } - { type: airport_destination, value: $arrival_icao } - - { type: timestamp, kind: month, value: $month } - - { type: timestamp, kind: day, value: $day } - - { type: timestamp, kind: in, value: $in_time } - - { type: fuel, kind: remaining, value: $fuel_remaining, when_present: true } + - { type: month, value: $month } + - { type: day, value: $day } + - { type: in, value: $in_time } + - { type: fuel_remaining, value: $fuel_remaining } + - { type: remaining_fields, from: $data, start: 7 } diff --git a/spec/labels/44/OFF.yaml b/spec/labels/44/OFF.yaml index 4d5c35e..d78250e 100644 --- a/spec/labels/44/OFF.yaml +++ b/spec/labels/44/OFF.yaml @@ -43,8 +43,9 @@ formatted: - { type: position, value: $position } - { type: airport_origin, value: $departure_icao } - { type: airport_destination, value: $arrival_icao } - - { type: timestamp, kind: month, value: $month } - - { type: timestamp, kind: day, value: $day } - - { type: timestamp, kind: off, value: $off_time } - - { type: timestamp, kind: eta, value: $eta_time } - - { type: fuel, kind: remaining, value: $fuel_remaining, when_present: true } + - { type: month, value: $month } + - { type: day, value: $day } + - { type: off, value: $off_time } + - { type: eta, value: $eta_time } + - { type: fuel_remaining, value: $fuel_remaining } + - { type: remaining_fields, from: $data, start: 8 } diff --git a/spec/labels/44/ON.yaml b/spec/labels/44/ON.yaml index 18b9999..71774de 100644 --- a/spec/labels/44/ON.yaml +++ b/spec/labels/44/ON.yaml @@ -40,7 +40,8 @@ formatted: - { type: position, value: $position } - { type: airport_origin, value: $departure_icao } - { type: airport_destination, value: $arrival_icao } - - { type: timestamp, kind: month, value: $month } - - { type: timestamp, kind: day, value: $day } - - { type: timestamp, kind: on, value: $on_time } - - { type: fuel, kind: remaining, value: $fuel_remaining, when_present: true } + - { type: month, value: $month } + - { type: day, value: $day } + - { type: on, value: $on_time } + - { type: fuel_remaining, value: $fuel_remaining } + - { type: remaining_fields, from: $data, start: 7 } diff --git a/spec/labels/44/POS.yaml b/spec/labels/44/POS.yaml index 4bd6104..d7b01a8 100644 --- a/spec/labels/44/POS.yaml +++ b/spec/labels/44/POS.yaml @@ -58,11 +58,12 @@ formatted: description: "Position Report" items: - { type: position, value: $position } - - { type: timestamp, kind: month, value: $month } - - { type: timestamp, kind: day, value: $day } - - { type: timestamp, kind: current, value: $timestamp } - - { type: timestamp, kind: eta, value: $eta } - - { type: fuel, units: tons, value: $fuel_in_tons, when_present: true } + - { type: month, value: $month } + - { type: day, value: $day } + - { type: timestamp, value: $timestamp } + - { type: eta, value: $eta } + # fuel_in_tons is raw-only in the legacy plugin (no formatted item); + # the field's auto-raw emit covers it now that no formatter consumes it. - { type: airport_origin, value: $departure_icao } - { type: airport_destination, value: $arrival_icao } - { type: altitude, value: $altitude } diff --git a/spec/labels/H1/Paren.yaml b/spec/labels/H1/Paren.yaml index 5290676..50e9e4a 100644 --- a/spec/labels/H1/Paren.yaml +++ b/spec/labels/H1/Paren.yaml @@ -14,5 +14,7 @@ parse: # altitude is parsed * 100. Whole-plugin hatch for parity. custom: label_h1_paren_parse formatted: - description: "Position Report" + # Legacy plugin used defaultResult() — description stays "Unknown" on + # failed decodes; the hatch sets "Position Report" on its success path. + description: "Unknown" custom: label_h1_paren_format diff --git a/spec/wildcards/arinc_702.yaml b/spec/wildcards/arinc_702.yaml index 645d4c9..cf44c7c 100644 --- a/spec/wildcards/arinc_702.yaml +++ b/spec/wildcards/arinc_702.yaml @@ -11,5 +11,7 @@ parse: # recursive H1 message decoding all live in hand-written per-language code. custom: arinc_702_dispatch formatted: - description: "ARINC 702 Message" + # Legacy plugin used defaultResult() — description stays "Unknown" unless + # Arinc702Helper sets one on a successful decode. + description: "Unknown" custom: arinc_702_format From 83908caabd56d3a0a21d6c44d80c690cf8cc08a6 Mon Sep 17 00:00:00 2001 From: Kevin Elliott Date: Tue, 9 Jun 2026 23:01:03 -0700 Subject: [PATCH 2/2] DSL: add field-level 'raw: false' flag for intermediate fields Fields like Label_44_POS's flight_level_raw exist only to feed a later field/formatter; the legacy plugin never stored them in result.raw. The auto-raw emit was adding a divergent raw key (caught by the last 2 corpus failures). 'raw: false' suppresses the auto-emit. Co-Authored-By: Claude Opus 4.7 (1M context) --- codegen/src/emit-typescript.ts | 2 +- codegen/src/ir.ts | 2 ++ codegen/src/parse-spec.ts | 1 + schema/ads-v1.schema.json | 5 +++++ spec/labels/44/POS.yaml | 2 ++ 5 files changed, 11 insertions(+), 1 deletion(-) diff --git a/codegen/src/emit-typescript.ts b/codegen/src/emit-typescript.ts index 2db5a67..e46b9ba 100644 --- a/codegen/src/emit-typescript.ts +++ b/codegen/src/emit-typescript.ts @@ -224,7 +224,7 @@ function emitField( const decodeExpr = field.decode ? renderDecodeCall(field.decode, renderExpr(field.from)) : renderExpr(field.from); - const skipAutoRaw = consumedByFormatter.has(field.name); + const skipAutoRaw = consumedByFormatter.has(field.name) || field.raw === false; if (field.when) { // Declare outside the if so downstream formatters / variant-shared code // can still reference the variable when the guard fails — it'll be diff --git a/codegen/src/ir.ts b/codegen/src/ir.ts index f345445..c439585 100644 --- a/codegen/src/ir.ts +++ b/codegen/src/ir.ts @@ -87,6 +87,8 @@ export interface FieldIR { decode?: DecodeCall; when?: Condition; default?: ValueExpr; + /** false = intermediate field; do not auto-emit into result.raw. */ + raw?: boolean; description?: string; } diff --git a/codegen/src/parse-spec.ts b/codegen/src/parse-spec.ts index aa9c515..1773147 100644 --- a/codegen/src/parse-spec.ts +++ b/codegen/src/parse-spec.ts @@ -153,6 +153,7 @@ function lowerField(field: any): FieldIR { decode: field.decode ? lowerDecode(field.decode) : undefined, when: field.when ? lowerCondition(field.when) : undefined, default: field.default !== undefined ? lowerExpr(field.default) : undefined, + raw: field.raw, description: field.description, }; } diff --git a/schema/ads-v1.schema.json b/schema/ads-v1.schema.json index 44ddf76..0210fc1 100644 --- a/schema/ads-v1.schema.json +++ b/schema/ads-v1.schema.json @@ -252,6 +252,11 @@ "decode": { "$ref": "#/$defs/decode_call" }, "when": { "$ref": "#/$defs/condition" }, "default": { "$ref": "#/$defs/value_expr" }, + "raw": { + "type": "boolean", + "default": true, + "description": "Set false for intermediate fields that feed later fields/formatters but must NOT auto-emit into result.raw." + }, "description": { "type": "string" } } }, diff --git a/spec/labels/44/POS.yaml b/spec/labels/44/POS.yaml index d7b01a8..f4cfddf 100644 --- a/spec/labels/44/POS.yaml +++ b/spec/labels/44/POS.yaml @@ -20,6 +20,8 @@ fields: - name: flight_level_raw from: $m.flight_level_or_ground + # Intermediate value feeding `altitude`; legacy never stored it in raw. + raw: false decode: fn: custom custom: parse_flight_level_or_ground