From 2ba315bed2ae00d3e7fc1d09a98d95636b411d43 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 13:45:35 +0000 Subject: [PATCH] =?UTF-8?q?feat(generator):=20SEM004=20=E2=80=94=20flag=20?= =?UTF-8?q?dimensions.json=20units=20missing=20from=20units.json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this diagnostic, BuildToBaseExpression silently falls back to identity conversion when an availableUnits entry doesn't match any unit declared in units.json. That's wrong for any non-base unit — a typo like "Metre" instead of "Meter", or a unit that hasn't been added to units.json yet, would compile clean and emit a From{Unit} factory whose body just passes the value through unchanged. SEM004 walks every dimension's availableUnits, dedups by (dimension, unitName), and warns with the unit name plus the offending dimension so the typo is easy to find. Verified with a deliberate "Metre" injection: produces the expected diagnostic and identifies Length as the owner. Reverted to clean metadata; current build is silent. Side-effect: the silent identity fallback in BuildToBaseExpression is preserved so the build still succeeds; SEM004 is the surfacing mechanism. If we want hard failure on unknown units in CI, treat warnings as errors at the project level (TreatWarningsAsErrors). CLAUDE.md generator-diagnostics list updated. --- CLAUDE.md | 1 + .../AnalyzerReleases.Unshipped.md | 1 + .../Generators/QuantitiesGenerator.cs | 60 +++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 1dd806c..de37b4a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -150,6 +150,7 @@ var converted = sourceString.As(); - **SEM001** — a relationship in `dimensions.json` references a dimension that does not exist (typo or rename). The operator is silently dropped. - **SEM002** — schema-level validation issue (missing `name`/`symbol`, empty `availableUnits`, duplicate type names, no vector forms declared). - **SEM003** — a relationship's explicit `forms` list references a vector form not declared on a participating dimension. Use `forms` to constrain a relationship to specific vector forms (e.g. `crossProducts: [{ "other": "Length", "result": "Torque", "forms": [3] }]`); when omitted, the legacy "emit at every common form" behaviour is preserved. + - **SEM004** — a dimension's `availableUnits` array references a unit name that isn't declared anywhere in `units.json`. Without the diagnostic the generator silently emits an identity-conversion `From{Unit}` factory, which is wrong for any non-base unit; SEM004 catches the typo at build time. - See `docs/physics-generator.md` for the full schema and an end-to-end "add a dimension" walk-through. This file is the entry point. For deeper material: diff --git a/Semantics.SourceGenerators/AnalyzerReleases.Unshipped.md b/Semantics.SourceGenerators/AnalyzerReleases.Unshipped.md index dd3509b..bf491e5 100644 --- a/Semantics.SourceGenerators/AnalyzerReleases.Unshipped.md +++ b/Semantics.SourceGenerators/AnalyzerReleases.Unshipped.md @@ -8,3 +8,4 @@ Rule ID | Category | Severity | Notes SEM001 | Semantics.SourceGenerators | Warning | Reports relationships in dimensions.json that reference unknown dimension names. SEM002 | Semantics.SourceGenerators | Warning | Reports schema-level validation issues in dimensions.json (missing fields, duplicate type names, etc). SEM003 | Semantics.SourceGenerators | Warning | Reports a relationship whose explicit `forms` list references a vector form not declared on a participating dimension. +SEM004 | Semantics.SourceGenerators | Warning | Reports a `dimensions.json` `availableUnits` entry that doesn't match any unit declared in `units.json`. diff --git a/Semantics.SourceGenerators/Generators/QuantitiesGenerator.cs b/Semantics.SourceGenerators/Generators/QuantitiesGenerator.cs index 59866c4..c335fdd 100644 --- a/Semantics.SourceGenerators/Generators/QuantitiesGenerator.cs +++ b/Semantics.SourceGenerators/Generators/QuantitiesGenerator.cs @@ -45,6 +45,14 @@ public class QuantitiesGenerator : GeneratorBase defaultSeverity: DiagnosticSeverity.Warning, isEnabledByDefault: true); + private static readonly DiagnosticDescriptor UnknownUnitReference = new( + id: "SEM004", + title: "dimensions.json references a unit not declared in units.json", + messageFormat: "Unit '{0}' (referenced by dimension '{1}'.availableUnits) is not declared in units.json; the generated From{0} factory will use an identity conversion. Add the unit to units.json or fix the spelling.", + category: "Semantics.SourceGenerators", + defaultSeverity: DiagnosticSeverity.Warning, + isEnabledByDefault: true); + public QuantitiesGenerator() : base("dimensions.json") { } /// @@ -140,6 +148,13 @@ private void GenerateInner(SourceProductionContext context, DimensionsMetadata m Dictionary unitMap = BuildUnitMap(units); + // Issue #58/#48 follow-up: surface dimensions.json availableUnits entries that + // don't exist in units.json. The generator's BuildToBaseExpression silently falls + // back to identity conversion in that case, which is wrong for any non-base unit + // — a typo (e.g. "Kilometres" vs "Kilometers") would silently produce a factory + // with no scale factor. SEM004 catches that at build time. + ReportUnknownUnitReferences(context, metadata, unitMap); + // Phase A: Build maps and collect operators Dictionary dimensionMap = BuildDimensionMap(metadata); Dictionary typeFormMap = BuildTypeFormMap(metadata); @@ -549,6 +564,51 @@ private static Dictionary BuildUnitMap(UnitsMetadata uni return map; } + /// + /// Walks every availableUnits entry across the dimensions metadata and emits + /// SEM004 for any unit name that doesn't appear in . + /// Deduplicates by (unit, dimension) so a typo on a unit shared by many dimensions + /// reports once per offending dimension instead of per-form/overload. + /// + private static void ReportUnknownUnitReferences( + SourceProductionContext context, + DimensionsMetadata metadata, + Dictionary unitMap) + { + // If units.json wasn't loaded the map is empty; treating every unit as "unknown" + // would flood the build log. The CombinedMetadata loader already supplies a + // non-null UnitsMetadata even when units.json is missing — check for that case + // and bail rather than report a useless wall of warnings. + if (unitMap.Count == 0) + { + return; + } + + HashSet seen = []; + foreach (PhysicalDimension dim in metadata.PhysicalDimensions) + { + foreach (string unitName in dim.AvailableUnits) + { + if (string.IsNullOrEmpty(unitName) || unitMap.ContainsKey(unitName)) + { + continue; + } + + string key = $"{dim.Name}::{unitName}"; + if (!seen.Add(key)) + { + continue; + } + + context.ReportDiagnostic(Diagnostic.Create( + UnknownUnitReference, + Location.None, + unitName, + dim.Name)); + } + } + } + /// /// Emits one From{Unit} static factory per entry in . /// The first unit is treated as the SI base unit (no conversion). Subsequent units use the