From 2830db46c7e8b590f9d467625f30cf0aa4f0d7a1 Mon Sep 17 00:00:00 2001 From: VPRamon Date: Mon, 23 Feb 2026 14:56:04 +0100 Subject: [PATCH 1/3] feat: Add Rust-based generator for C++ unit-wrapper headers from units.csv --- .gitignore | 1 + CMakeLists.txt | 20 +- gen_cpp_units/Cargo.lock | 7 + gen_cpp_units/Cargo.toml | 8 + gen_cpp_units/src/main.rs | 506 ++++++++++++++++++++++++++++++++++ include/qtty/literals.hpp | 19 +- include/qtty/units/length.hpp | 27 +- 7 files changed, 547 insertions(+), 41 deletions(-) create mode 100644 gen_cpp_units/Cargo.lock create mode 100644 gen_cpp_units/Cargo.toml create mode 100644 gen_cpp_units/src/main.rs diff --git a/.gitignore b/.gitignore index e149c57..0fea79a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ build/ cmake-build-*/ out/ +gen_cpp_units/target/ # IDE files .vscode/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 5192fd7..ff5e16a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,9 +39,9 @@ else() set(QTTY_LIBRARY_PATH ${QTTY_ARTIFACT_DIR}/libqtty_ffi.so) endif() -# Generate C++ unit wrappers from qtty_ffi.h -find_package(Python3 COMPONENTS Interpreter REQUIRED) -set(QTTY_FFI_HEADER ${QTTY_FFI_INCLUDE_DIR}/qtty_ffi.h) +# Generate C++ unit wrappers from units.csv via the Rust gen_cpp_units binary. +# Running `cargo run` builds the binary (and the qtty-ffi library) if needed. +set(QTTY_UNITS_CSV ${QTTY_SUBMODULE_DIR}/qtty-ffi/units.csv) set(GENERATED_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/qtty/units/length.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/qtty/units/time.hpp @@ -53,10 +53,11 @@ set(GENERATED_HEADERS add_custom_command( OUTPUT ${GENERATED_HEADERS} - COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/gen_cpp_units.py - DEPENDS ${QTTY_FFI_HEADER} ${CMAKE_CURRENT_SOURCE_DIR}/gen_cpp_units.py + COMMAND ${CARGO_BIN} run --manifest-path ${CMAKE_CURRENT_SOURCE_DIR}/gen_cpp_units/Cargo.toml --release + -- ${QTTY_UNITS_CSV} ${CMAKE_CURRENT_SOURCE_DIR}/include/qtty + DEPENDS ${QTTY_UNITS_CSV} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Generating C++ unit wrappers from qtty_ffi.h" + COMMENT "Generating C++ unit wrappers from units.csv (Rust)" VERBATIM ) @@ -231,8 +232,9 @@ install(EXPORT qtty_cppTargets # Generate unit files (if needed) add_custom_target( gen_units - COMMAND ${CARGO_BIN} run -p qtty-ffi --bin gen_units - WORKING_DIRECTORY ${QTTY_SUBMODULE_DIR} - COMMENT "Generating unit files" + COMMAND ${CARGO_BIN} run --manifest-path ${CMAKE_CURRENT_SOURCE_DIR}/gen_cpp_units/Cargo.toml --release + -- ${QTTY_UNITS_CSV} ${CMAKE_CURRENT_SOURCE_DIR}/include/qtty + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Generating C++ unit wrappers from units.csv (Rust)" VERBATIM ) diff --git a/gen_cpp_units/Cargo.lock b/gen_cpp_units/Cargo.lock new file mode 100644 index 0000000..e0f95a5 --- /dev/null +++ b/gen_cpp_units/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "gen_cpp_units" +version = "0.1.0" diff --git a/gen_cpp_units/Cargo.toml b/gen_cpp_units/Cargo.toml new file mode 100644 index 0000000..393948d --- /dev/null +++ b/gen_cpp_units/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "gen_cpp_units" +version = "0.1.0" +edition = "2021" +authors = ["Vallés Puig, Ramon <>"] +description = "Generator for C++ unit-wrapper headers from units.csv" + +[dependencies] diff --git a/gen_cpp_units/src/main.rs b/gen_cpp_units/src/main.rs new file mode 100644 index 0000000..520c951 --- /dev/null +++ b/gen_cpp_units/src/main.rs @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (C) 2026 Vallés Puig, Ramon + +//! Generate C++ unit-wrapper headers from `units.csv`. +//! +//! Reads the canonical `units.csv` (source-of-truth for every supported unit), +//! then emits one header per physical dimension and a `literals.hpp` with +//! user-defined literal operators for every unit that has a usable C++ +//! identifier suffix. +//! +//! # Usage +//! +//! ```text +//! cargo run -p gen_cpp_units --bin gen_cpp_units --release \ +//! -- +//! ``` +//! +//! The second argument must point to the directory that contains `ffi_core.hpp` +//! (i.e., `include/qtty`). The script creates `units/` under that directory. + +use std::collections::HashMap; +use std::env; +use std::fmt::Write as FmtWrite; +use std::fs; +use std::path::{Path, PathBuf}; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const LICENSE: &str = "// SPDX-License-Identifier: AGPL-3.0-or-later\n\ + // Copyright (C) 2026 Vallés Puig, Ramon\n"; + +/// Dimension name → (output file name, discriminant leading digit). +const DIMENSIONS: &[(&str, &str, u32)] = &[ + ("Length", "length.hpp", 1), + ("Time", "time.hpp", 2), + ("Angle", "angular.hpp", 3), + ("Mass", "mass.hpp", 4), + ("Power", "power.hpp", 5), +]; + +// --------------------------------------------------------------------------- +// Data types +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone)] +struct UnitDef { + /// PascalCase name from the CSV, e.g. `"Kilometer"`. + name: String, + /// UPPER_SNAKE_CASE constant suffix, e.g. `"KILOMETER"`. + const_suffix: String, + /// Raw unit symbol, e.g. `"km"` or `"µm"`. + symbol: String, + /// Dimension name, e.g. `"Length"`. + dimension: String, +} + +// --------------------------------------------------------------------------- +// Entry point +// --------------------------------------------------------------------------- + +fn main() { + let args: Vec = env::args().collect(); + if args.len() != 3 { + eprintln!( + "Usage: gen_cpp_units " + ); + std::process::exit(1); + } + + let csv_path = Path::new(&args[1]); + let out_dir = PathBuf::from(&args[2]); + + let units = parse_csv(csv_path); + + // Group by dimension, preserving CSV order within each group. + let mut order: Vec<&str> = Vec::new(); + let mut by_dim: HashMap<&str, Vec<&UnitDef>> = HashMap::new(); + for unit in &units { + let dim = unit.dimension.as_str(); + if !by_dim.contains_key(dim) { + order.push(dim); + } + by_dim.entry(dim).or_default().push(unit); + } + + // Generate one header per dimension. + let units_dir = out_dir.join("units"); + fs::create_dir_all(&units_dir) + .expect("Failed to create units output directory"); + + for (dim_name, file_name, _) in DIMENSIONS { + if let Some(units) = by_dim.get(dim_name) { + let content = generate_dimension_header(dim_name, units); + let path = units_dir.join(file_name); + fs::write(&path, &content) + .unwrap_or_else(|_| panic!("Failed to write {file_name}")); + eprintln!("Generated {file_name} ({} units)", units.len()); + } else { + eprintln!("Warning: no units found for dimension {dim_name}"); + } + } + + // Generate literals.hpp. + let literals_content = generate_literals(&by_dim, &order); + let literals_path = out_dir.join("literals.hpp"); + fs::write(&literals_path, &literals_content) + .expect("Failed to write literals.hpp"); + eprintln!("Generated literals.hpp"); + + let total: usize = by_dim.values().map(|v| v.len()).sum(); + eprintln!("\nTotal units generated: {total}"); + for (dim, units) in &by_dim { + eprintln!(" {dim}: {} units", units.len()); + } +} + +// --------------------------------------------------------------------------- +// CSV parsing +// --------------------------------------------------------------------------- + +/// Parse `units.csv`, skipping comments and blank lines. +// +/// Expected format (5 comma-separated fields per row): +/// ```csv +/// discriminant,dimension,name,symbol,ratio +/// 10011,Length,Meter,m,1.0 +/// ``` +fn parse_csv(path: &Path) -> Vec { + let content = + fs::read_to_string(path).expect("Failed to read units.csv"); + let mut units = Vec::new(); + + for line in content.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + // Split on (at most) 5 commas to protect ratio values that may + // themselves contain commas (defensive, though none currently do). + let parts: Vec<&str> = line.splitn(5, ',').collect(); + if parts.len() != 5 { + eprintln!("Warning: skipping malformed line: {line}"); + continue; + } + + let discriminant: u32 = match parts[0].trim().parse() { + Ok(v) => v, + Err(_) => { + eprintln!("Warning: invalid discriminant on line: {line}"); + continue; + } + }; + + // Map the leading digit to a dimension name. + let dim_code = discriminant / 10_000; + let dimension = match dim_code { + 1 => "Length", + 2 => "Time", + 3 => "Angle", + 4 => "Mass", + 5 => "Power", + _ => { + eprintln!("Warning: unknown dimension code {dim_code} for discriminant {discriminant}"); + continue; + } + }; + + let name = parts[2].trim().to_owned(); + let symbol = parts[3].trim().to_owned(); + let const_suffix = pascal_to_upper_snake(&name); + + units.push(UnitDef { + name, + const_suffix, + symbol, + dimension: dimension.to_owned(), + }); + } + + units +} + +// --------------------------------------------------------------------------- +// Name conversion +// --------------------------------------------------------------------------- + +/// Convert a PascalCase identifier to UPPER_SNAKE_CASE. +// +/// Handles: +/// - `Meter` → `METER` +/// - `PlanckLength` → `PLANCK_LENGTH` +/// - `MicroArcsecond` → `MICRO_ARCSECOND` +/// - `AstronomicalUnit` → `ASTRONOMICAL_UNIT` +/// - `ErgPerSecond` → `ERG_PER_SECOND` +// +/// The algorithm inserts `_` before an uppercase letter when: +/// 1. The preceding character is lowercase, **or** +/// 2. The preceding character is uppercase **and** the following character is +/// lowercase (handles abbreviations like `HTTPSClient` → `HTTPS_CLIENT`). +fn pascal_to_upper_snake(name: &str) -> String { + let chars: Vec = name.chars().collect(); + let mut out = String::with_capacity(name.len() + 8); + + for (i, &c) in chars.iter().enumerate() { + if c.is_uppercase() && i > 0 { + let prev = chars[i - 1]; + let next = chars.get(i + 1).copied(); + let need_sep = prev.is_lowercase() + || (prev.is_uppercase() + && next.map_or(false, |n| n.is_lowercase())); + if need_sep { + out.push('_'); + } + } + for uc in c.to_uppercase() { + out.push(uc); + } + } + + out +} + +// --------------------------------------------------------------------------- +// Header generation — dimension files +// --------------------------------------------------------------------------- + +fn generate_dimension_header(_dimension: &str, units: &[&UnitDef]) -> String { + let mut s = String::new(); + + // License + pragma once + include + writeln!(s, "{LICENSE}").unwrap(); + writeln!(s, "#pragma once").unwrap(); + writeln!(s).unwrap(); + writeln!(s, "#include \"../ffi_core.hpp\"").unwrap(); + writeln!(s).unwrap(); + writeln!(s, "namespace qtty {{").unwrap(); + writeln!(s).unwrap(); + + // Tag struct declarations (one per line, all packed together) + for unit in units { + writeln!(s, "struct {}Tag {{}};", unit.name).unwrap(); + } + writeln!(s).unwrap(); + + // UnitTraits specializations + for unit in units { + writeln!( + s, + "template <> struct UnitTraits<{}Tag> {{", + unit.name + ) + .unwrap(); + writeln!( + s, + " static constexpr UnitId unit_id() {{ return UNIT_ID_{}; }}", + unit.const_suffix + ) + .unwrap(); + writeln!( + s, + " static constexpr std::string_view symbol() {{ return \"{}\"; }}", + escape_cpp_string(&unit.symbol) + ) + .unwrap(); + writeln!(s, "}};").unwrap(); + } + writeln!(s).unwrap(); + + // Type aliases + for unit in units { + writeln!( + s, + "using {name} = Quantity<{name}Tag>;", + name = unit.name + ) + .unwrap(); + } + writeln!(s).unwrap(); + + writeln!(s, "}} // namespace qtty").unwrap(); + + s +} + +// --------------------------------------------------------------------------- +// Header generation — literals.hpp +// --------------------------------------------------------------------------- + +fn generate_literals( + by_dim: &HashMap<&str, Vec<&UnitDef>>, + _order: &[&str], +) -> String { + let mut s = String::new(); + + // License + pragma once + includes + writeln!(s, "{LICENSE}").unwrap(); + writeln!(s, "#pragma once").unwrap(); + writeln!(s).unwrap(); + writeln!(s, "#include \"units/length.hpp\"").unwrap(); + writeln!(s, "#include \"units/time.hpp\"").unwrap(); + writeln!(s, "#include \"units/angular.hpp\"").unwrap(); + writeln!(s, "#include \"units/mass.hpp\"").unwrap(); + writeln!(s, "#include \"units/power.hpp\"").unwrap(); + writeln!(s).unwrap(); + writeln!(s, "namespace qtty {{").unwrap(); + writeln!(s).unwrap(); + writeln!(s, "// Inline namespace for user-defined literals").unwrap(); + writeln!(s, "inline namespace literals {{").unwrap(); + + let mut used_suffixes: HashMap = HashMap::new(); + + for (dim_name, _, _) in DIMENSIONS { + let Some(units) = by_dim.get(dim_name) else { + continue; + }; + + let mut section = String::new(); + let mut has_any = false; + + for unit in units { + let Some(suffix) = make_literal_suffix(&unit.symbol) else { + continue; + }; + + if let Some(existing) = used_suffixes.get(&suffix) { + eprintln!( + " Warning: skipping literal _{suffix} for {} \ + (conflicts with {existing})", + unit.name + ); + continue; + } + used_suffixes.insert(suffix.clone(), unit.name.clone()); + + writeln!(section).unwrap(); + writeln!( + section, + "constexpr {name} operator\"\"_{suffix}(long double value) {{", + name = unit.name + ) + .unwrap(); + writeln!( + section, + " return {name}(static_cast(value));", + name = unit.name + ) + .unwrap(); + writeln!(section, "}}").unwrap(); + writeln!( + section, + "constexpr {name} operator\"\"_{suffix}(unsigned long long value) {{", + name = unit.name + ) + .unwrap(); + writeln!( + section, + " return {name}(static_cast(value));", + name = unit.name + ) + .unwrap(); + writeln!(section, "}}").unwrap(); + has_any = true; + } + + if has_any { + writeln!(s).unwrap(); + writeln!(s, "// ====================").unwrap(); + writeln!(s, "// {dim_name} literals").unwrap(); + writeln!(s, "// ====================").unwrap(); + s.push_str(§ion); + } + } + + writeln!(s).unwrap(); + writeln!(s, "}} // namespace literals").unwrap(); + writeln!(s).unwrap(); + writeln!(s, "}} // namespace qtty").unwrap(); + + s +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Convert a unit symbol into a valid C++ identifier suffix for a UDL. +// +/// Returns `None` if the resulting string would be empty (no valid chars). +// +/// Transformation rules: +/// - `/` → `_per_` (e.g., `m/s` → `m_per_s`) +/// - `°` → `deg` +/// - `′` → `arcmin` +/// - `″` → `arcsec` +/// - `µ` → `u` +/// - `☉` → `sol` +/// - `⊕` → `earth` +/// - `☾` → `moon` +/// - `♃` → `jupiter` +/// - ` ` → `_` +/// - All remaining non-`[A-Za-z0-9_]` characters are dropped. +fn make_literal_suffix(symbol: &str) -> Option { + let s = symbol + .replace('/', "_per_") + .replace('°', "deg") + .replace('′', "arcmin") + .replace('″', "arcsec") + .replace('µ', "u") + .replace('☉', "sol") + .replace('⊕', "earth") + .replace('☾', "moon") + .replace('♃', "jupiter") + .replace(' ', "_"); + + let filtered: String = s + .chars() + .filter(|c| c.is_ascii_alphanumeric() || *c == '_') + .collect(); + + if filtered.is_empty() { + None + } else { + Some(filtered) + } +} + +/// Escape a string for embedding in a C++ string literal. +// +/// Currently only escapes backslashes and double-quotes; all other characters +/// (including UTF-8 multi-byte sequences like `µ`) are passed through as-is, +/// which is valid in C++11 and later string literals. +fn escape_cpp_string(s: &str) -> String { + s.replace('\\', "\\\\").replace('"', "\\\"") +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn snake_simple() { + assert_eq!(pascal_to_upper_snake("Meter"), "METER"); + assert_eq!(pascal_to_upper_snake("Kilometer"), "KILOMETER"); + } + + #[test] + fn snake_multi_word() { + assert_eq!(pascal_to_upper_snake("PlanckLength"), "PLANCK_LENGTH"); + assert_eq!(pascal_to_upper_snake("LightYear"), "LIGHT_YEAR"); + assert_eq!(pascal_to_upper_snake("MicroArcsecond"), "MICRO_ARCSECOND"); + assert_eq!(pascal_to_upper_snake("BohrRadius"), "BOHR_RADIUS"); + assert_eq!( + pascal_to_upper_snake("AstronomicalUnit"), + "ASTRONOMICAL_UNIT" + ); + assert_eq!(pascal_to_upper_snake("ErgPerSecond"), "ERG_PER_SECOND"); + assert_eq!(pascal_to_upper_snake("HourAngle"), "HOUR_ANGLE"); + assert_eq!(pascal_to_upper_snake("JulianCentury"), "JULIAN_CENTURY"); + assert_eq!(pascal_to_upper_snake("SynodicMonth"), "SYNODIC_MONTH"); + assert_eq!(pascal_to_upper_snake("SiderealDay"), "SIDEREAL_DAY"); + } + + #[test] + fn snake_abbreviation() { + // All-caps abbreviation followed by PascalCase word. + assert_eq!(pascal_to_upper_snake("HTTPSClient"), "HTTPS_CLIENT"); + // Single all-caps "word" — no underscore should be added. + assert_eq!(pascal_to_upper_snake("AU"), "AU"); + } + + #[test] + fn literal_suffix_simple() { + assert_eq!(make_literal_suffix("m").as_deref(), Some("m")); + assert_eq!(make_literal_suffix("km").as_deref(), Some("km")); + } + + #[test] + fn literal_suffix_slash() { + assert_eq!( + make_literal_suffix("m/s").as_deref(), + Some("m_per_s") + ); + } + + #[test] + fn literal_suffix_unicode() { + assert_eq!(make_literal_suffix("µm").as_deref(), Some("um")); + assert_eq!(make_literal_suffix("°").as_deref(), Some("deg")); + assert_eq!(make_literal_suffix("′").as_deref(), Some("arcmin")); + } + + #[test] + fn literal_suffix_strips_non_ascii() { + // λ̄_e → after removing non-ASCII chars the underscore and 'e' survive + assert_eq!(make_literal_suffix("λ̄_e").as_deref(), Some("_e")); + // A symbol that is purely non-ASCII yields None + assert_eq!(make_literal_suffix("λ"), None); + } +} diff --git a/include/qtty/literals.hpp b/include/qtty/literals.hpp index de3dbb4..b360c2f 100644 --- a/include/qtty/literals.hpp +++ b/include/qtty/literals.hpp @@ -3,11 +3,11 @@ #pragma once -#include "units/angular.hpp" #include "units/length.hpp" +#include "units/time.hpp" +#include "units/angular.hpp" #include "units/mass.hpp" #include "units/power.hpp" -#include "units/time.hpp" namespace qtty { @@ -189,8 +189,7 @@ constexpr ClassicalElectronRadius operator""_r_e(unsigned long long value) { constexpr ElectronReducedComptonWavelength operator""__e(long double value) { return ElectronReducedComptonWavelength(static_cast(value)); } -constexpr ElectronReducedComptonWavelength -operator""__e(unsigned long long value) { +constexpr ElectronReducedComptonWavelength operator""__e(unsigned long long value) { return ElectronReducedComptonWavelength(static_cast(value)); } @@ -316,8 +315,7 @@ constexpr NominalLunarDistance operator""_LD(unsigned long long value) { constexpr NominalEarthPolarRadius operator""_R_earthpol(long double value) { return NominalEarthPolarRadius(static_cast(value)); } -constexpr NominalEarthPolarRadius -operator""_R_earthpol(unsigned long long value) { +constexpr NominalEarthPolarRadius operator""_R_earthpol(unsigned long long value) { return NominalEarthPolarRadius(static_cast(value)); } @@ -331,24 +329,21 @@ constexpr NominalEarthRadius operator""_R_earth(unsigned long long value) { constexpr NominalEarthEquatorialRadius operator""_R_eartheq(long double value) { return NominalEarthEquatorialRadius(static_cast(value)); } -constexpr NominalEarthEquatorialRadius -operator""_R_eartheq(unsigned long long value) { +constexpr NominalEarthEquatorialRadius operator""_R_eartheq(unsigned long long value) { return NominalEarthEquatorialRadius(static_cast(value)); } constexpr EarthMeridionalCircumference operator""_C_mer(long double value) { return EarthMeridionalCircumference(static_cast(value)); } -constexpr EarthMeridionalCircumference -operator""_C_mer(unsigned long long value) { +constexpr EarthMeridionalCircumference operator""_C_mer(unsigned long long value) { return EarthMeridionalCircumference(static_cast(value)); } constexpr EarthEquatorialCircumference operator""_C_eq(long double value) { return EarthEquatorialCircumference(static_cast(value)); } -constexpr EarthEquatorialCircumference -operator""_C_eq(unsigned long long value) { +constexpr EarthEquatorialCircumference operator""_C_eq(unsigned long long value) { return EarthEquatorialCircumference(static_cast(value)); } diff --git a/include/qtty/units/length.hpp b/include/qtty/units/length.hpp index f0c629a..67efda6 100644 --- a/include/qtty/units/length.hpp +++ b/include/qtty/units/length.hpp @@ -151,15 +151,11 @@ template <> struct UnitTraits { static constexpr std::string_view symbol() { return "a₀"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { - return UNIT_ID_CLASSICAL_ELECTRON_RADIUS; - } + static constexpr UnitId unit_id() { return UNIT_ID_CLASSICAL_ELECTRON_RADIUS; } static constexpr std::string_view symbol() { return "r_e"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { - return UNIT_ID_ELECTRON_REDUCED_COMPTON_WAVELENGTH; - } + static constexpr UnitId unit_id() { return UNIT_ID_ELECTRON_REDUCED_COMPTON_WAVELENGTH; } static constexpr std::string_view symbol() { return "λ̄_e"; } }; template <> struct UnitTraits { @@ -231,9 +227,7 @@ template <> struct UnitTraits { static constexpr std::string_view symbol() { return "LD"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { - return UNIT_ID_NOMINAL_EARTH_POLAR_RADIUS; - } + static constexpr UnitId unit_id() { return UNIT_ID_NOMINAL_EARTH_POLAR_RADIUS; } static constexpr std::string_view symbol() { return "R_⊕pol"; } }; template <> struct UnitTraits { @@ -241,21 +235,15 @@ template <> struct UnitTraits { static constexpr std::string_view symbol() { return "R_⊕"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { - return UNIT_ID_NOMINAL_EARTH_EQUATORIAL_RADIUS; - } + static constexpr UnitId unit_id() { return UNIT_ID_NOMINAL_EARTH_EQUATORIAL_RADIUS; } static constexpr std::string_view symbol() { return "R_⊕eq"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { - return UNIT_ID_EARTH_MERIDIONAL_CIRCUMFERENCE; - } + static constexpr UnitId unit_id() { return UNIT_ID_EARTH_MERIDIONAL_CIRCUMFERENCE; } static constexpr std::string_view symbol() { return "C_mer"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { - return UNIT_ID_EARTH_EQUATORIAL_CIRCUMFERENCE; - } + static constexpr UnitId unit_id() { return UNIT_ID_EARTH_EQUATORIAL_CIRCUMFERENCE; } static constexpr std::string_view symbol() { return "C_eq"; } }; template <> struct UnitTraits { @@ -295,8 +283,7 @@ using Zettameter = Quantity; using Yottameter = Quantity; using BohrRadius = Quantity; using ClassicalElectronRadius = Quantity; -using ElectronReducedComptonWavelength = - Quantity; +using ElectronReducedComptonWavelength = Quantity; using AstronomicalUnit = Quantity; using LightYear = Quantity; using Parsec = Quantity; From 5def2ee16b6d0a1f94de15a250bd91316b0b02e3 Mon Sep 17 00:00:00 2001 From: VPRamon Date: Mon, 23 Feb 2026 14:58:58 +0100 Subject: [PATCH 2/3] feat: Remove Python script for generating C++ unit wrapper headers --- gen_cpp_units.py | 303 ----------------------------------------------- 1 file changed, 303 deletions(-) delete mode 100755 gen_cpp_units.py diff --git a/gen_cpp_units.py b/gen_cpp_units.py deleted file mode 100755 index 6f9583d..0000000 --- a/gen_cpp_units.py +++ /dev/null @@ -1,303 +0,0 @@ -#!/usr/bin/env python3 -""" -Generate C++ unit wrapper headers from qtty_ffi.h - -This script parses the qtty_ffi.h C header file (generated by the Rust qtty-ffi -crate) and automatically generates type-safe C++ wrapper headers for all defined -units. The generator creates: - -1. Tag structs: Empty types for template specialization (e.g., MeterTag) -2. UnitTraits: Maps tags to C FFI unit ID constants -3. Type aliases: Convenient names like Meter = Quantity -4. User-defined literals: Syntax like 10.0_m for intuitive quantity creation - -Architecture: -- Input: qtty/qtty-ffi/include/qtty_ffi.h (C header from Rust) -- Output: include/qtty/units/*.hpp (C++ wrappers by dimension) - include/qtty/literals.hpp (user-defined literals) -- Parsing: Regex-based extraction of unit definitions from comments -- Grouping: Discriminant ranges determine dimension (10000-19999 = Length, etc.) - -Key features: -- Automatic: No manual editing of generated headers -- Collision detection: Warns about duplicate literal suffixes -- Symbol conversion: Translates unit symbols to valid C++ identifiers -""" - -import re -from pathlib import Path -from typing import Dict, List, Tuple -from collections import defaultdict - -# File header template -HEADER_TEMPLATE = """#pragma once - -#include "../ffi_core.hpp" - -namespace qtty {{ - -{tag_declarations} - -{unit_traits} - -{type_aliases} - -}} // namespace qtty -""" - -# Literal file template -LITERALS_HEADER = """#pragma once - -#include "units/length.hpp" -#include "units/time.hpp" -#include "units/angular.hpp" -#include "units/mass.hpp" -#include "units/power.hpp" - -namespace qtty {{ - -// Inline namespace for user-defined literals -inline namespace literals {{ - -{literal_definitions} - -}} // namespace literals - -}} // namespace qtty -""" - -def to_pascal_case(name: str) -> str: - """Convert UPPER_SNAKE_CASE to PascalCase - - Example: KILOMETER -> Kilometer, LIGHT_YEAR -> LightYear - """ - parts = name.split('_') - return ''.join(p.capitalize() for p in parts if p) - -def parse_qtty_ffi_header(header_path: Path) -> Dict[str, List[Tuple[str, str, str]]]: - """Parse qtty_ffi.h and extract unit IDs with symbols grouped by dimension - - The qtty_ffi.h header contains unit definitions in a specific comment format: - /* DisplayName (symbol) */ UNIT_ID_NAME = discriminant, - - Example: - /* Meter (m) */ UNIT_ID_METER = 10012, - - This function: - 1. Extracts all unit definitions matching the expected pattern - 2. Groups units by dimension based on discriminant range: - - 10000-19999: Length - - 20000-29999: Time - - 30000-39999: Angle - - 40000-49999: Mass - - 50000-59999: Power - 3. Returns a dict mapping dimension name to list of (const_name, unit_name, symbol) - - Returns: - Dict mapping dimension name -> [(UNIT_ID_NAME, UnitName, "symbol"), ...] - """ - units_by_dimension = defaultdict(list) - - with open(header_path, 'r', encoding='utf-8') as f: - content = f.read() - - # Extract units with their symbols from comments - # Pattern: /* UnitName (symbol) */ UNIT_ID_NAME = discriminant - unit_pattern = r'/\*\s*(\w+)\s*\(([^)]+)\)\s*\*/\s*UNIT_ID_(\w+)\s*=\s*(\d+)' - - for match in re.finditer(unit_pattern, content): - display_name = match.group(1) - symbol = match.group(2) - const_name = match.group(3) - discriminant = int(match.group(4)) - - # Determine dimension from discriminant (first digit) - dim_code = discriminant // 10000 - dimension_map = { - 1: 'Length', - 2: 'Time', - 3: 'Angle', - 4: 'Mass', - 5: 'Power' - } - - if dim_code in dimension_map: - dimension = dimension_map[dim_code] - unit_name = to_pascal_case(const_name) - units_by_dimension[dimension].append((const_name, unit_name, symbol)) - - return dict(units_by_dimension) - -def generate_header_for_dimension(dimension: str, units: List[Tuple[str, str, str]]) -> str: - """Generate a complete header file for a dimension""" - - # Generate tag declarations - tag_declarations = [] - for _, name, _ in units: - tag_declarations.append(f"struct {name}Tag {{}};") - - # Generate unit traits specializations - unit_traits = [] - for const_name, name, symbol in units: - unit_traits.append(f"""template<> struct UnitTraits<{name}Tag> {{ - static constexpr UnitId unit_id() {{ return UNIT_ID_{const_name}; }} - static constexpr std::string_view symbol() {{ return "{symbol}"; }} -}};""") - - # Generate type aliases - type_aliases = [] - for _, name, _ in units: - type_aliases.append(f"using {name} = Quantity<{name}Tag>;") - - return HEADER_TEMPLATE.format( - tag_declarations='\n'.join(tag_declarations), - unit_traits='\n'.join(unit_traits), - type_aliases='\n'.join(type_aliases) - ) - -def make_literal_suffix(symbol: str) -> str: - """Convert a symbol into a valid C++ literal suffix - - C++ user-defined literals have strict syntax requirements: - - Must be a valid identifier (alphanumeric + underscore) - - Cannot start with underscore (reserved) - - No special characters except underscore - - This function transforms unit symbols to meet these requirements: - - Replace '/' with '_per_' (e.g., 'm/s' -> 'm_per_s') - - Replace special Unicode characters (°, µ, etc.) with ASCII equivalents - - Remove any remaining invalid characters - - Returns None if the symbol cannot be made into a valid suffix. - - Examples: - 'm' -> 'm' - 'km/h' -> 'km_per_h' - '°C' -> 'degC' - 'µm' -> 'um' - """ - # Replace special characters - suffix = symbol.replace('/', '_per_') - suffix = suffix.replace('°', 'deg') - suffix = suffix.replace('′', 'arcmin') - suffix = suffix.replace('″', 'arcsec') - suffix = suffix.replace('µ', 'u') - suffix = suffix.replace('☉', 'sol') - suffix = suffix.replace('⊕', 'earth') - suffix = suffix.replace('☾', 'moon') - suffix = suffix.replace('♃', 'jupiter') - suffix = suffix.replace(' ', '_') - # Remove any remaining special characters - suffix = re.sub(r'[^a-zA-Z0-9_]', '', suffix) - return suffix if suffix else None - -def generate_literals_file(all_units: Dict[str, List[Tuple[str, str, str]]]) -> str: - """Generate the literals.hpp file for all units with valid symbols - - Creates user-defined literal operators for each unit that has a usable symbol. - Each literal gets two overloads: one for long double (123.45_m) and one for - unsigned long long (123_m) to support both floating-point and integer literals. - - Collision Detection: - If two units would produce the same literal suffix (e.g., 'nm' for both - Nanometer and NauticalMile), only the first is used and a warning is printed. - Users can still construct these units via their explicit constructors. - - Returns: - Complete content of literals.hpp as a string - """ - literal_sections = [] - - # Track used suffixes to detect collisions - used_suffixes = {} - - for dimension in ['Length', 'Time', 'Angle', 'Mass', 'Power']: - if dimension not in all_units: - continue - - units = all_units[dimension] - dimension_literals = [] - - for _, name, symbol in units: - suffix = make_literal_suffix(symbol) - if suffix: - # Check for collision - if suffix in used_suffixes: - # Skip this literal to avoid collision - # Could also use longer suffix like _nm vs _nanometer - print(f" Warning: Skipping literal _{suffix} for {name} (conflicts with {used_suffixes[suffix]})") - continue - - used_suffixes[suffix] = name - dimension_literals.append( - f"""constexpr {name} operator""_{suffix}(long double value) {{ - return {name}(static_cast(value)); -}} -constexpr {name} operator""_{suffix}(unsigned long long value) {{ - return {name}(static_cast(value)); -}}""") - - if dimension_literals: - literal_sections.append(f"// ====================\n// {dimension} literals\n// ====================\n") - literal_sections.append('\n\n'.join(dimension_literals)) - - return LITERALS_HEADER.format( - literal_definitions='\n\n'.join(literal_sections) - ) - -def main(): - script_dir = Path(__file__).parent - header_path = script_dir / 'qtty' / 'qtty-ffi' / 'include' / 'qtty_ffi.h' - include_dir = script_dir / 'include' / 'qtty' / 'units' - - if not header_path.exists(): - print(f"Error: {header_path} not found") - print("Make sure the qtty submodule is built (cargo build -p qtty-ffi)") - return 1 - - print(f"Reading units from: {header_path}") - units_by_dimension = parse_qtty_ffi_header(header_path) - - # Create include directory if it doesn't exist - include_dir.mkdir(parents=True, exist_ok=True) - - # Mapping of dimension names to file names - dimension_files = { - 'Length': 'length.hpp', - 'Time': 'time.hpp', - 'Angle': 'angular.hpp', - 'Mass': 'mass.hpp', - 'Power': 'power.hpp', - } - - # Generate header for each dimension - for dimension, filename in dimension_files.items(): - if dimension in units_by_dimension: - units = units_by_dimension[dimension] - header_content = generate_header_for_dimension(dimension, units) - - output_path = include_dir / filename - with open(output_path, 'w', encoding='utf-8') as f: - f.write(header_content) - - print(f"Generated {filename} with {len(units)} units") - else: - print(f"Warning: No units found for dimension {dimension}") - - # Generate literals.hpp - literals_path = script_dir / 'include' / 'qtty' / 'literals.hpp' - literals_content = generate_literals_file(units_by_dimension) - with open(literals_path, 'w', encoding='utf-8') as f: - f.write(literals_content) - print(f"Generated literals.hpp") - - # Print summary - total_units = sum(len(units) for units in units_by_dimension.values()) - print(f"\nTotal units generated: {total_units}") - for dimension, units in sorted(units_by_dimension.items()): - print(f" {dimension}: {len(units)} units") - - return 0 - -if __name__ == '__main__': - exit(main()) From f4ad13727211dff7a06c1b7c976b07d2b5757f81 Mon Sep 17 00:00:00 2001 From: VPRamon Date: Mon, 23 Feb 2026 15:16:34 +0100 Subject: [PATCH 3/3] format: clang-format generated qtty headers --- include/qtty/literals.hpp | 19 ++++++++++++------- include/qtty/units/length.hpp | 27 ++++++++++++++++++++------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/qtty/literals.hpp b/include/qtty/literals.hpp index b360c2f..de3dbb4 100644 --- a/include/qtty/literals.hpp +++ b/include/qtty/literals.hpp @@ -3,11 +3,11 @@ #pragma once -#include "units/length.hpp" -#include "units/time.hpp" #include "units/angular.hpp" +#include "units/length.hpp" #include "units/mass.hpp" #include "units/power.hpp" +#include "units/time.hpp" namespace qtty { @@ -189,7 +189,8 @@ constexpr ClassicalElectronRadius operator""_r_e(unsigned long long value) { constexpr ElectronReducedComptonWavelength operator""__e(long double value) { return ElectronReducedComptonWavelength(static_cast(value)); } -constexpr ElectronReducedComptonWavelength operator""__e(unsigned long long value) { +constexpr ElectronReducedComptonWavelength +operator""__e(unsigned long long value) { return ElectronReducedComptonWavelength(static_cast(value)); } @@ -315,7 +316,8 @@ constexpr NominalLunarDistance operator""_LD(unsigned long long value) { constexpr NominalEarthPolarRadius operator""_R_earthpol(long double value) { return NominalEarthPolarRadius(static_cast(value)); } -constexpr NominalEarthPolarRadius operator""_R_earthpol(unsigned long long value) { +constexpr NominalEarthPolarRadius +operator""_R_earthpol(unsigned long long value) { return NominalEarthPolarRadius(static_cast(value)); } @@ -329,21 +331,24 @@ constexpr NominalEarthRadius operator""_R_earth(unsigned long long value) { constexpr NominalEarthEquatorialRadius operator""_R_eartheq(long double value) { return NominalEarthEquatorialRadius(static_cast(value)); } -constexpr NominalEarthEquatorialRadius operator""_R_eartheq(unsigned long long value) { +constexpr NominalEarthEquatorialRadius +operator""_R_eartheq(unsigned long long value) { return NominalEarthEquatorialRadius(static_cast(value)); } constexpr EarthMeridionalCircumference operator""_C_mer(long double value) { return EarthMeridionalCircumference(static_cast(value)); } -constexpr EarthMeridionalCircumference operator""_C_mer(unsigned long long value) { +constexpr EarthMeridionalCircumference +operator""_C_mer(unsigned long long value) { return EarthMeridionalCircumference(static_cast(value)); } constexpr EarthEquatorialCircumference operator""_C_eq(long double value) { return EarthEquatorialCircumference(static_cast(value)); } -constexpr EarthEquatorialCircumference operator""_C_eq(unsigned long long value) { +constexpr EarthEquatorialCircumference +operator""_C_eq(unsigned long long value) { return EarthEquatorialCircumference(static_cast(value)); } diff --git a/include/qtty/units/length.hpp b/include/qtty/units/length.hpp index 67efda6..f0c629a 100644 --- a/include/qtty/units/length.hpp +++ b/include/qtty/units/length.hpp @@ -151,11 +151,15 @@ template <> struct UnitTraits { static constexpr std::string_view symbol() { return "a₀"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { return UNIT_ID_CLASSICAL_ELECTRON_RADIUS; } + static constexpr UnitId unit_id() { + return UNIT_ID_CLASSICAL_ELECTRON_RADIUS; + } static constexpr std::string_view symbol() { return "r_e"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { return UNIT_ID_ELECTRON_REDUCED_COMPTON_WAVELENGTH; } + static constexpr UnitId unit_id() { + return UNIT_ID_ELECTRON_REDUCED_COMPTON_WAVELENGTH; + } static constexpr std::string_view symbol() { return "λ̄_e"; } }; template <> struct UnitTraits { @@ -227,7 +231,9 @@ template <> struct UnitTraits { static constexpr std::string_view symbol() { return "LD"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { return UNIT_ID_NOMINAL_EARTH_POLAR_RADIUS; } + static constexpr UnitId unit_id() { + return UNIT_ID_NOMINAL_EARTH_POLAR_RADIUS; + } static constexpr std::string_view symbol() { return "R_⊕pol"; } }; template <> struct UnitTraits { @@ -235,15 +241,21 @@ template <> struct UnitTraits { static constexpr std::string_view symbol() { return "R_⊕"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { return UNIT_ID_NOMINAL_EARTH_EQUATORIAL_RADIUS; } + static constexpr UnitId unit_id() { + return UNIT_ID_NOMINAL_EARTH_EQUATORIAL_RADIUS; + } static constexpr std::string_view symbol() { return "R_⊕eq"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { return UNIT_ID_EARTH_MERIDIONAL_CIRCUMFERENCE; } + static constexpr UnitId unit_id() { + return UNIT_ID_EARTH_MERIDIONAL_CIRCUMFERENCE; + } static constexpr std::string_view symbol() { return "C_mer"; } }; template <> struct UnitTraits { - static constexpr UnitId unit_id() { return UNIT_ID_EARTH_EQUATORIAL_CIRCUMFERENCE; } + static constexpr UnitId unit_id() { + return UNIT_ID_EARTH_EQUATORIAL_CIRCUMFERENCE; + } static constexpr std::string_view symbol() { return "C_eq"; } }; template <> struct UnitTraits { @@ -283,7 +295,8 @@ using Zettameter = Quantity; using Yottameter = Quantity; using BohrRadius = Quantity; using ClassicalElectronRadius = Quantity; -using ElectronReducedComptonWavelength = Quantity; +using ElectronReducedComptonWavelength = + Quantity; using AstronomicalUnit = Quantity; using LightYear = Quantity; using Parsec = Quantity;