From 8c3d05621b535a86df71e7649c3feb21237fae8e Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Wed, 18 Feb 2026 00:09:55 +0000 Subject: [PATCH 1/7] feat: unify name demangling with single Lean implementation This PR replaces three independent name demangling implementations (Lean, C++, Python) with a single source of truth in Lean. The new `Lean.Compiler.NameDemangling` module handles the full pipeline: prefix parsing, postprocessing (suffix flags, private names, hygienic stripping, specialization contexts), backtrace line parsing, and C exports. The C++ runtime backtrace handler now calls `@[export]`ed Lean functions instead of its own reimplementation. The Python profiler demangler is replaced with a thin subprocess wrapper around a Lean CLI tool. Co-Authored-By: Claude Opus 4.6 --- script/profiler/lean_demangle.py | 801 ++----------------------- script/profiler/lean_demangle_cli.lean | 32 + script/profiler/test_demangle.py | 670 --------------------- src/Lean.lean | 1 + src/Lean/Compiler/NameDemangling.lean | 487 +++++++++++++++ src/runtime/CMakeLists.txt | 1 - src/runtime/demangle.cpp | 792 ------------------------ src/runtime/demangle.h | 26 - src/runtime/object.cpp | 22 +- tests/lean/run/demangling.lean | 281 +++++++++ 10 files changed, 868 insertions(+), 2245 deletions(-) create mode 100644 script/profiler/lean_demangle_cli.lean delete mode 100644 script/profiler/test_demangle.py create mode 100644 src/Lean/Compiler/NameDemangling.lean delete mode 100644 src/runtime/demangle.cpp delete mode 100644 src/runtime/demangle.h create mode 100644 tests/lean/run/demangling.lean diff --git a/script/profiler/lean_demangle.py b/script/profiler/lean_demangle.py index a637a2f7b390..5599940517ca 100644 --- a/script/profiler/lean_demangle.py +++ b/script/profiler/lean_demangle.py @@ -1,9 +1,11 @@ #!/usr/bin/env python3 """ -Lean name demangler. +Lean name demangler — thin wrapper around the Lean CLI tool. -Demangles C symbol names produced by the Lean 4 compiler back into -readable Lean hierarchical names. +Spawns ``lean --run lean_demangle_cli.lean`` as a persistent subprocess +and communicates via stdin/stdout pipes. This ensures a single source +of truth for demangling logic (the Lean implementation in +``Lean.Compiler.NameDemangling``). Usage as a filter (like c++filt): echo "l_Lean_Meta_Sym_main" | python lean_demangle.py @@ -13,767 +15,68 @@ print(demangle_lean_name("l_Lean_Meta_Sym_main")) """ +import atexit +import os +import subprocess import sys - -# --------------------------------------------------------------------------- -# String.mangle / unmangle -# --------------------------------------------------------------------------- - -def _is_ascii_alnum(ch): - """Check if ch is an ASCII letter or digit (matching Lean's isAlpha/isDigit).""" - return ('a' <= ch <= 'z') or ('A' <= ch <= 'Z') or ('0' <= ch <= '9') - - -def mangle_string(s): - """Port of Lean's String.mangle: escape a single string for C identifiers.""" - result = [] - for ch in s: - if _is_ascii_alnum(ch): - result.append(ch) - elif ch == '_': - result.append('__') - else: - code = ord(ch) - if code < 0x100: - result.append('_x' + format(code, '02x')) - elif code < 0x10000: - result.append('_u' + format(code, '04x')) - else: - result.append('_U' + format(code, '08x')) - return ''.join(result) - - -def _parse_hex(s, pos, n): - """Parse n lowercase hex digits at pos. Returns (new_pos, value) or None.""" - if pos + n > len(s): - return None - val = 0 - for i in range(n): - c = s[pos + i] - if '0' <= c <= '9': - val = (val << 4) | (ord(c) - ord('0')) - elif 'a' <= c <= 'f': - val = (val << 4) | (ord(c) - ord('a') + 10) - else: - return None - return (pos + n, val) - - -# --------------------------------------------------------------------------- -# Name mangling (for round-trip verification) -# --------------------------------------------------------------------------- - -def _check_disambiguation(m): - """Port of Lean's checkDisambiguation: does mangled string m need a '00' prefix?""" - pos = 0 - while pos < len(m): - ch = m[pos] - if ch == '_': - pos += 1 - continue - if ch == 'x': - return _parse_hex(m, pos + 1, 2) is not None - if ch == 'u': - return _parse_hex(m, pos + 1, 4) is not None - if ch == 'U': - return _parse_hex(m, pos + 1, 8) is not None - if '0' <= ch <= '9': - return True - return False - # all underscores or empty - return True - - -def _need_disambiguation(prev_component, mangled_next): - """Port of Lean's needDisambiguation.""" - # Check if previous component (as a string) ends with '_' - prev_ends_underscore = (isinstance(prev_component, str) and - len(prev_component) > 0 and - prev_component[-1] == '_') - return prev_ends_underscore or _check_disambiguation(mangled_next) - - -def mangle_name(components, prefix="l_"): - """ - Mangle a list of name components (str or int) into a C symbol. - Port of Lean's Name.mangle. - """ - if not components: - return prefix - - parts = [] - prev = None - for i, comp in enumerate(components): - if isinstance(comp, int): - if i == 0: - parts.append(str(comp) + '_') - else: - parts.append('_' + str(comp) + '_') - else: - m = mangle_string(comp) - if i == 0: - if _check_disambiguation(m): - parts.append('00' + m) - else: - parts.append(m) - else: - if _need_disambiguation(prev, m): - parts.append('_00' + m) - else: - parts.append('_' + m) - prev = comp - - return prefix + ''.join(parts) - - -# --------------------------------------------------------------------------- -# Name demangling -# --------------------------------------------------------------------------- - -def demangle_body(s): - """ - Demangle a string produced by Name.mangleAux (without prefix). - Returns a list of components (str or int). - - This is a faithful port of Lean's Name.demangleAux from NameMangling.lean. - """ - components = [] - length = len(s) - - def emit(comp): - components.append(comp) - - def decode_num(pos, n): - """Parse remaining digits, emit numeric component, continue.""" - while pos < length: - ch = s[pos] - if '0' <= ch <= '9': - n = n * 10 + (ord(ch) - ord('0')) - pos += 1 - else: - # Expect '_' (trailing underscore of numeric encoding) - pos += 1 # skip '_' - emit(n) - if pos >= length: - return pos - # Skip separator '_' and go to name_start - pos += 1 - return name_start(pos) - # End of string - emit(n) - return pos - - def name_start(pos): - """Start parsing a new name component.""" - if pos >= length: - return pos - ch = s[pos] - pos += 1 - if '0' <= ch <= '9': - # Check for '00' disambiguation - if ch == '0' and pos < length and s[pos] == '0': - pos += 1 - return demangle_main(pos, "", 0) - else: - return decode_num(pos, ord(ch) - ord('0')) - elif ch == '_': - return demangle_main(pos, "", 1) - else: - return demangle_main(pos, ch, 0) - - def demangle_main(pos, acc, ucount): - """Main demangling loop.""" - while pos < length: - ch = s[pos] - pos += 1 - - if ch == '_': - ucount += 1 - continue - - if ucount % 2 == 0: - # Even underscores: literal underscores in component name - acc += '_' * (ucount // 2) + ch - ucount = 0 - continue - - # Odd ucount: separator or escape - if '0' <= ch <= '9': - # End current str component, start number - emit(acc + '_' * (ucount // 2)) - if ch == '0' and pos < length and s[pos] == '0': - pos += 1 - return demangle_main(pos, "", 0) - else: - return decode_num(pos, ord(ch) - ord('0')) - - # Try hex escapes - if ch == 'x': - result = _parse_hex(s, pos, 2) - if result is not None: - new_pos, val = result - acc += '_' * (ucount // 2) + chr(val) - pos = new_pos - ucount = 0 - continue - - if ch == 'u': - result = _parse_hex(s, pos, 4) - if result is not None: - new_pos, val = result - acc += '_' * (ucount // 2) + chr(val) - pos = new_pos - ucount = 0 - continue - - if ch == 'U': - result = _parse_hex(s, pos, 8) - if result is not None: - new_pos, val = result - acc += '_' * (ucount // 2) + chr(val) - pos = new_pos - ucount = 0 - continue - - # Name separator - emit(acc) - acc = '_' * (ucount // 2) + ch - ucount = 0 - - # End of string - acc += '_' * (ucount // 2) - if acc: - emit(acc) - return pos - - name_start(0) - return components - - -# --------------------------------------------------------------------------- -# Prefix handling for lp_ (package prefix) -# --------------------------------------------------------------------------- - -def _is_valid_string_mangle(s): - """Check if s is a valid output of String.mangle (no trailing bare _).""" - pos = 0 - length = len(s) - while pos < length: - ch = s[pos] - if _is_ascii_alnum(ch): - pos += 1 - elif ch == '_': - if pos + 1 >= length: - return False # trailing bare _ - nch = s[pos + 1] - if nch == '_': - pos += 2 - elif nch == 'x' and _parse_hex(s, pos + 2, 2) is not None: - pos = _parse_hex(s, pos + 2, 2)[0] - elif nch == 'u' and _parse_hex(s, pos + 2, 4) is not None: - pos = _parse_hex(s, pos + 2, 4)[0] - elif nch == 'U' and _parse_hex(s, pos + 2, 8) is not None: - pos = _parse_hex(s, pos + 2, 8)[0] - else: - return False - else: - return False - return True - - -def _skip_string_mangle(s, pos): - """ - Skip past a String.mangle output in s starting at pos. - Returns the position after the mangled string (where we expect the separator '_'). - This is a greedy scan. - """ - length = len(s) - while pos < length: - ch = s[pos] - if _is_ascii_alnum(ch): - pos += 1 - elif ch == '_': - if pos + 1 < length: - nch = s[pos + 1] - if nch == '_': - pos += 2 - elif nch == 'x' and _parse_hex(s, pos + 2, 2) is not None: - pos = _parse_hex(s, pos + 2, 2)[0] - elif nch == 'u' and _parse_hex(s, pos + 2, 4) is not None: - pos = _parse_hex(s, pos + 2, 4)[0] - elif nch == 'U' and _parse_hex(s, pos + 2, 8) is not None: - pos = _parse_hex(s, pos + 2, 8)[0] - else: - return pos # bare '_': separator - else: - return pos - else: - return pos - return pos - - -def _find_lp_body(s): - """ - Given s = everything after 'lp_' in a symbol, find where the declaration - body (Name.mangleAux output) starts. - Returns the start index of the body within s, or None. - - Strategy: try all candidate split points where the package part is a valid - String.mangle output and the body round-trips. Prefer the longest valid - package name (most specific match). - """ - length = len(s) - - # Collect candidate split positions: every '_' that could be the separator - candidates = [] - pos = 0 - while pos < length: - if s[pos] == '_': - candidates.append(pos) - pos += 1 - - # Try each candidate; collect all valid splits - valid_splits = [] - for split_pos in candidates: - pkg_part = s[:split_pos] - if not pkg_part: - continue - if not _is_valid_string_mangle(pkg_part): - continue - body = s[split_pos + 1:] - if not body: - continue - components = demangle_body(body) - if not components: - continue - remangled = mangle_name(components, prefix="") - if remangled == body: - first = components[0] - # Score: prefer first component starting with uppercase - has_upper = isinstance(first, str) and first and first[0].isupper() - valid_splits.append((split_pos, has_upper)) - - if valid_splits: - # Among splits where first decl component starts uppercase, pick longest pkg. - # Otherwise pick shortest pkg. - upper_splits = [s for s in valid_splits if s[1]] - if upper_splits: - best = max(upper_splits, key=lambda x: x[0]) - else: - best = min(valid_splits, key=lambda x: x[0]) - return best[0] + 1 - - # Fallback: greedy String.mangle scan - greedy_pos = _skip_string_mangle(s, 0) - if greedy_pos < length and s[greedy_pos] == '_': - return greedy_pos + 1 - - return None - - -# --------------------------------------------------------------------------- -# Format name components for display -# --------------------------------------------------------------------------- - -def format_name(components): - """Format a list of name components as a dot-separated string.""" - return '.'.join(str(c) for c in components) - - -# --------------------------------------------------------------------------- -# Human-friendly postprocessing -# --------------------------------------------------------------------------- - -# Compiler-generated suffix components — exact match -_SUFFIX_FLAGS_EXACT = { - '_redArg': 'arity\u2193', - '_boxed': 'boxed', - '_impl': 'impl', -} - -# Compiler-generated suffix prefixes — match with optional _N index -# e.g., _lam, _lam_0, _lam_3, _lambda_0, _closed_2 -_SUFFIX_FLAGS_PREFIX = { - '_lam': '\u03bb', - '_lambda': '\u03bb', - '_elam': '\u03bb', - '_jp': 'jp', - '_closed': 'closed', -} - - -def _match_suffix(component): - """ - Check if a string component is a compiler-generated suffix. - Returns the flag label or None. - - Handles both exact matches (_redArg, _boxed) and indexed suffixes - (_lam_0, _lambda_2, _closed_0) produced by appendIndexAfter. - """ - if not isinstance(component, str): - return None - if component in _SUFFIX_FLAGS_EXACT: - return _SUFFIX_FLAGS_EXACT[component] - if component in _SUFFIX_FLAGS_PREFIX: - return _SUFFIX_FLAGS_PREFIX[component] - # Check for indexed suffix: prefix + _N - for prefix, label in _SUFFIX_FLAGS_PREFIX.items(): - if component.startswith(prefix + '_'): - rest = component[len(prefix) + 1:] - if rest.isdigit(): - return label - return None - - -def _strip_private(components): - """Strip _private.Module.0. prefix. Returns (stripped_parts, is_private).""" - if (len(components) >= 3 and isinstance(components[0], str) and - components[0] == '_private'): - for i in range(1, len(components)): - if components[i] == 0: - if i + 1 < len(components): - return components[i + 1:], True - break - return components, False - - -def _strip_spec_suffixes(components): - """Strip trailing spec_N components (from appendIndexAfter).""" - parts = list(components) - while parts and isinstance(parts[-1], str) and parts[-1].startswith('spec_'): - rest = parts[-1][5:] - if rest.isdigit(): - parts.pop() - else: - break - return parts - - -def _is_spec_index(component): - """Check if a component is a spec_N index (from appendIndexAfter).""" - return (isinstance(component, str) and - component.startswith('spec_') and component[5:].isdigit()) - - -def _parse_spec_entries(rest): - """Parse _at_..._spec pairs into separate spec context entries. - - Given components starting from the first _at_, returns: - - entries: list of component lists, one per _at_..._spec block - - remaining: components after the last _spec N (trailing suffixes) - """ - entries = [] - current_ctx = None - remaining = [] - skip_next = False - - for p in rest: - if skip_next: - skip_next = False - continue - if isinstance(p, str) and p == '_at_': - if current_ctx is not None: - entries.append(current_ctx) - current_ctx = [] - continue - if isinstance(p, str) and p == '_spec': - if current_ctx is not None: - entries.append(current_ctx) - current_ctx = None - skip_next = True - continue - if isinstance(p, str) and p.startswith('_spec'): - if current_ctx is not None: - entries.append(current_ctx) - current_ctx = None - continue - if current_ctx is not None: - current_ctx.append(p) - else: - remaining.append(p) - - if current_ctx is not None: - entries.append(current_ctx) - - return entries, remaining - - -def _process_spec_context(components): - """Process a spec context into a clean name and its flags. - - Returns (name_parts, flags) where name_parts are the cleaned components - and flags is a deduplicated list of flag labels from compiler suffixes. - """ - parts = list(components) - parts, _ = _strip_private(parts) - - name_parts = [] - ctx_flags = [] - seen = set() - - for p in parts: - flag = _match_suffix(p) - if flag is not None: - if flag not in seen: - ctx_flags.append(flag) - seen.add(flag) - elif _is_spec_index(p): - pass - else: - name_parts.append(p) - - return name_parts, ctx_flags - - -def postprocess_name(components): - """ - Transform raw demangled components into a human-friendly display string. - - Applies: - - Private name cleanup: _private.Module.0.Name.foo -> Name.foo [private] - - Hygienic name cleanup: strips _@.module._hygCtx._hyg.N - - Suffix folding: _redArg, _boxed, _lam_0, etc. -> [flags] - - Specialization: f._at_.g._spec.N -> f spec at g - Shown after base [flags], with context flags: spec at g[ctx_flags] - """ - if not components: - return "" - - parts = list(components) - flags = [] - spec_entries = [] - - # --- Strip _private prefix --- - parts, is_private = _strip_private(parts) - - # --- Strip hygienic suffixes: everything from _@ onward --- - at_idx = None - for i, p in enumerate(parts): - if isinstance(p, str) and p.startswith('_@'): - at_idx = i - break - if at_idx is not None: - parts = parts[:at_idx] - - # --- Handle specialization: _at_ ... _spec N --- - at_positions = [i for i, p in enumerate(parts) - if isinstance(p, str) and p == '_at_'] - if at_positions: - first_at = at_positions[0] - base = parts[:first_at] - rest = parts[first_at:] - - entries, remaining = _parse_spec_entries(rest) - for ctx_components in entries: - ctx_name, ctx_flags = _process_spec_context(ctx_components) - if ctx_name or ctx_flags: - spec_entries.append((ctx_name, ctx_flags)) - - parts = base + remaining - - # --- Collect suffix flags from the end --- - while parts: - last = parts[-1] - flag = _match_suffix(last) - if flag is not None: - flags.append(flag) - parts.pop() - elif isinstance(last, int) and len(parts) >= 2: - prev_flag = _match_suffix(parts[-2]) - if prev_flag is not None: - flags.append(prev_flag) - parts.pop() # remove the number - parts.pop() # remove the suffix - else: - break - else: - break - - if is_private: - flags.append('private') - - # --- Format result --- - name = '.'.join(str(c) for c in parts) if parts else '?' - result = name - if flags: - flag_str = ', '.join(flags) - result += f' [{flag_str}]' - - for ctx_name, ctx_flags in spec_entries: - ctx_str = '.'.join(str(c) for c in ctx_name) if ctx_name else '?' - if ctx_flags: - ctx_flag_str = ', '.join(ctx_flags) - result += f' spec at {ctx_str}[{ctx_flag_str}]' - else: - result += f' spec at {ctx_str}' - - return result - - -# --------------------------------------------------------------------------- -# Main demangling entry point -# --------------------------------------------------------------------------- - -def demangle_lean_name_raw(mangled): - """ - Demangle a Lean C symbol, preserving all internal name components. - - Returns the exact demangled name with all compiler-generated suffixes - intact. Use demangle_lean_name() for human-friendly output. - """ - try: - return _demangle_lean_name_inner(mangled, human_friendly=False) - except Exception: - return mangled +_process = None +_script_dir = os.path.dirname(os.path.abspath(__file__)) +_cli_script = os.path.join(_script_dir, "lean_demangle_cli.lean") + + +def _get_process(): + """Get or create the persistent Lean demangler subprocess.""" + global _process + if _process is not None and _process.poll() is None: + return _process + + lean = os.environ.get("LEAN", "lean") + _process = subprocess.Popen( + [lean, "--run", _cli_script], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + bufsize=1, # line buffered + ) + atexit.register(_cleanup) + return _process + + +def _cleanup(): + global _process + if _process is not None: + try: + _process.stdin.close() + _process.wait(timeout=5) + except Exception: + _process.kill() + _process = None def demangle_lean_name(mangled): """ Demangle a C symbol name produced by the Lean 4 compiler. - Returns a human-friendly demangled name with compiler suffixes folded - into readable flags. Use demangle_lean_name_raw() to preserve all - internal components. + Returns a human-friendly demangled name, or the original string + if it is not a Lean symbol. """ try: - return _demangle_lean_name_inner(mangled, human_friendly=True) + proc = _get_process() + proc.stdin.write(mangled + "\n") + proc.stdin.flush() + result = proc.stdout.readline().rstrip("\n") + return result if result else mangled except Exception: return mangled -def _demangle_lean_name_inner(mangled, human_friendly=True): - """Inner demangle that may raise on malformed input.""" - - if mangled == "_lean_main": - return "[lean] main" - - # Handle lean_ runtime functions - if human_friendly and mangled.startswith("lean_apply_"): - rest = mangled[11:] - if rest.isdigit(): - return f"" - - # Strip .cold.N suffix (LLVM linker cold function clones) - cold_suffix = "" - core = mangled - dot_pos = core.find('.cold.') - if dot_pos >= 0: - cold_suffix = " " + core[dot_pos:] - core = core[:dot_pos] - elif core.endswith('.cold'): - cold_suffix = " .cold" - core = core[:-5] - - result = _demangle_core(core, human_friendly) - if result is None: - return mangled - return result + cold_suffix - - -def _demangle_core(mangled, human_friendly=True): - """Demangle a symbol without .cold suffix. Returns None if not a Lean name.""" - - fmt = postprocess_name if human_friendly else format_name - - # _init_ prefix - if mangled.startswith("_init_"): - rest = mangled[6:] - body, pkg_display = _strip_lean_prefix(rest) - if body is None: - return None - components = demangle_body(body) - if not components: - return None - name = fmt(components) - if pkg_display: - return f"[init] {name} ({pkg_display})" - return f"[init] {name}" - - # initialize_ prefix (module init functions) - if mangled.startswith("initialize_"): - rest = mangled[11:] - # With package: initialize_lp_{pkg}_{body} or initialize_l_{body} - body, pkg_display = _strip_lean_prefix(rest) - if body is not None: - components = demangle_body(body) - if components: - name = fmt(components) - if pkg_display: - return f"[module_init] {name} ({pkg_display})" - return f"[module_init] {name}" - # Without package: initialize_{Name.mangleAux(moduleName)} - if rest: - components = demangle_body(rest) - if components: - return f"[module_init] {fmt(components)}" - return None - - # l_ or lp_ prefix - body, pkg_display = _strip_lean_prefix(mangled) - if body is None: - return None - components = demangle_body(body) - if not components: - return None - name = fmt(components) - if pkg_display: - return f"{name} ({pkg_display})" - return name - - -def _strip_lean_prefix(s): - """ - Strip the l_ or lp_ prefix from a mangled symbol. - Returns (body, pkg_display) where body is the Name.mangleAux output - and pkg_display is None or a string describing the package. - Returns (None, None) if the string doesn't have a recognized prefix. - """ - if s.startswith("l_"): - return (s[2:], None) - - if s.startswith("lp_"): - after_lp = s[3:] - body_start = _find_lp_body(after_lp) - if body_start is not None: - pkg_mangled = after_lp[:body_start - 1] - # Unmangle the package name - pkg_components = demangle_body(pkg_mangled) - if pkg_components and len(pkg_components) == 1 and isinstance(pkg_components[0], str): - pkg_display = pkg_components[0] - else: - pkg_display = pkg_mangled - return (after_lp[body_start:], pkg_display) - # Fallback: treat everything after lp_ as body - return (after_lp, "?") - - return (None, None) - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - def main(): - """Filter stdin or arguments, demangling Lean names.""" - import argparse - parser = argparse.ArgumentParser( - description="Demangle Lean 4 C symbol names (like c++filt for Lean)") - parser.add_argument('names', nargs='*', - help='Names to demangle (reads stdin if none given)') - parser.add_argument('--raw', action='store_true', - help='Output exact demangled names without postprocessing') - args = parser.parse_args() - - demangle = demangle_lean_name_raw if args.raw else demangle_lean_name - - if args.names: - for name in args.names: - print(demangle(name)) - else: - for line in sys.stdin: - print(demangle(line.rstrip('\n'))) + """Filter stdin, demangling Lean names.""" + for line in sys.stdin: + print(demangle_lean_name(line.rstrip("\n"))) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/script/profiler/lean_demangle_cli.lean b/script/profiler/lean_demangle_cli.lean new file mode 100644 index 000000000000..3221b1822ee1 --- /dev/null +++ b/script/profiler/lean_demangle_cli.lean @@ -0,0 +1,32 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Kim Morrison +-/ +module + +import Lean.Compiler.NameDemangling + +/-! +Lean name demangler CLI tool. Reads mangled symbol names from stdin (one per +line) and writes demangled names to stdout. Non-Lean symbols pass through +unchanged. Like `c++filt` but for Lean names. + +Usage: + echo "l_Lean_Meta_foo" | lean --run lean_demangle_cli.lean + cat symbols.txt | lean --run lean_demangle_cli.lean +-/ + +open Lean.Name.Demangle + +def main : IO Unit := do + let stdin ← IO.getStdin + let stdout ← IO.getStdout + repeat do + let line ← stdin.getLine + if line.isEmpty then break + let sym := line.trimRight + match demangleSymbol sym with + | some s => stdout.putStrLn s + | none => stdout.putStrLn sym + stdout.flush diff --git a/script/profiler/test_demangle.py b/script/profiler/test_demangle.py deleted file mode 100644 index effffaf0846c..000000000000 --- a/script/profiler/test_demangle.py +++ /dev/null @@ -1,670 +0,0 @@ -#!/usr/bin/env python3 -"""Tests for the Lean name demangler.""" - -import unittest -import json -import gzip -import tempfile -import os - -from lean_demangle import ( - mangle_string, mangle_name, demangle_body, format_name, - demangle_lean_name, demangle_lean_name_raw, postprocess_name, - _parse_hex, _check_disambiguation, -) - - -class TestStringMangle(unittest.TestCase): - """Test String.mangle (character-level escaping).""" - - def test_alphanumeric(self): - self.assertEqual(mangle_string("hello"), "hello") - self.assertEqual(mangle_string("abc123"), "abc123") - - def test_underscore(self): - self.assertEqual(mangle_string("a_b"), "a__b") - self.assertEqual(mangle_string("_"), "__") - self.assertEqual(mangle_string("__"), "____") - - def test_special_chars(self): - self.assertEqual(mangle_string("."), "_x2e") - self.assertEqual(mangle_string("a.b"), "a_x2eb") - - def test_unicode(self): - self.assertEqual(mangle_string("\u03bb"), "_u03bb") - self.assertEqual(mangle_string("\U0001d55c"), "_U0001d55c") - - def test_empty(self): - self.assertEqual(mangle_string(""), "") - - -class TestNameMangle(unittest.TestCase): - """Test Name.mangle (hierarchical name mangling).""" - - def test_simple(self): - self.assertEqual(mangle_name(["Lean", "Meta", "Sym", "main"]), - "l_Lean_Meta_Sym_main") - - def test_single_component(self): - self.assertEqual(mangle_name(["main"]), "l_main") - - def test_numeric_component(self): - self.assertEqual( - mangle_name(["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "withMVarContextImp"]), - "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp") - - def test_component_with_underscore(self): - self.assertEqual(mangle_name(["a_b"]), "l_a__b") - self.assertEqual(mangle_name(["a_b", "c"]), "l_a__b_c") - - def test_disambiguation_digit_start(self): - self.assertEqual(mangle_name(["0foo"]), "l_000foo") - - def test_disambiguation_escape_start(self): - self.assertEqual(mangle_name(["a", "x27"]), "l_a_00x27") - - def test_numeric_root(self): - self.assertEqual(mangle_name([42]), "l_42_") - self.assertEqual(mangle_name([42, "foo"]), "l_42__foo") - - def test_component_ending_with_underscore(self): - self.assertEqual(mangle_name(["a_", "b"]), "l_a___00b") - - def test_custom_prefix(self): - self.assertEqual(mangle_name(["foo"], prefix="lp_pkg_"), - "lp_pkg_foo") - - -class TestDemangleBody(unittest.TestCase): - """Test demangle_body (the core Name.demangleAux algorithm).""" - - def test_simple(self): - self.assertEqual(demangle_body("Lean_Meta_Sym_main"), - ["Lean", "Meta", "Sym", "main"]) - - def test_single(self): - self.assertEqual(demangle_body("main"), ["main"]) - - def test_empty(self): - self.assertEqual(demangle_body(""), []) - - def test_underscore_in_component(self): - self.assertEqual(demangle_body("a__b"), ["a_b"]) - self.assertEqual(demangle_body("a__b_c"), ["a_b", "c"]) - - def test_numeric_component(self): - self.assertEqual(demangle_body("foo_42__bar"), ["foo", 42, "bar"]) - - def test_numeric_root(self): - self.assertEqual(demangle_body("42_"), [42]) - - def test_numeric_at_end(self): - self.assertEqual(demangle_body("foo_42_"), ["foo", 42]) - - def test_disambiguation_00(self): - self.assertEqual(demangle_body("a_00x27"), ["a", "x27"]) - - def test_disambiguation_00_at_root(self): - self.assertEqual(demangle_body("000foo"), ["0foo"]) - - def test_hex_escape_x(self): - self.assertEqual(demangle_body("a_x2eb"), ["a.b"]) - - def test_hex_escape_u(self): - self.assertEqual(demangle_body("_u03bb"), ["\u03bb"]) - - def test_hex_escape_U(self): - self.assertEqual(demangle_body("_U0001d55c"), ["\U0001d55c"]) - - def test_private_name(self): - body = "__private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp" - self.assertEqual(demangle_body(body), - ["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "withMVarContextImp"]) - - def test_boxed_suffix(self): - body = "foo___boxed" - self.assertEqual(demangle_body(body), ["foo", "_boxed"]) - - def test_redArg_suffix(self): - body = "foo_bar___redArg" - self.assertEqual(demangle_body(body), ["foo", "bar", "_redArg"]) - - def test_component_ending_underscore_disambiguation(self): - self.assertEqual(demangle_body("a___00b"), ["a_", "b"]) - - -class TestRoundTrip(unittest.TestCase): - """Test that mangle(demangle(x)) == x for various names.""" - - def _check_roundtrip(self, components): - mangled = mangle_name(components, prefix="") - demangled = demangle_body(mangled) - self.assertEqual(demangled, components, - f"Round-trip failed: {components} -> '{mangled}' -> {demangled}") - mangled_with_prefix = mangle_name(components, prefix="l_") - self.assertTrue(mangled_with_prefix.startswith("l_")) - body = mangled_with_prefix[2:] - demangled2 = demangle_body(body) - self.assertEqual(demangled2, components) - - def test_simple_names(self): - self._check_roundtrip(["Lean", "Meta", "main"]) - self._check_roundtrip(["a"]) - self._check_roundtrip(["Foo", "Bar", "baz"]) - - def test_numeric(self): - self._check_roundtrip(["foo", 0, "bar"]) - self._check_roundtrip([42]) - self._check_roundtrip(["a", 1, "b", 2, "c"]) - - def test_underscores(self): - self._check_roundtrip(["_private"]) - self._check_roundtrip(["a_b", "c_d"]) - self._check_roundtrip(["_at_", "_spec"]) - - def test_private_name(self): - self._check_roundtrip(["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "withMVarContextImp"]) - - def test_boxed(self): - self._check_roundtrip(["Lean", "Meta", "foo", "_boxed"]) - - def test_redArg(self): - self._check_roundtrip(["Lean", "Meta", "foo", "_redArg"]) - - def test_specialization(self): - self._check_roundtrip(["List", "map", "_at_", "Foo", "bar", "_spec", 3]) - - def test_lambda(self): - self._check_roundtrip(["Foo", "bar", "_lambda", 0]) - self._check_roundtrip(["Foo", "bar", "_lambda", 2]) - - def test_closed(self): - self._check_roundtrip(["myConst", "_closed", 0]) - - def test_special_chars(self): - self._check_roundtrip(["a.b"]) - self._check_roundtrip(["\u03bb"]) - self._check_roundtrip(["a", "b\u2192c"]) - - def test_disambiguation_cases(self): - self._check_roundtrip(["a", "x27"]) - self._check_roundtrip(["0foo"]) - self._check_roundtrip(["a_", "b"]) - - def test_complex_real_names(self): - """Names modeled after real Lean compiler output.""" - self._check_roundtrip( - ["Lean", "MVarId", "withContext", "_at_", - "_private", "Lean", "Meta", "Sym", 0, - "Lean", "Meta", "Sym", "BackwardRule", "apply", - "_spec", 2, "_redArg", "_lambda", 0, "_boxed"]) - - -class TestDemangleRaw(unittest.TestCase): - """Test demangle_lean_name_raw (exact demangling, no postprocessing).""" - - def test_l_prefix(self): - self.assertEqual( - demangle_lean_name_raw("l_Lean_Meta_Sym_main"), - "Lean.Meta.Sym.main") - - def test_l_prefix_private(self): - result = demangle_lean_name_raw( - "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp") - self.assertEqual(result, - "_private.Lean.Meta.Basic.0.Lean.Meta.withMVarContextImp") - - def test_l_prefix_boxed(self): - result = demangle_lean_name_raw("l_foo___boxed") - self.assertEqual(result, "foo._boxed") - - def test_l_prefix_redArg(self): - result = demangle_lean_name_raw( - "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp___redArg") - self.assertEqual( - result, - "_private.Lean.Meta.Basic.0.Lean.Meta.withMVarContextImp._redArg") - - def test_lean_main(self): - self.assertEqual(demangle_lean_name_raw("_lean_main"), "[lean] main") - - def test_non_lean_names(self): - self.assertEqual(demangle_lean_name_raw("printf"), "printf") - self.assertEqual(demangle_lean_name_raw("malloc"), "malloc") - self.assertEqual(demangle_lean_name_raw("lean_apply_5"), "lean_apply_5") - self.assertEqual(demangle_lean_name_raw(""), "") - - def test_init_prefix(self): - result = demangle_lean_name_raw("_init_l_Lean_Meta_foo") - self.assertEqual(result, "[init] Lean.Meta.foo") - - def test_lp_prefix_simple(self): - mangled = mangle_name(["Lean", "Meta", "foo"], prefix="lp_std_") - self.assertEqual(mangled, "lp_std_Lean_Meta_foo") - result = demangle_lean_name_raw(mangled) - self.assertEqual(result, "Lean.Meta.foo (std)") - - def test_lp_prefix_underscore_pkg(self): - pkg_mangled = mangle_string("my_pkg") - self.assertEqual(pkg_mangled, "my__pkg") - mangled = mangle_name(["Lean", "Meta", "foo"], - prefix=f"lp_{pkg_mangled}_") - self.assertEqual(mangled, "lp_my__pkg_Lean_Meta_foo") - result = demangle_lean_name_raw(mangled) - self.assertEqual(result, "Lean.Meta.foo (my_pkg)") - - def test_lp_prefix_private_decl(self): - mangled = mangle_name( - ["_private", "X", 0, "Y", "foo"], prefix="lp_pkg_") - self.assertEqual(mangled, "lp_pkg___private_X_0__Y_foo") - result = demangle_lean_name_raw(mangled) - self.assertEqual(result, "_private.X.0.Y.foo (pkg)") - - def test_complex_specialization(self): - components = [ - "Lean", "MVarId", "withContext", "_at_", - "_private", "Lean", "Meta", "Sym", 0, - "Lean", "Meta", "Sym", "BackwardRule", "apply", - "_spec", 2, "_redArg", "_lambda", 0, "_boxed" - ] - mangled = mangle_name(components) - result = demangle_lean_name_raw(mangled) - expected = format_name(components) - self.assertEqual(result, expected) - - def test_cold_suffix(self): - result = demangle_lean_name_raw("l_Lean_Meta_foo___redArg.cold.1") - self.assertEqual(result, "Lean.Meta.foo._redArg .cold.1") - - def test_cold_suffix_plain(self): - result = demangle_lean_name_raw("l_Lean_Meta_foo.cold") - self.assertEqual(result, "Lean.Meta.foo .cold") - - def test_initialize_no_pkg(self): - result = demangle_lean_name_raw("initialize_Init_Control_Basic") - self.assertEqual(result, "[module_init] Init.Control.Basic") - - def test_initialize_with_l_prefix(self): - result = demangle_lean_name_raw("initialize_l_Lean_Meta_foo") - self.assertEqual(result, "[module_init] Lean.Meta.foo") - - def test_never_crashes(self): - """Demangling should never raise, just return the original.""" - weird_inputs = [ - "", "l_", "lp_", "lp_x", "_init_", "initialize_", - "l_____", "lp____", "l_00", "l_0", - "some random string", "l_ space", - ] - for inp in weird_inputs: - result = demangle_lean_name_raw(inp) - self.assertIsInstance(result, str) - - -class TestPostprocess(unittest.TestCase): - """Test postprocess_name (human-friendly suffix folding, etc.).""" - - def test_no_change(self): - self.assertEqual(postprocess_name(["Lean", "Meta", "main"]), - "Lean.Meta.main") - - def test_boxed(self): - self.assertEqual(postprocess_name(["foo", "_boxed"]), - "foo [boxed]") - - def test_redArg(self): - self.assertEqual(postprocess_name(["foo", "bar", "_redArg"]), - "foo.bar [arity\u2193]") - - def test_lambda_separate(self): - # _lam as separate component + numeric index - self.assertEqual(postprocess_name(["foo", "_lam", 0]), - "foo [\u03bb]") - - def test_lambda_indexed(self): - # _lam_0 as single string (appendIndexAfter) - self.assertEqual(postprocess_name(["foo", "_lam_0"]), - "foo [\u03bb]") - self.assertEqual(postprocess_name(["foo", "_lambda_2"]), - "foo [\u03bb]") - - def test_lambda_boxed(self): - # _lam_0 followed by _boxed - self.assertEqual( - postprocess_name(["Lean", "Meta", "Simp", "simpLambda", - "_lam_0", "_boxed"]), - "Lean.Meta.Simp.simpLambda [boxed, \u03bb]") - - def test_closed(self): - self.assertEqual(postprocess_name(["myConst", "_closed", 3]), - "myConst [closed]") - - def test_closed_indexed(self): - self.assertEqual(postprocess_name(["myConst", "_closed_0"]), - "myConst [closed]") - - def test_multiple_suffixes(self): - self.assertEqual(postprocess_name(["foo", "_redArg", "_boxed"]), - "foo [boxed, arity\u2193]") - - def test_redArg_lam(self): - # _redArg followed by _lam_0 (issue #4) - self.assertEqual( - postprocess_name(["Lean", "profileitIOUnsafe", - "_redArg", "_lam_0"]), - "Lean.profileitIOUnsafe [\u03bb, arity\u2193]") - - def test_private_name(self): - self.assertEqual( - postprocess_name(["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "withMVarContextImp"]), - "Lean.Meta.withMVarContextImp [private]") - - def test_private_with_suffix(self): - self.assertEqual( - postprocess_name(["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "foo", "_redArg"]), - "Lean.Meta.foo [arity\u2193, private]") - - def test_hygienic_strip(self): - self.assertEqual( - postprocess_name(["Lean", "Meta", "foo", "_@", "Lean", "Meta", - "_hyg", 42]), - "Lean.Meta.foo") - - def test_specialization(self): - self.assertEqual( - postprocess_name(["List", "map", "_at_", "Foo", "bar", - "_spec", 3]), - "List.map spec at Foo.bar") - - def test_specialization_with_suffix(self): - # Base suffix _boxed appears in [flags] before spec at - self.assertEqual( - postprocess_name(["Lean", "MVarId", "withContext", "_at_", - "Foo", "bar", "_spec", 2, "_boxed"]), - "Lean.MVarId.withContext [boxed] spec at Foo.bar") - - def test_spec_context_with_flags(self): - # Compiler suffixes in spec context become context flags - self.assertEqual( - postprocess_name(["Lean", "Meta", "foo", "_at_", - "Lean", "Meta", "bar", "_elam_1", "_redArg", - "_spec", 2]), - "Lean.Meta.foo spec at Lean.Meta.bar[\u03bb, arity\u2193]") - - def test_spec_context_flags_dedup(self): - # Duplicate flag labels are deduplicated - self.assertEqual( - postprocess_name(["f", "_at_", - "g", "_lam_0", "_elam_1", "_redArg", - "_spec", 1]), - "f spec at g[\u03bb, arity\u2193]") - - def test_multiple_at(self): - # Multiple _at_ entries become separate spec at clauses - self.assertEqual( - postprocess_name(["f", "_at_", "g", "_spec", 1, - "_at_", "h", "_spec", 2]), - "f spec at g spec at h") - - def test_multiple_at_with_flags(self): - # Multiple spec at with flags on base and contexts - self.assertEqual( - postprocess_name(["f", "_at_", "g", "_redArg", "_spec", 1, - "_at_", "h", "_lam_0", "_spec", 2, - "_boxed"]), - "f [boxed] spec at g[arity\u2193] spec at h[\u03bb]") - - def test_base_flags_before_spec(self): - # Base trailing suffixes appear in [flags] before spec at - self.assertEqual( - postprocess_name(["f", "_at_", "g", "_spec", 1, "_lam_0"]), - "f [\u03bb] spec at g") - - def test_spec_context_strip_spec_suffixes(self): - # spec_0 in context should be stripped - self.assertEqual( - postprocess_name(["Lean", "Meta", "transformWithCache", "visit", - "_at_", - "_private", "Lean", "Meta", "Transform", 0, - "Lean", "Meta", "transform", - "Lean", "Meta", "Sym", "unfoldReducible", - "spec_0", "spec_0", - "_spec", 1]), - "Lean.Meta.transformWithCache.visit " - "spec at Lean.Meta.transform.Lean.Meta.Sym.unfoldReducible") - - def test_spec_context_strip_private(self): - # _private in spec context should be stripped - self.assertEqual( - postprocess_name(["Array", "mapMUnsafe", "map", "_at_", - "_private", "Lean", "Meta", "Transform", 0, - "Lean", "Meta", "transformWithCache", "visit", - "_spec", 1]), - "Array.mapMUnsafe.map " - "spec at Lean.Meta.transformWithCache.visit") - - def test_empty(self): - self.assertEqual(postprocess_name([]), "") - - -class TestDemangleHumanFriendly(unittest.TestCase): - """Test demangle_lean_name (human-friendly output).""" - - def test_simple(self): - self.assertEqual(demangle_lean_name("l_Lean_Meta_main"), - "Lean.Meta.main") - - def test_boxed(self): - self.assertEqual(demangle_lean_name("l_foo___boxed"), - "foo [boxed]") - - def test_redArg(self): - self.assertEqual(demangle_lean_name("l_foo___redArg"), - "foo [arity\u2193]") - - def test_private(self): - self.assertEqual( - demangle_lean_name( - "l___private_Lean_Meta_Basic_0__Lean_Meta_foo"), - "Lean.Meta.foo [private]") - - def test_private_with_redArg(self): - self.assertEqual( - demangle_lean_name( - "l___private_Lean_Meta_Basic_0__Lean_Meta_foo___redArg"), - "Lean.Meta.foo [arity\u2193, private]") - - def test_cold_with_suffix(self): - self.assertEqual( - demangle_lean_name("l_Lean_Meta_foo___redArg.cold.1"), - "Lean.Meta.foo [arity\u2193] .cold.1") - - def test_lean_apply(self): - self.assertEqual(demangle_lean_name("lean_apply_5"), "") - self.assertEqual(demangle_lean_name("lean_apply_12"), "") - - def test_lean_apply_raw_unchanged(self): - self.assertEqual(demangle_lean_name_raw("lean_apply_5"), - "lean_apply_5") - - def test_init_private(self): - self.assertEqual( - demangle_lean_name( - "_init_l___private_X_0__Y_foo"), - "[init] Y.foo [private]") - - def test_complex_specialization(self): - components = [ - "Lean", "MVarId", "withContext", "_at_", - "_private", "Lean", "Meta", "Sym", 0, - "Lean", "Meta", "Sym", "BackwardRule", "apply", - "_spec", 2, "_redArg", "_lambda", 0, "_boxed" - ] - mangled = mangle_name(components) - result = demangle_lean_name(mangled) - # Base: Lean.MVarId.withContext with trailing _redArg, _lambda 0, _boxed - # Spec context: Lean.Meta.Sym.BackwardRule.apply (private stripped) - self.assertEqual( - result, - "Lean.MVarId.withContext [boxed, \u03bb, arity\u2193] " - "spec at Lean.Meta.Sym.BackwardRule.apply") - - def test_non_lean_unchanged(self): - self.assertEqual(demangle_lean_name("printf"), "printf") - self.assertEqual(demangle_lean_name("malloc"), "malloc") - self.assertEqual(demangle_lean_name(""), "") - - -class TestDemangleProfile(unittest.TestCase): - """Test the profile rewriter.""" - - def _make_profile_shared(self, strings): - """Create a profile with shared.stringArray (newer format).""" - return { - "meta": {"version": 28}, - "libs": [], - "shared": { - "stringArray": list(strings), - }, - "threads": [{ - "name": "main", - "pid": "1", - "tid": 1, - "funcTable": { - "name": list(range(len(strings))), - "isJS": [False] * len(strings), - "relevantForJS": [False] * len(strings), - "resource": [-1] * len(strings), - "fileName": [None] * len(strings), - "lineNumber": [None] * len(strings), - "columnNumber": [None] * len(strings), - "length": len(strings), - }, - "frameTable": {"length": 0}, - "stackTable": {"length": 0}, - "samples": {"length": 0}, - "markers": {"length": 0}, - "resourceTable": {"length": 0}, - "nativeSymbols": {"length": 0}, - }], - "pages": [], - "counters": [], - } - - def _make_profile_per_thread(self, strings): - """Create a profile with per-thread stringArray (samply format).""" - return { - "meta": {"version": 28}, - "libs": [], - "threads": [{ - "name": "main", - "pid": "1", - "tid": 1, - "stringArray": list(strings), - "funcTable": { - "name": list(range(len(strings))), - "isJS": [False] * len(strings), - "relevantForJS": [False] * len(strings), - "resource": [-1] * len(strings), - "fileName": [None] * len(strings), - "lineNumber": [None] * len(strings), - "columnNumber": [None] * len(strings), - "length": len(strings), - }, - "frameTable": {"length": 0}, - "stackTable": {"length": 0}, - "samples": {"length": 0}, - "markers": {"length": 0}, - "resourceTable": {"length": 0}, - "nativeSymbols": {"length": 0}, - }], - "pages": [], - "counters": [], - } - - def test_profile_rewrite_shared(self): - from lean_demangle_profile import rewrite_profile - strings = [ - "l_Lean_Meta_Sym_main", - "printf", - "lean_apply_5", - "l___private_Lean_Meta_Basic_0__Lean_Meta_foo", - ] - profile = self._make_profile_shared(strings) - rewrite_profile(profile) - sa = profile["shared"]["stringArray"] - self.assertEqual(sa[0], "Lean.Meta.Sym.main") - self.assertEqual(sa[1], "printf") - self.assertEqual(sa[2], "") - self.assertEqual(sa[3], "Lean.Meta.foo [private]") - - def test_profile_rewrite_per_thread(self): - from lean_demangle_profile import rewrite_profile - strings = [ - "l_Lean_Meta_Sym_main", - "printf", - "lean_apply_5", - "l___private_Lean_Meta_Basic_0__Lean_Meta_foo", - ] - profile = self._make_profile_per_thread(strings) - count = rewrite_profile(profile) - sa = profile["threads"][0]["stringArray"] - self.assertEqual(sa[0], "Lean.Meta.Sym.main") - self.assertEqual(sa[1], "printf") - self.assertEqual(sa[2], "") - self.assertEqual(sa[3], "Lean.Meta.foo [private]") - self.assertEqual(count, 3) - - def test_profile_json_roundtrip(self): - from lean_demangle_profile import process_profile_file - strings = ["l_Lean_Meta_main", "malloc"] - profile = self._make_profile_shared(strings) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.json', - delete=False) as f: - json.dump(profile, f) - inpath = f.name - - outpath = inpath.replace('.json', '-demangled.json') - try: - process_profile_file(inpath, outpath) - with open(outpath) as f: - result = json.load(f) - self.assertEqual(result["shared"]["stringArray"][0], - "Lean.Meta.main") - self.assertEqual(result["shared"]["stringArray"][1], "malloc") - finally: - os.unlink(inpath) - if os.path.exists(outpath): - os.unlink(outpath) - - def test_profile_gzip_roundtrip(self): - from lean_demangle_profile import process_profile_file - strings = ["l_Lean_Meta_main", "malloc"] - profile = self._make_profile_shared(strings) - - with tempfile.NamedTemporaryFile(suffix='.json.gz', - delete=False) as f: - with gzip.open(f, 'wt') as gz: - json.dump(profile, gz) - inpath = f.name - - outpath = inpath.replace('.json.gz', '-demangled.json.gz') - try: - process_profile_file(inpath, outpath) - with gzip.open(outpath, 'rt') as f: - result = json.load(f) - self.assertEqual(result["shared"]["stringArray"][0], - "Lean.Meta.main") - finally: - os.unlink(inpath) - if os.path.exists(outpath): - os.unlink(outpath) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/Lean.lean b/src/Lean.lean index f5e62c749ae6..b11461e90545 100644 --- a/src/Lean.lean +++ b/src/Lean.lean @@ -7,6 +7,7 @@ module public import Lean.Data public import Lean.Compiler +public import Lean.Compiler.NameDemangling public import Lean.Environment public import Lean.Modifiers public import Lean.ProjFns diff --git a/src/Lean/Compiler/NameDemangling.lean b/src/Lean/Compiler/NameDemangling.lean new file mode 100644 index 000000000000..1f74f30c4a7b --- /dev/null +++ b/src/Lean/Compiler/NameDemangling.lean @@ -0,0 +1,487 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Kim Morrison +-/ +module + +prelude +import Init.While +import Init.Data.String.TakeDrop +import Init.Data.String.Search +public import Lean.Compiler.NameMangling + +/-! +# Lean Name Demangling + +Human-friendly demangling of Lean compiler symbol names. Extends the core +`Name.demangle` from `NameMangling.lean` with prefix handling, compiler suffix +folding, and backtrace line parsing. + +This is the single source of truth for name demangling. The C runtime calls +these functions via `@[export]` for backtrace display. +-/ + +namespace Lean.Name.Demangle + +-- ============================================================================ +-- Raw position helpers (avoid dependent String.Pos proofs) +-- ============================================================================ + +private abbrev RawPos := String.Pos.Raw + +private def rawGet (s : String) (i : RawPos) : Char := + String.Pos.Raw.get s i + +private def rawNext (s : String) (i : RawPos) : RawPos := + String.Pos.Raw.next s i + +private def rawAtEnd (s : String) (i : RawPos) : Bool := + i.byteIdx >= s.utf8ByteSize + +private def rawExtract (s : String) (b e : RawPos) : String := + String.Pos.Raw.extract s b e + +private def rawEnd (s : String) : RawPos := ⟨s.utf8ByteSize⟩ + +-- ============================================================================ +-- Helpers +-- ============================================================================ + +private def isAllDigits (s : String) : Bool := + !s.isEmpty && s.all (·.isDigit) + +private def isHexChar (c : Char) : Bool := + c.isDigit || (c.val >= 0x61 && c.val <= 0x66) || (c.val >= 0x41 && c.val <= 0x46) + +-- ============================================================================ +-- Component type and Name conversion +-- ============================================================================ + +private inductive Component where + | str (s : String) + | num (n : Nat) + deriving BEq, Repr, Inhabited + +/-- Convert a `Name` to a forward-ordered array of components. -/ +private def nameToComponents (n : Name) : Array Component := + go n [] |>.toArray +where + go : Name → List Component → List Component + | .anonymous, acc => acc + | .str pre s, acc => go pre (Component.str s :: acc) + | .num pre n, acc => go pre (Component.num n :: acc) + +private def formatComponents (comps : Array Component) : String := + comps.toList.map (fun + | Component.str s => s + | Component.num n => toString n) + |> String.intercalate "." + +-- ============================================================================ +-- Compiler suffix matching +-- ============================================================================ + +/-- Match a component against known compiler-generated suffixes. +Returns the human-friendly flag label, or `none`. -/ +private def matchSuffix (c : Component) : Option String := + match c with + | Component.str s => + -- Exact matches + if s == "_redArg" then some "arity↓" + else if s == "_boxed" then some "boxed" + else if s == "_impl" then some "impl" + -- Exact or indexed + else if s == "_lam" || s == "_lambda" || s == "_elam" then some "λ" + else if s == "_jp" then some "jp" + else if s == "_closed" then some "closed" + -- Indexed: _lam_N, _lambda_N, _elam_N, _jp_N, _closed_N + else if s.startsWith "_lam_" && isAllDigits (rawExtract s ⟨5⟩ (rawEnd s)) then some "λ" + else if s.startsWith "_lambda_" && isAllDigits (rawExtract s ⟨8⟩ (rawEnd s)) then some "λ" + else if s.startsWith "_elam_" && isAllDigits (rawExtract s ⟨6⟩ (rawEnd s)) then some "λ" + else if s.startsWith "_jp_" && isAllDigits (rawExtract s ⟨4⟩ (rawEnd s)) then some "jp" + else if s.startsWith "_closed_" && isAllDigits (rawExtract s ⟨8⟩ (rawEnd s)) then some "closed" + else none + | _ => none + +private def isSpecIndex (c : Component) : Bool := + match c with + | Component.str s => + s.startsWith "spec_" && s.length > 5 && isAllDigits (rawExtract s ⟨5⟩ (rawEnd s)) + | _ => false + +-- ============================================================================ +-- Postprocessing: components → human-friendly string +-- ============================================================================ + +private def stripPrivate (comps : Array Component) (start stop : Nat) : + Nat × Bool := + if stop - start >= 3 && comps[start]? == some (Component.str "_private") then + Id.run do + for i in [start + 1 : stop] do + if comps[i]? == some (Component.num 0) then + if i + 1 < stop then return (i + 1, true) + else return (start, false) + return (start, false) + else + (start, false) + +private structure SpecEntry where + name : String + flags : Array String + +private def processSpecContext (comps : Array Component) : SpecEntry := Id.run do + -- Strip private prefix within context + let mut begin_ := 0 + if comps.size >= 3 && comps[0]? == some (Component.str "_private") then + for i in [1:comps.size] do + if comps[i]? == some (Component.num 0) && i + 1 < comps.size then + begin_ := i + 1 + break + let mut nameParts : Array String := #[] + let mut flags : Array String := #[] + for i in [begin_:comps.size] do + let c := comps[i]! + match matchSuffix c with + | some flag => + if !flags.contains flag then + flags := flags.push flag + | none => + if isSpecIndex c then pure () + else match c with + | Component.str s => nameParts := nameParts.push s + | Component.num n => nameParts := nameParts.push (toString n) + { name := String.intercalate "." nameParts.toList, flags } + +/-- Main postprocessing: transform raw demangled components into a +human-friendly display string. -/ +private def postprocessComponents (components : Array Component) : String := Id.run do + if components.isEmpty then return "" + + -- 1. Strip _private prefix + let (privStart, isPrivate) := stripPrivate components 0 components.size + let mut parts := components.extract privStart components.size + + -- 2. Strip hygienic suffixes: everything from _@ onward + for i in [:parts.size] do + match parts[i]! with + | Component.str s => + if s.startsWith "_@" then + parts := parts.extract 0 i + break + | _ => pure () + + -- 3. Handle specialization: _at_ ... _spec N + let mut specEntries : Array SpecEntry := #[] + let mut firstAt : Option Nat := none + for i in [:parts.size] do + if parts[i]! == Component.str "_at_" then + firstAt := some i + break + + if let some fa := firstAt then + let base := parts.extract 0 fa + let rest := parts.extract fa parts.size + + -- Parse _at_..._spec entries + let mut entries : Array (Array Component) := #[] + let mut currentCtx : Option (Array Component) := none + let mut remaining : Array Component := #[] + let mut skipNext := false + + for i in [:rest.size] do + if skipNext then + skipNext := false + continue + let p := rest[i]! + if p == Component.str "_at_" then + if let some ctx := currentCtx then + entries := entries.push ctx + currentCtx := some #[] + else if p == Component.str "_spec" then + if let some ctx := currentCtx then + entries := entries.push ctx + currentCtx := none + skipNext := true + else if match p with | Component.str s => s.startsWith "_spec" | _ => false then + if let some ctx := currentCtx then + entries := entries.push ctx + currentCtx := none + else + match currentCtx with + | some ctx => currentCtx := some (ctx.push p) + | none => remaining := remaining.push p + + if let some ctx := currentCtx then + if !ctx.isEmpty then + entries := entries.push ctx + + for entry in entries do + let se := processSpecContext entry + if !se.name.isEmpty || !se.flags.isEmpty then + specEntries := specEntries.push se + + parts := base ++ remaining + + -- 4. Collect suffix flags from the end + let mut flags : Array String := #[] + let mut cont := true + while cont && !parts.isEmpty do + let last := parts.back! + match matchSuffix last with + | some flag => + flags := flags.push flag + parts := parts.pop + | none => + match last with + | Component.num _ => + if parts.size >= 2 then + match matchSuffix parts[parts.size - 2]! with + | some flag => + flags := flags.push flag + parts := parts.pop.pop + | none => cont := false + else cont := false + | _ => cont := false + + if isPrivate then + flags := flags.push "private" + + -- 5. Format result + let name := if parts.isEmpty then "?" else formatComponents parts + let mut result := name + + if !flags.isEmpty then + result := result ++ " [" ++ String.intercalate ", " flags.toList ++ "]" + + for entry in specEntries do + let ctxStr := if entry.name.isEmpty then "?" else entry.name + if !entry.flags.isEmpty then + result := result ++ " spec at " ++ ctxStr ++ "[" + ++ String.intercalate ", " entry.flags.toList ++ "]" + else + result := result ++ " spec at " ++ ctxStr + + return result + +-- ============================================================================ +-- lp_ package/body splitting +-- ============================================================================ + +/-- Check if a mangled name body starts with an uppercase letter +(after skipping `00` disambiguation and leading underscores). -/ +private partial def hasUpperStart (s : String) : Bool := Id.run do + if s.isEmpty then return false + let mut pos : RawPos := ⟨0⟩ + -- Skip 00 disambiguation + if s.utf8ByteSize >= 2 && rawGet s ⟨0⟩ == '0' && rawGet s ⟨1⟩ == '0' then + pos := ⟨2⟩ + -- Skip leading underscores + while !rawAtEnd s pos && rawGet s pos == '_' do + pos := rawNext s pos + if rawAtEnd s pos then return false + return (rawGet s pos).isUpper + +/-- Given `s` = everything after `lp_`, find a valid split into (pkg, body). +Returns the raw mangled package and body strings, or `none`. -/ +private partial def findLpSplit (s : String) : Option (String × String) := Id.run do + let endPos := rawEnd s + let mut validSplits : Array (String × String × Bool) := #[] + let mut pos : RawPos := ⟨0⟩ + while !rawAtEnd s pos do + if rawGet s pos == '_' && pos.byteIdx > 0 then + let nextByte := rawNext s pos + if !rawAtEnd s nextByte then + let pkg := rawExtract s ⟨0⟩ pos + let body := rawExtract s nextByte endPos + if (Name.demangle? body).isSome then + validSplits := validSplits.push (pkg, body, hasUpperStart body) + pos := rawNext s pos + if validSplits.isEmpty then return none + -- Prefer: shortest valid package (first split point). + -- Among splits where body starts uppercase, pick the first. + -- If no uppercase, still pick the first. + let upperSplits := validSplits.filter (·.2.2) + if !upperSplits.isEmpty then + return some (upperSplits[0]!.1, upperSplits[0]!.2.1) + else + return some (validSplits[0]!.1, validSplits[0]!.2.1) + +/-- Unmangle a package name (single-component mangled name). -/ +private def unmanglePkg (s : String) : String := + match Name.demangle s with + | .str .anonymous s => s + | _ => s + +-- ============================================================================ +-- Full symbol demangling +-- ============================================================================ + +/-- Strip `.cold.N` or `.cold` LLVM suffix from a symbol. -/ +private partial def stripColdSuffix (s : String) : String × String := Id.run do + let endPos := rawEnd s + let mut pos : RawPos := ⟨0⟩ + while !rawAtEnd s pos do + if rawGet s pos == '.' then + let rest := rawExtract s pos endPos + if rest.startsWith ".cold" then + return (rawExtract s ⟨0⟩ pos, rest) + pos := rawNext s pos + return (s, "") + +/-- Demangle a mangled body and postprocess to human-friendly string. -/ +private def demangleBody (body : String) : String := + let name := Name.demangle body + postprocessComponents (nameToComponents name) + +/-- Try prefix-based demangling of a symbol (without .cold suffix). -/ +private def demangleCore (s : String) : Option String := do + -- _init_l_ + if s.startsWith "_init_l_" then + let body := rawExtract s ⟨8⟩ (rawEnd s) + if !body.isEmpty then return s!"[init] {demangleBody body}" + + -- _init_lp_ + if s.startsWith "_init_lp_" then + let after := rawExtract s ⟨9⟩ (rawEnd s) + if let some (pkg, body) := findLpSplit after then + if !body.isEmpty then return s!"[init] {demangleBody body} ({unmanglePkg pkg})" + + -- initialize_l_ + if s.startsWith "initialize_l_" then + let body := rawExtract s ⟨13⟩ (rawEnd s) + if !body.isEmpty then return s!"[module_init] {demangleBody body}" + + -- initialize_lp_ + if s.startsWith "initialize_lp_" then + let after := rawExtract s ⟨14⟩ (rawEnd s) + if let some (pkg, body) := findLpSplit after then + if !body.isEmpty then return s!"[module_init] {demangleBody body} ({unmanglePkg pkg})" + + -- initialize_ (bare module init) + if s.startsWith "initialize_" then + let body := rawExtract s ⟨11⟩ (rawEnd s) + if !body.isEmpty then return s!"[module_init] {demangleBody body}" + + -- l_ + if s.startsWith "l_" then + let body := rawExtract s ⟨2⟩ (rawEnd s) + if !body.isEmpty then return demangleBody body + + -- lp_ + if s.startsWith "lp_" then + let after := rawExtract s ⟨3⟩ (rawEnd s) + if let some (pkg, body) := findLpSplit after then + if !body.isEmpty then return s!"{demangleBody body} ({unmanglePkg pkg})" + + none + +/-- Demangle a C symbol name produced by the Lean compiler. +Returns `none` if the symbol is not a recognized Lean name. -/ +public def demangleSymbol (symbol : String) : Option String := do + if symbol.isEmpty then none + + -- Strip .cold suffix first + let (core, coldSuffix) := stripColdSuffix symbol + + -- Handle lean_apply_N + if core.startsWith "lean_apply_" then + let rest := rawExtract core ⟨11⟩ (rawEnd core) + if isAllDigits rest then + let r := s!"" + if coldSuffix.isEmpty then return r else return s!"{r} {coldSuffix}" + + -- Handle _lean_main + if core == "_lean_main" then + if coldSuffix.isEmpty then return "[lean] main" + else return s!"[lean] main {coldSuffix}" + + -- Try prefix-based demangling + let result ← demangleCore core + if coldSuffix.isEmpty then return result + else return s!"{result} {coldSuffix}" + +-- ============================================================================ +-- Backtrace line parsing +-- ============================================================================ + +/-- Extract the symbol name from a backtrace line. +Returns `(prefix, symbol, suffix)` where `prefix ++ symbol ++ suffix == line`. +Handles Linux glibc and macOS backtrace formats. -/ +private partial def extractSymbol (line : String) : + Option (String × String × String) := Id.run do + let endPos := rawEnd line + -- Try Linux glibc: ./lean(SYMBOL+0x2a) [0x555...] + let mut pos : RawPos := ⟨0⟩ + while !rawAtEnd line pos do + if rawGet line pos == '(' then + let symStart := rawNext line pos + let mut j := symStart + while !rawAtEnd line j do + let c := rawGet line j + if c == '+' || c == ')' then + if j.byteIdx > symStart.byteIdx then + return some (rawExtract line ⟨0⟩ symStart, + rawExtract line symStart j, + rawExtract line j endPos) + return none + j := rawNext line j + return none + pos := rawNext line pos + + -- Try macOS: N lib 0xADDR SYMBOL + offset + pos := ⟨0⟩ + while !rawAtEnd line pos do + if rawGet line pos == '0' then + let pos1 := rawNext line pos + if !rawAtEnd line pos1 && rawGet line pos1 == 'x' then + -- Skip hex digits + let mut j := rawNext line pos1 + while !rawAtEnd line j && isHexChar (rawGet line j) do + j := rawNext line j + -- Skip spaces + while !rawAtEnd line j && rawGet line j == ' ' do + j := rawNext line j + if rawAtEnd line j then return none + let symStart := j + -- Find " + " or end + while !rawAtEnd line j do + if rawGet line j == ' ' then + let j1 := rawNext line j + if !rawAtEnd line j1 && rawGet line j1 == '+' then + let j2 := rawNext line j1 + if !rawAtEnd line j2 && rawGet line j2 == ' ' then + break + j := rawNext line j + if j.byteIdx > symStart.byteIdx then + return some (rawExtract line ⟨0⟩ symStart, + rawExtract line symStart j, + rawExtract line j endPos) + return none + pos := rawNext line pos + + return none + +/-- Demangle a backtrace line. Returns `none` if no Lean symbol was found. -/ +public def demangleBtLine (line : String) : Option String := do + let (pfx, sym, sfx) ← extractSymbol line + let demangled ← demangleSymbol sym + return pfx ++ demangled ++ sfx + +-- ============================================================================ +-- C exports for runtime backtrace handler +-- ============================================================================ + +/-- C export: demangle a backtrace line. +Returns the demangled line, or empty string if nothing was demangled. -/ +@[export lean_demangle_bt_line_cstr] +def demangleBtLineCStr (line : @& String) : String := + (demangleBtLine line).getD "" + +/-- C export: demangle a single symbol. +Returns the demangled name, or empty string if not a Lean symbol. -/ +@[export lean_demangle_symbol_cstr] +def demangleSymbolCStr (symbol : @& String) : String := + (demangleSymbol symbol).getD "" + +end Lean.Name.Demangle diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 7bce946380ab..b49ca7c0e458 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -21,7 +21,6 @@ set( sharecommon.cpp stack_overflow.cpp process.cpp - demangle.cpp object_ref.cpp mpn.cpp mutex.cpp diff --git a/src/runtime/demangle.cpp b/src/runtime/demangle.cpp deleted file mode 100644 index 4849f219565a..000000000000 --- a/src/runtime/demangle.cpp +++ /dev/null @@ -1,792 +0,0 @@ -/* -Copyright (c) 2025 Microsoft Corporation. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. - -Author: Leonardo de Moura - -C++ port of the Lean name demangling algorithm (Name.demangleAux from -NameMangling.lean) and human-friendly postprocessing. Used to make -runtime backtraces readable. -*/ -#include -#include -#include -#include -#include "runtime/demangle.h" - -namespace { - -// --------------------------------------------------------------------------- -// Name component: either a string or a number -// --------------------------------------------------------------------------- - -struct Component { - bool is_num; - std::string str; - unsigned num; - Component() : is_num(false), num(0) {} - static Component mk_str(std::string s) { Component c; c.str = std::move(s); return c; } - static Component mk_str(char ch) { Component c; c.str = std::string(1, ch); return c; } - static Component mk_num(unsigned n) { Component c; c.is_num = true; c.num = n; return c; } -}; - -using Components = std::vector; - -// --------------------------------------------------------------------------- -// Hex parsing and UTF-8 encoding -// --------------------------------------------------------------------------- - -int parse_hex(const char * s, int pos, int len, int n_digits, unsigned & out_val) { - if (pos + n_digits > len) return 0; - unsigned val = 0; - for (int i = 0; i < n_digits; i++) { - char c = s[pos + i]; - if (c >= '0' && c <= '9') - val = (val << 4) | (unsigned)(c - '0'); - else if (c >= 'a' && c <= 'f') - val = (val << 4) | (unsigned)(c - 'a' + 10); - else - return 0; - } - out_val = val; - return n_digits; -} - -void append_utf8(std::string & out, unsigned cp) { - if (cp < 0x80) { - out += (char)cp; - } else if (cp < 0x800) { - out += (char)(0xC0 | (cp >> 6)); - out += (char)(0x80 | (cp & 0x3F)); - } else if (cp < 0x10000) { - out += (char)(0xE0 | (cp >> 12)); - out += (char)(0x80 | ((cp >> 6) & 0x3F)); - out += (char)(0x80 | (cp & 0x3F)); - } else if (cp < 0x110000) { - out += (char)(0xF0 | (cp >> 18)); - out += (char)(0x80 | ((cp >> 12) & 0x3F)); - out += (char)(0x80 | ((cp >> 6) & 0x3F)); - out += (char)(0x80 | (cp & 0x3F)); - } -} - -// --------------------------------------------------------------------------- -// Core demangling: produces a component list -// Port of Name.demangleAux from NameMangling.lean -// --------------------------------------------------------------------------- - -void demangle_main(const char * s, int pos, int len, - std::string & acc, int ucount, Components & out); -void name_start(const char * s, int pos, int len, Components & out); - -void decode_num(const char * s, int pos, int len, - unsigned n, Components & out) { - while (pos < len) { - char ch = s[pos]; - if (ch >= '0' && ch <= '9') { - n = n * 10 + (unsigned)(ch - '0'); - pos++; - } else { - pos++; // skip trailing '_' - out.push_back(Component::mk_num(n)); - if (pos >= len) return; - pos++; // skip separator '_' - name_start(s, pos, len, out); - return; - } - } - out.push_back(Component::mk_num(n)); -} - -void name_start(const char * s, int pos, int len, Components & out) { - if (pos >= len) return; - char ch = s[pos]; - pos++; - if (ch >= '0' && ch <= '9') { - if (ch == '0' && pos < len && s[pos] == '0') { - pos++; - std::string acc; - demangle_main(s, pos, len, acc, 0, out); - } else { - decode_num(s, pos, len, (unsigned)(ch - '0'), out); - } - } else if (ch == '_') { - std::string acc; - demangle_main(s, pos, len, acc, 1, out); - } else { - std::string acc(1, ch); - demangle_main(s, pos, len, acc, 0, out); - } -} - -void demangle_main(const char * s, int pos, int len, - std::string & acc, int ucount, Components & out) { - while (pos < len) { - char ch = s[pos]; - pos++; - - if (ch == '_') { ucount++; continue; } - - if (ucount % 2 == 0) { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - acc += ch; - ucount = 0; - continue; - } - - // Odd ucount: separator or escape - if (ch >= '0' && ch <= '9') { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - out.push_back(Component::mk_str(std::move(acc))); - if (ch == '0' && pos < len && s[pos] == '0') { - pos++; - acc.clear(); - demangle_main(s, pos, len, acc, 0, out); - return; - } else { - decode_num(s, pos, len, (unsigned)(ch - '0'), out); - return; - } - } - - unsigned hex_val; - int consumed; - if (ch == 'x') { - consumed = parse_hex(s, pos, len, 2, hex_val); - if (consumed > 0) { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - append_utf8(acc, hex_val); - pos += consumed; ucount = 0; continue; - } - } - if (ch == 'u') { - consumed = parse_hex(s, pos, len, 4, hex_val); - if (consumed > 0) { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - append_utf8(acc, hex_val); - pos += consumed; ucount = 0; continue; - } - } - if (ch == 'U') { - consumed = parse_hex(s, pos, len, 8, hex_val); - if (consumed > 0) { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - append_utf8(acc, hex_val); - pos += consumed; ucount = 0; continue; - } - } - - // Name separator - out.push_back(Component::mk_str(std::move(acc))); - acc.clear(); - for (int i = 0; i < ucount / 2; i++) acc += '_'; - acc += ch; - ucount = 0; - } - - for (int i = 0; i < ucount / 2; i++) acc += '_'; - if (!acc.empty()) - out.push_back(Component::mk_str(std::move(acc))); -} - -bool demangle_body(const char * s, int len, Components & out) { - if (len == 0) return false; - name_start(s, 0, len, out); - return !out.empty(); -} - -// Convenience: demangle to flat dot-separated string (used for lp_ validation) -bool demangle_body_flat(const char * s, int len, std::string & out) { - Components comps; - if (!demangle_body(s, len, comps)) return false; - for (size_t i = 0; i < comps.size(); i++) { - if (i > 0) out += '.'; - if (comps[i].is_num) out += std::to_string(comps[i].num); - else out += comps[i].str; - } - return true; -} - -// --------------------------------------------------------------------------- -// Format components as dot-separated string -// --------------------------------------------------------------------------- - -std::string format_name(const Components & comps) { - std::string out; - for (size_t i = 0; i < comps.size(); i++) { - if (i > 0) out += '.'; - if (comps[i].is_num) out += std::to_string(comps[i].num); - else out += comps[i].str; - } - return out; -} - -// --------------------------------------------------------------------------- -// Human-friendly postprocessing -// Port of postprocess_name from lean_demangle.py -// --------------------------------------------------------------------------- - -// Suffix flag labels (UTF-8 encoded) -static const char * FLAG_ARITY_DOWN = "arity\xe2\x86\x93"; // arity↓ -static const char * FLAG_BOXED = "boxed"; -static const char * FLAG_IMPL = "impl"; -static const char * FLAG_LAMBDA = "\xce\xbb"; // λ -static const char * FLAG_JP = "jp"; -static const char * FLAG_CLOSED = "closed"; - -// Check if a string consists entirely of ASCII digits. -bool is_all_digits(const char * s) { - if (!*s) return false; - while (*s) { if (*s < '0' || *s > '9') return false; s++; } - return true; -} - -bool starts_with_str(const std::string & s, const char * prefix) { - size_t plen = strlen(prefix); - return s.size() >= plen && s.compare(0, plen, prefix) == 0; -} - -// Match a compiler-generated suffix component. Returns flag label or nullptr. -const char * match_suffix(const Component & c) { - if (c.is_num) return nullptr; - const std::string & s = c.str; - // Exact matches - if (s == "_redArg") return FLAG_ARITY_DOWN; - if (s == "_boxed") return FLAG_BOXED; - if (s == "_impl") return FLAG_IMPL; - // Exact or indexed prefix matches - if (s == "_lam" || s == "_lambda" || s == "_elam") return FLAG_LAMBDA; - if (s == "_jp") return FLAG_JP; - if (s == "_closed") return FLAG_CLOSED; - // Indexed: _lam_N, _lambda_N, _elam_N, _jp_N, _closed_N - struct { const char * prefix; size_t len; const char * flag; } indexed[] = { - {"_lam_", 5, FLAG_LAMBDA}, - {"_lambda_", 8, FLAG_LAMBDA}, - {"_elam_", 6, FLAG_LAMBDA}, - {"_jp_", 4, FLAG_JP}, - {"_closed_", 8, FLAG_CLOSED}, - }; - for (auto & e : indexed) { - if (s.size() > e.len && s.compare(0, e.len, e.prefix) == 0 && - is_all_digits(s.c_str() + e.len)) - return e.flag; - } - return nullptr; -} - -// Check if component is a spec_N index. -bool is_spec_index(const Component & c) { - if (c.is_num) return false; - return starts_with_str(c.str, "spec_") && c.str.size() > 5 && - is_all_digits(c.str.c_str() + 5); -} - -// Strip _private.Module.0. prefix. Returns (begin index past the strip, is_private). -struct StripResult { size_t begin; bool is_private; }; - -StripResult strip_private(const Components & parts, size_t begin, size_t end) { - if (end - begin >= 3 && !parts[begin].is_num && parts[begin].str == "_private") { - for (size_t i = begin + 1; i < end; i++) { - if (parts[i].is_num && parts[i].num == 0) { - if (i + 1 < end) - return {i + 1, true}; - break; - } - } - } - return {begin, false}; -} - -// Spec context entry: name components + context flags -struct SpecEntry { - std::string name; - std::vector flags; -}; - -// Process a spec context: strip private, collect flags, format name -SpecEntry process_spec_context(const Components & comps, size_t begin, size_t end) { - SpecEntry entry; - auto sr = strip_private(comps, begin, end); - - std::vector seen_flags; - std::string name; - bool first = true; - - for (size_t i = sr.begin; i < end; i++) { - const char * flag = match_suffix(comps[i]); - if (flag) { - // Deduplicate - bool dup = false; - for (auto f : entry.flags) { if (f == flag) { dup = true; break; } } - if (!dup) entry.flags.push_back(flag); - } else if (is_spec_index(comps[i])) { - // skip - } else { - if (!first) name += '.'; - if (comps[i].is_num) name += std::to_string(comps[i].num); - else name += comps[i].str; - first = false; - } - } - entry.name = std::move(name); - return entry; -} - -std::string postprocess_name(const Components & components) { - if (components.empty()) return ""; - - size_t n = components.size(); - - // --- Strip _private prefix --- - auto sr = strip_private(components, 0, n); - size_t begin = sr.begin; - bool is_private = sr.is_private; - - // Copy relevant range into a working vector - Components parts(components.begin() + begin, components.begin() + n); - - // --- Strip hygienic suffixes: everything from _@ onward --- - { - size_t cut = parts.size(); - for (size_t i = 0; i < parts.size(); i++) { - if (!parts[i].is_num && starts_with_str(parts[i].str, "_@")) { - cut = i; - break; - } - } - parts.resize(cut); - } - - // --- Handle specialization: _at_ ... _spec N --- - std::vector spec_entries; - { - // Find first _at_ - int first_at = -1; - for (size_t i = 0; i < parts.size(); i++) { - if (!parts[i].is_num && parts[i].str == "_at_") { - first_at = (int)i; - break; - } - } - - if (first_at >= 0) { - Components base(parts.begin(), parts.begin() + first_at); - // Parse _at_..._spec entries - Components current_ctx; - bool in_ctx = false; - Components remaining; - bool skip_next = false; - - for (size_t i = first_at; i < parts.size(); i++) { - if (skip_next) { skip_next = false; continue; } - if (!parts[i].is_num && parts[i].str == "_at_") { - if (in_ctx) { - auto entry = process_spec_context(current_ctx, 0, current_ctx.size()); - if (!entry.name.empty() || !entry.flags.empty()) - spec_entries.push_back(std::move(entry)); - current_ctx.clear(); - } - in_ctx = true; - continue; - } - if (!parts[i].is_num && parts[i].str == "_spec") { - if (in_ctx) { - auto entry = process_spec_context(current_ctx, 0, current_ctx.size()); - if (!entry.name.empty() || !entry.flags.empty()) - spec_entries.push_back(std::move(entry)); - current_ctx.clear(); - in_ctx = false; - } - skip_next = true; - continue; - } - if (!parts[i].is_num && starts_with_str(parts[i].str, "_spec")) { - if (in_ctx) { - auto entry = process_spec_context(current_ctx, 0, current_ctx.size()); - if (!entry.name.empty() || !entry.flags.empty()) - spec_entries.push_back(std::move(entry)); - current_ctx.clear(); - in_ctx = false; - } - continue; - } - if (in_ctx) - current_ctx.push_back(parts[i]); - else - remaining.push_back(parts[i]); - } - if (in_ctx && !current_ctx.empty()) { - auto entry = process_spec_context(current_ctx, 0, current_ctx.size()); - if (!entry.name.empty() || !entry.flags.empty()) - spec_entries.push_back(std::move(entry)); - } - - parts = base; - parts.insert(parts.end(), remaining.begin(), remaining.end()); - } - } - - // --- Collect suffix flags from the end --- - std::vector flags; - while (!parts.empty()) { - const Component & last = parts.back(); - const char * flag = match_suffix(last); - if (flag) { - flags.push_back(flag); - parts.pop_back(); - } else if (last.is_num && parts.size() >= 2) { - const char * prev_flag = match_suffix(parts[parts.size() - 2]); - if (prev_flag) { - flags.push_back(prev_flag); - parts.pop_back(); // number - parts.pop_back(); // suffix - } else { - break; - } - } else { - break; - } - } - - if (is_private) flags.push_back("private"); - - // --- Format result --- - std::string result = parts.empty() ? "?" : format_name(parts); - - if (!flags.empty()) { - result += " ["; - for (size_t i = 0; i < flags.size(); i++) { - if (i > 0) result += ", "; - result += flags[i]; - } - result += ']'; - } - - for (auto & entry : spec_entries) { - std::string ctx_str = entry.name.empty() ? "?" : entry.name; - if (!entry.flags.empty()) { - result += " spec at " + ctx_str + "["; - for (size_t i = 0; i < entry.flags.size(); i++) { - if (i > 0) result += ", "; - result += entry.flags[i]; - } - result += ']'; - } else { - result += " spec at " + ctx_str; - } - } - - return result; -} - -// --------------------------------------------------------------------------- -// Prefix handling and lp_ splitting -// --------------------------------------------------------------------------- - -const char * starts_with(const char * s, const char * prefix) { - size_t plen = strlen(prefix); - if (strncmp(s, prefix, plen) == 0) return s + plen; - return nullptr; -} - -bool has_upper_start(const char * s, int len) { - if (len == 0) return false; - int pos = 0; - if (pos + 1 < len && s[pos] == '0' && s[pos + 1] == '0') pos += 2; - while (pos < len && s[pos] == '_') pos++; - if (pos >= len) return false; - return s[pos] >= 'A' && s[pos] <= 'Z'; -} - -bool is_valid_string_mangle(const char * s, int end) { - int pos = 0; - while (pos < end) { - char ch = s[pos]; - if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')) { - pos++; - } else if (ch == '_') { - if (pos + 1 >= end) return false; - char nch = s[pos + 1]; - unsigned v; - if (nch == '_') { pos += 2; } - else if (nch == 'x' && parse_hex(s, pos + 2, end, 2, v)) { pos += 4; } - else if (nch == 'u' && parse_hex(s, pos + 2, end, 4, v)) { pos += 6; } - else if (nch == 'U' && parse_hex(s, pos + 2, end, 8, v)) { pos += 10; } - else return false; - } else { - return false; - } - } - return true; -} - -int find_lp_body(const char * s, int len) { - int best = -1; - bool best_has_upper = false; - - for (int i = 0; i < len; i++) { - if (s[i] != '_') continue; - if (i == 0) continue; - if (!is_valid_string_mangle(s, i)) continue; - int body_start = i + 1; - int body_len = len - body_start; - if (body_len <= 0) continue; - - Components test; - if (!demangle_body(s + body_start, body_len, test)) continue; - - bool upper = has_upper_start(s + body_start, body_len); - if (upper) { - if (!best_has_upper || i > best - 1) { - best = body_start; - best_has_upper = true; - } - } else if (!best_has_upper) { - if (best == -1) best = body_start; - } - } - return best; -} - -void unmangle_pkg(const char * s, int len, std::string & out) { - std::string tmp; - if (demangle_body_flat(s, len, tmp) && tmp.find('.') == std::string::npos) { - out += tmp; - } else { - out.append(s, len); - } -} - -// --------------------------------------------------------------------------- -// Helper to produce final malloc'd string -// --------------------------------------------------------------------------- - -char * to_malloc_str(const std::string & s) { - char * ret = (char *)malloc(s.size() + 1); - if (ret) memcpy(ret, s.c_str(), s.size() + 1); - return ret; -} - -// Demangle body and postprocess to human-friendly string. -bool demangle_and_postprocess(const char * body, int body_len, std::string & out) { - Components comps; - if (!demangle_body(body, body_len, comps)) return false; - out = postprocess_name(comps); - return true; -} - -} // anonymous namespace - -// --------------------------------------------------------------------------- -// Public API -// --------------------------------------------------------------------------- - -char * lean_demangle_symbol(const char * symbol) { - if (!symbol || !symbol[0]) return nullptr; - - int slen = (int)strlen(symbol); - - // Handle lean_apply_N -> - { - const char * rest = starts_with(symbol, "lean_apply_"); - if (rest && is_all_digits(rest)) { - std::string result = " 0 && demangle_and_postprocess(rest, body_len, out)) { - std::string result = "[init] " + out; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - - // _init_lp_ prefix - if ((rest = starts_with(cs, "_init_lp_")) != nullptr) { - int after_len = core_len - (int)(rest - cs); - int body_idx = find_lp_body(rest, after_len); - if (body_idx >= 0) { - std::string pkg_out; - unmangle_pkg(rest, body_idx - 1, pkg_out); - if (demangle_and_postprocess(rest + body_idx, after_len - body_idx, out)) { - std::string result = "[init] " + out + " (" + pkg_out + ")"; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - } - - // initialize_l_ prefix - if ((rest = starts_with(cs, "initialize_l_")) != nullptr) { - int body_len = core_len - (int)(rest - cs); - if (body_len > 0 && demangle_and_postprocess(rest, body_len, out)) { - std::string result = "[module_init] " + out; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - - // initialize_lp_ prefix - if ((rest = starts_with(cs, "initialize_lp_")) != nullptr) { - int after_len = core_len - (int)(rest - cs); - int body_idx = find_lp_body(rest, after_len); - if (body_idx >= 0) { - std::string pkg_out; - unmangle_pkg(rest, body_idx - 1, pkg_out); - if (demangle_and_postprocess(rest + body_idx, after_len - body_idx, out)) { - std::string result = "[module_init] " + out + " (" + pkg_out + ")"; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - } - - // initialize_ (bare module init) - if ((rest = starts_with(cs, "initialize_")) != nullptr) { - int body_len = core_len - (int)(rest - cs); - if (body_len > 0 && demangle_and_postprocess(rest, body_len, out)) { - std::string result = "[module_init] " + out; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - - // l_ prefix - if ((rest = starts_with(cs, "l_")) != nullptr) { - int body_len = core_len - (int)(rest - cs); - if (body_len > 0 && demangle_and_postprocess(rest, body_len, out)) { - if (cold_suffix) { out += ' '; out.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(out); - } - } - - // lp_ prefix - if ((rest = starts_with(cs, "lp_")) != nullptr) { - int after_len = core_len - (int)(rest - cs); - int body_idx = find_lp_body(rest, after_len); - if (body_idx >= 0) { - std::string pkg_out; - unmangle_pkg(rest, body_idx - 1, pkg_out); - if (demangle_and_postprocess(rest + body_idx, after_len - body_idx, out)) { - std::string result = out + " (" + pkg_out + ")"; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - } - - return nullptr; -} - -// --------------------------------------------------------------------------- -// Backtrace line parsing -// --------------------------------------------------------------------------- - -static const char * extract_symbol(const char * line, int * sym_len) { - int len = (int)strlen(line); - - // Linux (glibc): ./lean(l_Lean_Meta_foo+0x2a) [0x555...] - for (int i = 0; i < len; i++) { - if (line[i] == '(') { - int start = i + 1; - for (int j = start; j < len; j++) { - if (line[j] == '+' || line[j] == ')') { - if (j > start) { *sym_len = j - start; return line + start; } - break; - } - } - break; - } - } - - // macOS: 2 lean 0x100abc123 l_Lean_Meta_foo + 42 - for (int i = 0; i + 1 < len; i++) { - if (line[i] == '0' && line[i + 1] == 'x') { - int j = i + 2; - while (j < len && ((line[j] >= '0' && line[j] <= '9') || - (line[j] >= 'a' && line[j] <= 'f') || - (line[j] >= 'A' && line[j] <= 'F'))) j++; - while (j < len && line[j] == ' ') j++; - if (j >= len) return nullptr; - int start = j; - while (j < len) { - if (j + 2 < len && line[j] == ' ' && line[j + 1] == '+' && line[j + 2] == ' ') - break; - j++; - } - if (j > start) { *sym_len = j - start; return line + start; } - return nullptr; - } - } - - return nullptr; -} - -char * lean_demangle_bt_line(const char * line) { - if (!line) return nullptr; - - int sym_len = 0; - const char * sym = extract_symbol(line, &sym_len); - if (!sym || sym_len == 0) return nullptr; - - // Make null-terminated copy - char * sym_copy = (char *)malloc(sym_len + 1); - if (!sym_copy) return nullptr; - memcpy(sym_copy, sym, sym_len); - sym_copy[sym_len] = '\0'; - - char * demangled = lean_demangle_symbol(sym_copy); - free(sym_copy); - if (!demangled) return nullptr; - - // Reconstruct line with demangled name - int line_len = (int)strlen(line); - int dem_len = (int)strlen(demangled); - int prefix_len = (int)(sym - line); - int suffix_start = prefix_len + sym_len; - int suffix_len = line_len - suffix_start; - - int new_len = prefix_len + dem_len + suffix_len; - char * result = (char *)malloc(new_len + 1); - if (!result) { free(demangled); return nullptr; } - - memcpy(result, line, prefix_len); - memcpy(result + prefix_len, demangled, dem_len); - memcpy(result + prefix_len + dem_len, line + suffix_start, suffix_len); - result[new_len] = '\0'; - - free(demangled); - return result; -} diff --git a/src/runtime/demangle.h b/src/runtime/demangle.h deleted file mode 100644 index 0af19df94c4f..000000000000 --- a/src/runtime/demangle.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -Copyright (c) 2025 Microsoft Corporation. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. - -Author: Leonardo de Moura -*/ -#pragma once - -/* - * Demangle Lean symbol names in backtrace lines. - * - * lean_demangle_symbol(symbol): - * Given a mangled C symbol name (e.g. "l_Lean_Meta_Grind_foo"), - * returns a malloc'd string with the demangled name (e.g. "Lean.Meta.Grind.foo"), - * or nullptr if the symbol is not a Lean name. - * - * lean_demangle_bt_line(line): - * Given a backtrace line from backtrace_symbols(), extracts the symbol, - * demangles it, and returns a malloc'd string with the demangled name - * substituted in. Returns nullptr if nothing was demangled. - * - * Callers must free() non-null return values. - */ - -char * lean_demangle_symbol(const char * symbol); -char * lean_demangle_bt_line(const char * line); diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index c031e8a0bcf9..a3a9a8ee0f86 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -31,7 +31,8 @@ Author: Leonardo de Moura #if LEAN_SUPPORTS_BACKTRACE #include #include -#include "runtime/demangle.h" +// Lean-exported demangler from Lean.Compiler.NameDemangling +extern "C" lean_object * lean_demangle_bt_line_cstr(lean_object *); #endif // HACK: for unknown reasons, std::isnan(x) fails on msys64 because math.h @@ -137,13 +138,20 @@ static void print_backtrace(bool force_stderr) { if (char ** symbols = backtrace_symbols(bt_buf, nptrs)) { bool raw = getenv("LEAN_BACKTRACE_RAW"); for (int i = 0; i < nptrs; i++) { - char * demangled = raw ? nullptr : lean_demangle_bt_line(symbols[i]); - if (demangled) { - panic_eprintln(demangled, force_stderr); - free(demangled); - } else { - panic_eprintln(symbols[i], force_stderr); + if (!raw) { + lean_object * line_obj = lean_mk_string(symbols[i]); + lean_object * result = lean_demangle_bt_line_cstr(line_obj); + char const * result_str = lean_string_cstr(result); + if (result_str[0] != '\0') { + panic_eprintln(result_str, force_stderr); + lean_dec(result); + lean_dec(line_obj); + continue; + } + lean_dec(result); + lean_dec(line_obj); } + panic_eprintln(symbols[i], force_stderr); } // According to `man backtrace`, each `symbols[i]` should NOT be freed free(symbols); diff --git a/tests/lean/run/demangling.lean b/tests/lean/run/demangling.lean new file mode 100644 index 000000000000..d3415cba0ea0 --- /dev/null +++ b/tests/lean/run/demangling.lean @@ -0,0 +1,281 @@ +module +import Lean.Compiler.NameMangling +import Lean.Compiler.NameDemangling + +/-! +# Tests for Lean Name Demangling + +Tests the full demangling pipeline from `NameDemangling.lean`. +-/ + +open Lean.Name.Demangle +open Lean (Name) + +def check (label : String) (got expected : String) : IO Unit := do + if got != expected then + throw <| .userError s!"FAIL [{label}]: got {repr got}, expected {repr expected}" + +def checkSome (label : String) (got : Option String) (expected : String) : IO Unit := do + match got with + | some v => check label v expected + | none => throw <| .userError s!"FAIL [{label}]: got none, expected {repr expected}" + +def checkNone (label : String) (got : Option String) : IO Unit := do + match got with + | some v => throw <| .userError s!"FAIL [{label}]: got {repr v}, expected none" + | none => pure () + +-- Helper to build name with compiler suffixes +private def mkName (parts : List (String ⊕ Nat)) : Name := + parts.foldl (fun n p => + match p with + | .inl s => n.str s + | .inr k => n.num k) .anonymous + +-- ============================================================================ +-- Basic l_ prefix demangling +-- ============================================================================ + +#eval checkSome "simple l_" (demangleSymbol "l_Lean_Meta_Sym_main") "Lean.Meta.Sym.main" +#eval checkSome "single l_" (demangleSymbol "l_main") "main" + +-- ============================================================================ +-- lp_ prefix with package names +-- ============================================================================ + +#eval do + -- Construct: lp_std_Lean_Meta_foo + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "std"}_") + checkSome "lp_ simple" (demangleSymbol mangled) "Lean.Meta.foo (std)" + +#eval do + -- Package with underscore: my_pkg + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "my_pkg"}_") + checkSome "lp_ underscore pkg" (demangleSymbol mangled) "Lean.Meta.foo (my_pkg)" + +-- ============================================================================ +-- _init_ prefixes +-- ============================================================================ + +#eval checkSome "init l_" + (demangleSymbol "_init_l_Lean_Meta_foo") "[init] Lean.Meta.foo" + +#eval do + let name := mkName [.inl "_private", .inl "X", .inr 0, .inl "Y", .inl "foo"] + let mangled := "_init_" ++ name.mangle "l_" + checkSome "init l_ private" (demangleSymbol mangled) "[init] Y.foo [private]" + +-- ============================================================================ +-- initialize_ prefixes +-- ============================================================================ + +#eval checkSome "initialize bare" + (demangleSymbol "initialize_Init_Control_Basic") "[module_init] Init.Control.Basic" +#eval checkSome "initialize l_" + (demangleSymbol "initialize_l_Lean_Meta_foo") "[module_init] Lean.Meta.foo" + +-- ============================================================================ +-- lean_apply_N and _lean_main +-- ============================================================================ + +#eval checkSome "lean_apply_5" (demangleSymbol "lean_apply_5") "" +#eval checkSome "lean_apply_12" (demangleSymbol "lean_apply_12") "" +#eval checkSome "lean_main" (demangleSymbol "_lean_main") "[lean] main" + +-- ============================================================================ +-- .cold.N suffix handling +-- ============================================================================ + +#eval do + let mangled := Name.mangle `Lean.Meta.foo._redArg "l_" + checkSome "cold suffix" (demangleSymbol (mangled ++ ".cold.1")) + "Lean.Meta.foo [arity↓] .cold.1" + +#eval checkSome "cold suffix plain" + (demangleSymbol "l_Lean_Meta_foo.cold") "Lean.Meta.foo .cold" + +-- ============================================================================ +-- Non-Lean symbols return none +-- ============================================================================ + +#eval checkNone "printf" (demangleSymbol "printf") +#eval checkNone "malloc" (demangleSymbol "malloc") +#eval checkNone "empty" (demangleSymbol "") + +-- ============================================================================ +-- Postprocessing: suffix folding +-- ============================================================================ + +#eval do + let name := mkName [.inl "foo", .inl "_boxed"] + checkSome "boxed" (demangleSymbol (name.mangle "l_")) "foo [boxed]" + +#eval do + let name := mkName [.inl "foo", .inl "_redArg"] + checkSome "redArg" (demangleSymbol (name.mangle "l_")) "foo [arity↓]" + +#eval do + let name := mkName [.inl "foo", .inl "_impl"] + checkSome "impl" (demangleSymbol (name.mangle "l_")) "foo [impl]" + +#eval do + let name := mkName [.inl "foo", .inl "_lam", .inr 0] + checkSome "lambda separate" (demangleSymbol (name.mangle "l_")) "foo [λ]" + +#eval do + let name := mkName [.inl "foo", .inl "_lam_0"] + checkSome "lambda indexed" (demangleSymbol (name.mangle "l_")) "foo [λ]" + +#eval do + let name := mkName [.inl "foo", .inl "_lambda_2"] + checkSome "lambda_2" (demangleSymbol (name.mangle "l_")) "foo [λ]" + +#eval do + let name := mkName [.inl "foo", .inl "_elam"] + checkSome "elam" (demangleSymbol (name.mangle "l_")) "foo [λ]" + +#eval do + let name := mkName [.inl "foo", .inl "_jp"] + checkSome "jp" (demangleSymbol (name.mangle "l_")) "foo [jp]" + +#eval do + let name := mkName [.inl "myConst", .inl "_closed", .inr 0] + checkSome "closed" (demangleSymbol (name.mangle "l_")) "myConst [closed]" + +#eval do + let name := mkName [.inl "myConst", .inl "_closed_0"] + checkSome "closed indexed" (demangleSymbol (name.mangle "l_")) "myConst [closed]" + +#eval do + let name := mkName [.inl "foo", .inl "_redArg", .inl "_boxed"] + checkSome "multiple suffixes" (demangleSymbol (name.mangle "l_")) + "foo [boxed, arity↓]" + +-- ============================================================================ +-- Postprocessing: private name stripping +-- ============================================================================ + +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "foo"] + checkSome "private" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo [private]" + +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "foo", .inl "_redArg"] + checkSome "private + redArg" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo [arity↓, private]" + +-- ============================================================================ +-- Postprocessing: hygienic suffix stripping +-- ============================================================================ + +#eval do + let name := mkName [.inl "Lean", .inl "Meta", .inl "foo", .inl "_@", + .inl "Lean", .inl "Meta", .inl "_hyg", .inr 42] + checkSome "hygienic strip" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo" + +-- ============================================================================ +-- Postprocessing: specialization contexts +-- ============================================================================ + +#eval do + let name := mkName [.inl "List", .inl "map", .inl "_at_", + .inl "Foo", .inl "bar", .inl "_spec", .inr 3] + checkSome "specialization" (demangleSymbol (name.mangle "l_")) + "List.map spec at Foo.bar" + +#eval do + let name := mkName [.inl "Lean", .inl "MVarId", .inl "withContext", + .inl "_at_", .inl "Foo", .inl "bar", .inl "_spec", .inr 2, + .inl "_boxed"] + checkSome "spec with base suffix" (demangleSymbol (name.mangle "l_")) + "Lean.MVarId.withContext [boxed] spec at Foo.bar" + +#eval do + let name := mkName [.inl "Lean", .inl "Meta", .inl "foo", + .inl "_at_", .inl "Lean", .inl "Meta", .inl "bar", + .inl "_elam_1", .inl "_redArg", .inl "_spec", .inr 2] + checkSome "spec context flags" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo spec at Lean.Meta.bar[λ, arity↓]" + +#eval do + let name := mkName [.inl "f", + .inl "_at_", .inl "g", .inl "_spec", .inr 1, + .inl "_at_", .inl "h", .inl "_spec", .inr 2] + checkSome "multiple at" (demangleSymbol (name.mangle "l_")) + "f spec at g spec at h" + +-- ============================================================================ +-- Complex real-world name +-- ============================================================================ + +#eval do + let name := mkName [.inl "Lean", .inl "MVarId", .inl "withContext", + .inl "_at_", + .inl "_private", .inl "Lean", .inl "Meta", .inl "Sym", .inr 0, + .inl "Lean", .inl "Meta", .inl "Sym", .inl "BackwardRule", .inl "apply", + .inl "_spec", .inr 2, + .inl "_redArg", .inl "_lambda", .inr 0, .inl "_boxed"] + checkSome "complex" (demangleSymbol (name.mangle "l_")) + "Lean.MVarId.withContext [boxed, λ, arity↓] spec at Lean.Meta.Sym.BackwardRule.apply" + +-- ============================================================================ +-- Backtrace line parsing: Linux glibc format +-- ============================================================================ + +#eval checkSome "bt linux" + (demangleBtLine "./lean(l_Lean_Meta_foo+0x2a) [0x555555555555]") + "./lean(Lean.Meta.foo+0x2a) [0x555555555555]" + +#eval do + let name := mkName [.inl "foo", .inl "_boxed"] + let sym := name.mangle "l_" + checkSome "bt linux boxed" + (demangleBtLine s!"./lean({sym}+0x10) [0x7fff]") + "./lean(foo [boxed]+0x10) [0x7fff]" + +-- ============================================================================ +-- Backtrace line parsing: macOS format +-- ============================================================================ + +#eval checkSome "bt macos" + (demangleBtLine "3 lean 0x00000001001234 l_Lean_Meta_foo + 42") + "3 lean 0x00000001001234 Lean.Meta.foo + 42" + +-- ============================================================================ +-- Backtrace line parsing: non-Lean lines unchanged +-- ============================================================================ + +#eval checkNone "bt non-lean" + (demangleBtLine "./lean(printf+0x10) [0x7fff]") + +-- ============================================================================ +-- Edge cases: never crashes +-- ============================================================================ + +#eval do + let inputs := #[ + "", "l_", "lp_", "lp_x", "_init_", "initialize_", + "l_____", "lp____", "l_00", "l_0", + "some random string", "l_ space" + ] + inputs.forM fun inp => do + let _ := demangleSymbol inp + let _ := demangleBtLine inp + pure () + +-- ============================================================================ +-- C export wrappers (test via getD "" since CStr functions are not public) +-- ============================================================================ + +#eval check "export symbol" + ((demangleSymbol "l_Lean_Meta_foo").getD "") "Lean.Meta.foo" +#eval check "export symbol none" + ((demangleSymbol "printf").getD "") "" +#eval check "export bt line" + ((demangleBtLine "./lean(l_foo+0x1) [0x2]").getD "") "./lean(foo+0x1) [0x2]" +#eval check "export bt line none" + ((demangleBtLine "no lean symbols here").getD "") "" From 6cdaee43b82b71af72387f486b2cb0c67e878114 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Wed, 18 Feb 2026 03:53:38 +0000 Subject: [PATCH 2/7] =?UTF-8?q?fix:=20use=20weak=20symbol=20for=20backtrac?= =?UTF-8?q?e=20demangler=20to=20avoid=20leanrt=20=E2=86=92=20libLean=20lin?= =?UTF-8?q?k=20dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `@[export lean_demangle_bt_line_cstr]` function is defined in `Lean.Compiler.NameDemangling` (in `libLean`) but called from `libleanrt` (`print_backtrace`). On Linux these are in separate linker groups, so downstream executables fail to link with undefined reference. Use a weak symbol with a no-op stub so `leanrt` doesn't require `libLean` at link time. When the Lean demangler is linked in (as it always is for Lean executables), the real implementation overrides the stub. Co-Authored-By: Claude Opus 4.6 --- src/runtime/object.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index a3a9a8ee0f86..2ac8ae6345f8 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -31,8 +31,12 @@ Author: Leonardo de Moura #if LEAN_SUPPORTS_BACKTRACE #include #include -// Lean-exported demangler from Lean.Compiler.NameDemangling -extern "C" lean_object * lean_demangle_bt_line_cstr(lean_object *); +// Lean-exported demangler from Lean.Compiler.NameDemangling. +// Declared as a weak symbol so leanrt doesn't require libLean at link time. +// When the Lean demangler is linked in, it overrides this stub. +extern "C" __attribute__((weak)) lean_object * lean_demangle_bt_line_cstr(lean_object * s) { + return lean_mk_string(""); +} #endif // HACK: for unknown reasons, std::isnan(x) fails on msys64 because math.h From f2694b8bc2c1339ab8c128756447a55ff1dec870 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Wed, 18 Feb 2026 05:41:32 +0000 Subject: [PATCH 3/7] fix: address review feedback for name demangling This PR fixes issues found during code review: - add re-entrancy guard in C runtime panic handler to prevent recursive panic when demangling itself panics - expand test coverage from 44 to 118 test cases - fix bug in findLpSplit where _private was absorbed into package name - replace hardcoded byte offsets with dropPrefix? - remove unnecessary partial annotations using bounded for loops - move public import from Lean.lean to Lean/Compiler.lean - remove AI-slop stylistic artifacts (section dividers, verbose comments) Co-Authored-By: Claude Opus 4.6 --- src/Lean.lean | 1 - src/Lean/Compiler.lean | 1 + src/Lean/Compiler/NameDemangling.lean | 167 ++++++----------- src/runtime/object.cpp | 6 +- tests/lean/run/demangling.lean | 246 +++++++++++++++++++++++++- 5 files changed, 300 insertions(+), 121 deletions(-) diff --git a/src/Lean.lean b/src/Lean.lean index b11461e90545..f5e62c749ae6 100644 --- a/src/Lean.lean +++ b/src/Lean.lean @@ -7,7 +7,6 @@ module public import Lean.Data public import Lean.Compiler -public import Lean.Compiler.NameDemangling public import Lean.Environment public import Lean.Modifiers public import Lean.ProjFns diff --git a/src/Lean/Compiler.lean b/src/Lean/Compiler.lean index 570c698042a4..216c41629174 100644 --- a/src/Lean/Compiler.lean +++ b/src/Lean/Compiler.lean @@ -18,4 +18,5 @@ public import Lean.Compiler.FFI public import Lean.Compiler.MetaAttr public import Lean.Compiler.NoncomputableAttr public import Lean.Compiler.Main +public import Lean.Compiler.NameDemangling public import Lean.Compiler.Old -- TODO: delete after we port code generator to Lean diff --git a/src/Lean/Compiler/NameDemangling.lean b/src/Lean/Compiler/NameDemangling.lean index 1f74f30c4a7b..6442b961fecb 100644 --- a/src/Lean/Compiler/NameDemangling.lean +++ b/src/Lean/Compiler/NameDemangling.lean @@ -11,23 +11,12 @@ import Init.Data.String.TakeDrop import Init.Data.String.Search public import Lean.Compiler.NameMangling -/-! -# Lean Name Demangling - -Human-friendly demangling of Lean compiler symbol names. Extends the core -`Name.demangle` from `NameMangling.lean` with prefix handling, compiler suffix -folding, and backtrace line parsing. - -This is the single source of truth for name demangling. The C runtime calls -these functions via `@[export]` for backtrace display. --/ +/-! Human-friendly demangling of Lean compiler symbol names, extending +`Name.demangle` with prefix handling, compiler suffix folding, and backtrace +line parsing. Called from the C runtime via `@[export]` for backtrace display. -/ namespace Lean.Name.Demangle --- ============================================================================ --- Raw position helpers (avoid dependent String.Pos proofs) --- ============================================================================ - private abbrev RawPos := String.Pos.Raw private def rawGet (s : String) (i : RawPos) : Char := @@ -44,9 +33,9 @@ private def rawExtract (s : String) (b e : RawPos) : String := private def rawEnd (s : String) : RawPos := ⟨s.utf8ByteSize⟩ --- ============================================================================ --- Helpers --- ============================================================================ +/-- `String.dropPrefix?` returning a `String` instead of a `Slice`. -/ +private def dropPrefix? (s : String) (pre : String) : Option String := + (s.dropPrefix? pre).map (·.toString) private def isAllDigits (s : String) : Bool := !s.isEmpty && s.all (·.isDigit) @@ -54,16 +43,11 @@ private def isAllDigits (s : String) : Bool := private def isHexChar (c : Char) : Bool := c.isDigit || (c.val >= 0x61 && c.val <= 0x66) || (c.val >= 0x41 && c.val <= 0x46) --- ============================================================================ --- Component type and Name conversion --- ============================================================================ - private inductive Component where | str (s : String) | num (n : Nat) deriving BEq, Repr, Inhabited -/-- Convert a `Name` to a forward-ordered array of components. -/ private def nameToComponents (n : Name) : Array Component := go n [] |>.toArray where @@ -78,42 +62,28 @@ private def formatComponents (comps : Array Component) : String := | Component.num n => toString n) |> String.intercalate "." --- ============================================================================ --- Compiler suffix matching --- ============================================================================ - -/-- Match a component against known compiler-generated suffixes. -Returns the human-friendly flag label, or `none`. -/ private def matchSuffix (c : Component) : Option String := match c with | Component.str s => - -- Exact matches if s == "_redArg" then some "arity↓" else if s == "_boxed" then some "boxed" else if s == "_impl" then some "impl" - -- Exact or indexed else if s == "_lam" || s == "_lambda" || s == "_elam" then some "λ" else if s == "_jp" then some "jp" else if s == "_closed" then some "closed" - -- Indexed: _lam_N, _lambda_N, _elam_N, _jp_N, _closed_N - else if s.startsWith "_lam_" && isAllDigits (rawExtract s ⟨5⟩ (rawEnd s)) then some "λ" - else if s.startsWith "_lambda_" && isAllDigits (rawExtract s ⟨8⟩ (rawEnd s)) then some "λ" - else if s.startsWith "_elam_" && isAllDigits (rawExtract s ⟨6⟩ (rawEnd s)) then some "λ" - else if s.startsWith "_jp_" && isAllDigits (rawExtract s ⟨4⟩ (rawEnd s)) then some "jp" - else if s.startsWith "_closed_" && isAllDigits (rawExtract s ⟨8⟩ (rawEnd s)) then some "closed" + else if (dropPrefix? s "_lam_").any isAllDigits then some "λ" + else if (dropPrefix? s "_lambda_").any isAllDigits then some "λ" + else if (dropPrefix? s "_elam_").any isAllDigits then some "λ" + else if (dropPrefix? s "_jp_").any isAllDigits then some "jp" + else if (dropPrefix? s "_closed_").any isAllDigits then some "closed" else none | _ => none private def isSpecIndex (c : Component) : Bool := match c with - | Component.str s => - s.startsWith "spec_" && s.length > 5 && isAllDigits (rawExtract s ⟨5⟩ (rawEnd s)) + | Component.str s => (dropPrefix? s "spec_").any isAllDigits | _ => false --- ============================================================================ --- Postprocessing: components → human-friendly string --- ============================================================================ - private def stripPrivate (comps : Array Component) (start stop : Nat) : Nat × Bool := if stop - start >= 3 && comps[start]? == some (Component.str "_private") then @@ -131,7 +101,6 @@ private structure SpecEntry where flags : Array String private def processSpecContext (comps : Array Component) : SpecEntry := Id.run do - -- Strip private prefix within context let mut begin_ := 0 if comps.size >= 3 && comps[0]? == some (Component.str "_private") then for i in [1:comps.size] do @@ -153,16 +122,13 @@ private def processSpecContext (comps : Array Component) : SpecEntry := Id.run d | Component.num n => nameParts := nameParts.push (toString n) { name := String.intercalate "." nameParts.toList, flags } -/-- Main postprocessing: transform raw demangled components into a -human-friendly display string. -/ private def postprocessComponents (components : Array Component) : String := Id.run do if components.isEmpty then return "" - -- 1. Strip _private prefix let (privStart, isPrivate) := stripPrivate components 0 components.size let mut parts := components.extract privStart components.size - -- 2. Strip hygienic suffixes: everything from _@ onward + -- Strip hygienic suffixes (_@ onward) for i in [:parts.size] do match parts[i]! with | Component.str s => @@ -171,7 +137,7 @@ private def postprocessComponents (components : Array Component) : String := Id. break | _ => pure () - -- 3. Handle specialization: _at_ ... _spec N + -- Handle specialization contexts (_at_ ... _spec N) let mut specEntries : Array SpecEntry := #[] let mut firstAt : Option Nat := none for i in [:parts.size] do @@ -183,7 +149,6 @@ private def postprocessComponents (components : Array Component) : String := Id. let base := parts.extract 0 fa let rest := parts.extract fa parts.size - -- Parse _at_..._spec entries let mut entries : Array (Array Component) := #[] let mut currentCtx : Option (Array Component) := none let mut remaining : Array Component := #[] @@ -223,7 +188,7 @@ private def postprocessComponents (components : Array Component) : String := Id. parts := base ++ remaining - -- 4. Collect suffix flags from the end + -- Collect suffix flags from the end let mut flags : Array String := #[] let mut cont := true while cont && !parts.isEmpty do @@ -247,7 +212,6 @@ private def postprocessComponents (components : Array Component) : String := Id. if isPrivate then flags := flags.push "private" - -- 5. Format result let name := if parts.isEmpty then "?" else formatComponents parts let mut result := name @@ -264,37 +228,35 @@ private def postprocessComponents (components : Array Component) : String := Id. return result --- ============================================================================ --- lp_ package/body splitting --- ============================================================================ - -/-- Check if a mangled name body starts with an uppercase letter -(after skipping `00` disambiguation and leading underscores). -/ -private partial def hasUpperStart (s : String) : Bool := Id.run do +private def hasUpperStart (s : String) : Bool := Id.run do if s.isEmpty then return false let mut pos : RawPos := ⟨0⟩ -- Skip 00 disambiguation if s.utf8ByteSize >= 2 && rawGet s ⟨0⟩ == '0' && rawGet s ⟨1⟩ == '0' then pos := ⟨2⟩ -- Skip leading underscores - while !rawAtEnd s pos && rawGet s pos == '_' do + for _ in [:s.utf8ByteSize] do + if rawAtEnd s pos || rawGet s pos != '_' then break pos := rawNext s pos if rawAtEnd s pos then return false return (rawGet s pos).isUpper -/-- Given `s` = everything after `lp_`, find a valid split into (pkg, body). -Returns the raw mangled package and body strings, or `none`. -/ -private partial def findLpSplit (s : String) : Option (String × String) := Id.run do +private def findLpSplit (s : String) : Option (String × String) := Id.run do let endPos := rawEnd s let mut validSplits : Array (String × String × Bool) := #[] let mut pos : RawPos := ⟨0⟩ - while !rawAtEnd s pos do + for _ in [:s.utf8ByteSize] do + if rawAtEnd s pos then break if rawGet s pos == '_' && pos.byteIdx > 0 then let nextByte := rawNext s pos if !rawAtEnd s nextByte then let pkg := rawExtract s ⟨0⟩ pos let body := rawExtract s nextByte endPos - if (Name.demangle? body).isSome then + -- Package must be a valid single-component mangled name + let validPkg := match Name.demangle? pkg with + | some (.str .anonymous _) => true + | _ => false + if validPkg && (Name.demangle? body).isSome then validSplits := validSplits.push (pkg, body, hasUpperStart body) pos := rawNext s pos if validSplits.isEmpty then return none @@ -307,21 +269,16 @@ private partial def findLpSplit (s : String) : Option (String × String) := Id.r else return some (validSplits[0]!.1, validSplits[0]!.2.1) -/-- Unmangle a package name (single-component mangled name). -/ private def unmanglePkg (s : String) : String := match Name.demangle s with | .str .anonymous s => s | _ => s --- ============================================================================ --- Full symbol demangling --- ============================================================================ - -/-- Strip `.cold.N` or `.cold` LLVM suffix from a symbol. -/ -private partial def stripColdSuffix (s : String) : String × String := Id.run do +private def stripColdSuffix (s : String) : String × String := Id.run do let endPos := rawEnd s let mut pos : RawPos := ⟨0⟩ - while !rawAtEnd s pos do + for _ in [:s.utf8ByteSize] do + if rawAtEnd s pos then break if rawGet s pos == '.' then let rest := rawExtract s pos endPos if rest.startsWith ".cold" then @@ -329,55 +286,44 @@ private partial def stripColdSuffix (s : String) : String × String := Id.run do pos := rawNext s pos return (s, "") -/-- Demangle a mangled body and postprocess to human-friendly string. -/ private def demangleBody (body : String) : String := let name := Name.demangle body postprocessComponents (nameToComponents name) -/-- Try prefix-based demangling of a symbol (without .cold suffix). -/ private def demangleCore (s : String) : Option String := do -- _init_l_ - if s.startsWith "_init_l_" then - let body := rawExtract s ⟨8⟩ (rawEnd s) + if let some body := dropPrefix? s "_init_l_" then if !body.isEmpty then return s!"[init] {demangleBody body}" -- _init_lp_ - if s.startsWith "_init_lp_" then - let after := rawExtract s ⟨9⟩ (rawEnd s) + if let some after := dropPrefix? s "_init_lp_" then if let some (pkg, body) := findLpSplit after then if !body.isEmpty then return s!"[init] {demangleBody body} ({unmanglePkg pkg})" -- initialize_l_ - if s.startsWith "initialize_l_" then - let body := rawExtract s ⟨13⟩ (rawEnd s) + if let some body := dropPrefix? s "initialize_l_" then if !body.isEmpty then return s!"[module_init] {demangleBody body}" -- initialize_lp_ - if s.startsWith "initialize_lp_" then - let after := rawExtract s ⟨14⟩ (rawEnd s) + if let some after := dropPrefix? s "initialize_lp_" then if let some (pkg, body) := findLpSplit after then if !body.isEmpty then return s!"[module_init] {demangleBody body} ({unmanglePkg pkg})" -- initialize_ (bare module init) - if s.startsWith "initialize_" then - let body := rawExtract s ⟨11⟩ (rawEnd s) + if let some body := dropPrefix? s "initialize_" then if !body.isEmpty then return s!"[module_init] {demangleBody body}" -- l_ - if s.startsWith "l_" then - let body := rawExtract s ⟨2⟩ (rawEnd s) + if let some body := dropPrefix? s "l_" then if !body.isEmpty then return demangleBody body -- lp_ - if s.startsWith "lp_" then - let after := rawExtract s ⟨3⟩ (rawEnd s) + if let some after := dropPrefix? s "lp_" then if let some (pkg, body) := findLpSplit after then if !body.isEmpty then return s!"{demangleBody body} ({unmanglePkg pkg})" none -/-- Demangle a C symbol name produced by the Lean compiler. -Returns `none` if the symbol is not a recognized Lean name. -/ public def demangleSymbol (symbol : String) : Option String := do if symbol.isEmpty then none @@ -385,8 +331,7 @@ public def demangleSymbol (symbol : String) : Option String := do let (core, coldSuffix) := stripColdSuffix symbol -- Handle lean_apply_N - if core.startsWith "lean_apply_" then - let rest := rawExtract core ⟨11⟩ (rawEnd core) + if let some rest := dropPrefix? core "lean_apply_" then if isAllDigits rest then let r := s!"" if coldSuffix.isEmpty then return r else return s!"{r} {coldSuffix}" @@ -401,23 +346,20 @@ public def demangleSymbol (symbol : String) : Option String := do if coldSuffix.isEmpty then return result else return s!"{result} {coldSuffix}" --- ============================================================================ --- Backtrace line parsing --- ============================================================================ - -/-- Extract the symbol name from a backtrace line. -Returns `(prefix, symbol, suffix)` where `prefix ++ symbol ++ suffix == line`. -Handles Linux glibc and macOS backtrace formats. -/ -private partial def extractSymbol (line : String) : +/-- Extract the symbol from a backtrace line (Linux glibc or macOS format). -/ +private def extractSymbol (line : String) : Option (String × String × String) := Id.run do let endPos := rawEnd line + let len := line.utf8ByteSize -- Try Linux glibc: ./lean(SYMBOL+0x2a) [0x555...] let mut pos : RawPos := ⟨0⟩ - while !rawAtEnd line pos do + for _ in [:len] do + if rawAtEnd line pos then break if rawGet line pos == '(' then let symStart := rawNext line pos let mut j := symStart - while !rawAtEnd line j do + for _ in [:len] do + if rawAtEnd line j then break let c := rawGet line j if c == '+' || c == ')' then if j.byteIdx > symStart.byteIdx then @@ -431,21 +373,25 @@ private partial def extractSymbol (line : String) : -- Try macOS: N lib 0xADDR SYMBOL + offset pos := ⟨0⟩ - while !rawAtEnd line pos do + for _ in [:len] do + if rawAtEnd line pos then break if rawGet line pos == '0' then let pos1 := rawNext line pos if !rawAtEnd line pos1 && rawGet line pos1 == 'x' then -- Skip hex digits let mut j := rawNext line pos1 - while !rawAtEnd line j && isHexChar (rawGet line j) do + for _ in [:len] do + if rawAtEnd line j || !isHexChar (rawGet line j) then break j := rawNext line j -- Skip spaces - while !rawAtEnd line j && rawGet line j == ' ' do + for _ in [:len] do + if rawAtEnd line j || rawGet line j != ' ' then break j := rawNext line j if rawAtEnd line j then return none let symStart := j -- Find " + " or end - while !rawAtEnd line j do + for _ in [:len] do + if rawAtEnd line j then break if rawGet line j == ' ' then let j1 := rawNext line j if !rawAtEnd line j1 && rawGet line j1 == '+' then @@ -462,24 +408,15 @@ private partial def extractSymbol (line : String) : return none -/-- Demangle a backtrace line. Returns `none` if no Lean symbol was found. -/ public def demangleBtLine (line : String) : Option String := do let (pfx, sym, sfx) ← extractSymbol line let demangled ← demangleSymbol sym return pfx ++ demangled ++ sfx --- ============================================================================ --- C exports for runtime backtrace handler --- ============================================================================ - -/-- C export: demangle a backtrace line. -Returns the demangled line, or empty string if nothing was demangled. -/ @[export lean_demangle_bt_line_cstr] def demangleBtLineCStr (line : @& String) : String := (demangleBtLine line).getD "" -/-- C export: demangle a single symbol. -Returns the demangled name, or empty string if not a Lean symbol. -/ @[export lean_demangle_symbol_cstr] def demangleSymbolCStr (symbol : @& String) : String := (demangleSymbol symbol).getD "" diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index 2ac8ae6345f8..4baea2913571 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -135,6 +135,8 @@ static void panic_eprintln(char const * line, bool force_stderr) { panic_eprintln(line, strlen(line), force_stderr); } +static bool g_in_demangle = false; + static void print_backtrace(bool force_stderr) { #if LEAN_SUPPORTS_BACKTRACE void * bt_buf[100]; @@ -142,10 +144,12 @@ static void print_backtrace(bool force_stderr) { if (char ** symbols = backtrace_symbols(bt_buf, nptrs)) { bool raw = getenv("LEAN_BACKTRACE_RAW"); for (int i = 0; i < nptrs; i++) { - if (!raw) { + if (!raw && !g_in_demangle) { + g_in_demangle = true; lean_object * line_obj = lean_mk_string(symbols[i]); lean_object * result = lean_demangle_bt_line_cstr(line_obj); char const * result_str = lean_string_cstr(result); + g_in_demangle = false; if (result_str[0] != '\0') { panic_eprintln(result_str, force_stderr); lean_dec(result); diff --git a/tests/lean/run/demangling.lean b/tests/lean/run/demangling.lean index d3415cba0ea0..6a536474dd21 100644 --- a/tests/lean/run/demangling.lean +++ b/tests/lean/run/demangling.lean @@ -25,6 +25,10 @@ def checkNone (label : String) (got : Option String) : IO Unit := do | some v => throw <| .userError s!"FAIL [{label}]: got {repr v}, expected none" | none => pure () +def checkName (label : String) (got expected : Name) : IO Unit := do + if got != expected then + throw <| .userError s!"FAIL [{label}]: got {repr got}, expected {repr expected}" + -- Helper to build name with compiler suffixes private def mkName (parts : List (String ⊕ Nat)) : Name := parts.foldl (fun n p => @@ -32,6 +36,122 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := | .inl s => n.str s | .inr k => n.num k) .anonymous +-- Verify mangle → demangle round-trips: demangle(mangle(name)) == name +private def checkRoundTrip (label : String) (parts : List (String ⊕ Nat)) : IO Unit := do + let name := mkName parts + let mangled := name.mangle "" + let demangled := Name.demangle mangled + checkName s!"roundtrip {label}" demangled name + +-- ============================================================================ +-- String.mangle +-- ============================================================================ + +#eval check "mangle alphanumeric" (String.mangle "hello") "hello" +#eval check "mangle underscore" (String.mangle "a_b") "a__b" +#eval check "mangle double underscore" (String.mangle "__") "____" +#eval check "mangle dot" (String.mangle ".") "_x2e" +#eval check "mangle a.b" (String.mangle "a.b") "a_x2eb" +#eval check "mangle lambda" (String.mangle "\u03bb") "_u03bb" +#eval check "mangle astral" (String.mangle (String.singleton (Char.ofNat 0x1d55c))) "_U0001d55c" +#eval check "mangle empty" (String.mangle "") "" + +-- ============================================================================ +-- Name.mangle +-- ============================================================================ + +#eval check "mangle simple name" (Name.mangle `Lean.Meta.Sym.main) "l_Lean_Meta_Sym_main" +#eval check "mangle single" (Name.mangle `main) "l_main" +#eval check "mangle underscore comp" (Name.mangle `a_b) "l_a__b" +#eval check "mangle underscore comp.c" (Name.mangle `a_b.c) "l_a__b_c" +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "withMVarContextImp"] + check "mangle private" (name.mangle "l_") + "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp" +#eval do + let name := mkName [.inr 42] + check "mangle numeric root" (name.mangle "l_") "l_42_" +#eval do + let name := mkName [.inr 42, .inl "foo"] + check "mangle numeric root + str" (name.mangle "l_") "l_42__foo" +#eval do + let name := mkName [.inl "a", .inl "x27"] + check "mangle disambiguation" (name.mangle "l_") "l_a_00x27" +#eval do + let name := mkName [.inl "0foo"] + check "mangle digit start disambiguation" (name.mangle "l_") "l_000foo" +#eval do + check "mangle custom prefix" (Name.mangle `foo "lp_pkg_") "lp_pkg_foo" + +-- ============================================================================ +-- Name.demangle (core algorithm, no postprocessing) +-- ============================================================================ + +#eval checkName "demangle simple" (Name.demangle "Lean_Meta_Sym_main") `Lean.Meta.Sym.main +#eval checkName "demangle single" (Name.demangle "main") `main +#eval checkName "demangle underscore" (Name.demangle "a__b") `a_b +#eval checkName "demangle underscore multi" (Name.demangle "a__b_c") `a_b.c +#eval checkName "demangle numeric" + (Name.demangle "foo_42__bar") (mkName [.inl "foo", .inr 42, .inl "bar"]) +#eval checkName "demangle numeric root" + (Name.demangle "42_") (mkName [.inr 42]) +#eval checkName "demangle numeric at end" + (Name.demangle "foo_42_") (mkName [.inl "foo", .inr 42]) +#eval checkName "demangle disambiguation 00" + (Name.demangle "a_00x27") (mkName [.inl "a", .inl "x27"]) +#eval checkName "demangle disambiguation root" + (Name.demangle "000foo") (mkName [.inl "0foo"]) +#eval checkName "demangle hex x" + (Name.demangle "a_x2eb") (mkName [.inl "a.b"]) +#eval checkName "demangle hex u" + (Name.demangle "_u03bb") (mkName [.inl "\u03bb"]) +#eval checkName "demangle hex U" + (Name.demangle "_U0001d55c") (mkName [.inl (String.singleton (Char.ofNat 0x1d55c))]) +#eval checkName "demangle private" + (Name.demangle "__private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp") + (mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "withMVarContextImp"]) +#eval checkName "demangle boxed suffix" + (Name.demangle "foo___boxed") (mkName [.inl "foo", .inl "_boxed"]) +#eval checkName "demangle underscore-ending component" + (Name.demangle "a___00b") (mkName [.inl "a_", .inl "b"]) + +-- ============================================================================ +-- Mangle/demangle round-trips +-- ============================================================================ + +#eval checkRoundTrip "simple" [.inl "Lean", .inl "Meta", .inl "main"] +#eval checkRoundTrip "single" [.inl "a"] +#eval checkRoundTrip "three" [.inl "Foo", .inl "Bar", .inl "baz"] +#eval checkRoundTrip "numeric" [.inl "foo", .inr 0, .inl "bar"] +#eval checkRoundTrip "numeric root" [.inr 42] +#eval checkRoundTrip "numeric multi" [.inl "a", .inr 1, .inl "b", .inr 2, .inl "c"] +#eval checkRoundTrip "_private" [.inl "_private"] +#eval checkRoundTrip "underscore" [.inl "a_b", .inl "c_d"] +#eval checkRoundTrip "_at_ _spec" [.inl "_at_", .inl "_spec"] +#eval checkRoundTrip "private name" + [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", .inr 0, + .inl "Lean", .inl "Meta", .inl "withMVarContextImp"] +#eval checkRoundTrip "boxed" [.inl "Lean", .inl "Meta", .inl "foo", .inl "_boxed"] +#eval checkRoundTrip "redArg" [.inl "Lean", .inl "Meta", .inl "foo", .inl "_redArg"] +#eval checkRoundTrip "specialization" + [.inl "List", .inl "map", .inl "_at_", .inl "Foo", .inl "bar", .inl "_spec", .inr 3] +#eval checkRoundTrip "lambda" [.inl "Foo", .inl "bar", .inl "_lambda", .inr 0] +#eval checkRoundTrip "lambda 2" [.inl "Foo", .inl "bar", .inl "_lambda", .inr 2] +#eval checkRoundTrip "closed" [.inl "myConst", .inl "_closed", .inr 0] +#eval checkRoundTrip "dot char" [.inl "a.b"] +#eval checkRoundTrip "unicode" [.inl "\u03bb"] +#eval checkRoundTrip "unicode arrow" [.inl "a", .inl "b\u2192c"] +#eval checkRoundTrip "disambiguation x27" [.inl "a", .inl "x27"] +#eval checkRoundTrip "disambiguation 0foo" [.inl "0foo"] +#eval checkRoundTrip "disambiguation underscore end" [.inl "a_", .inl "b"] +#eval checkRoundTrip "complex" + [.inl "Lean", .inl "MVarId", .inl "withContext", .inl "_at_", + .inl "_private", .inl "Lean", .inl "Meta", .inl "Sym", .inr 0, + .inl "Lean", .inl "Meta", .inl "Sym", .inl "BackwardRule", .inl "apply", + .inl "_spec", .inr 2, .inl "_redArg", .inl "_lambda", .inr 0, .inl "_boxed"] + -- ============================================================================ -- Basic l_ prefix demangling -- ============================================================================ @@ -44,15 +164,18 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := -- ============================================================================ #eval do - -- Construct: lp_std_Lean_Meta_foo let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "std"}_") checkSome "lp_ simple" (demangleSymbol mangled) "Lean.Meta.foo (std)" #eval do - -- Package with underscore: my_pkg let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "my_pkg"}_") checkSome "lp_ underscore pkg" (demangleSymbol mangled) "Lean.Meta.foo (my_pkg)" +#eval do + let name := mkName [.inl "_private", .inl "X", .inr 0, .inl "Y", .inl "foo"] + let mangled := name.mangle (s!"lp_{String.mangle "pkg"}_") + checkSome "lp_ private decl" (demangleSymbol mangled) "Y.foo [private] (pkg)" + -- ============================================================================ -- _init_ prefixes -- ============================================================================ @@ -65,6 +188,11 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := let mangled := "_init_" ++ name.mangle "l_" checkSome "init l_ private" (demangleSymbol mangled) "[init] Y.foo [private]" +#eval do + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "std"}_") + checkSome "init lp_" (demangleSymbol ("_init_" ++ mangled)) + "[init] Lean.Meta.foo (std)" + -- ============================================================================ -- initialize_ prefixes -- ============================================================================ @@ -74,6 +202,11 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := #eval checkSome "initialize l_" (demangleSymbol "initialize_l_Lean_Meta_foo") "[module_init] Lean.Meta.foo" +#eval do + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "std"}_") + checkSome "initialize lp_" (demangleSymbol ("initialize_" ++ mangled)) + "[module_init] Lean.Meta.foo (std)" + -- ============================================================================ -- lean_apply_N and _lean_main -- ============================================================================ @@ -94,6 +227,12 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := #eval checkSome "cold suffix plain" (demangleSymbol "l_Lean_Meta_foo.cold") "Lean.Meta.foo .cold" +#eval checkSome "cold lean_apply" + (demangleSymbol "lean_apply_5.cold.1") " .cold.1" + +#eval checkSome "cold lean_main" + (demangleSymbol "_lean_main.cold.1") "[lean] main .cold.1" + -- ============================================================================ -- Non-Lean symbols return none -- ============================================================================ @@ -134,10 +273,18 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := let name := mkName [.inl "foo", .inl "_elam"] checkSome "elam" (demangleSymbol (name.mangle "l_")) "foo [λ]" +#eval do + let name := mkName [.inl "foo", .inl "_elam_1"] + checkSome "elam indexed" (demangleSymbol (name.mangle "l_")) "foo [λ]" + #eval do let name := mkName [.inl "foo", .inl "_jp"] checkSome "jp" (demangleSymbol (name.mangle "l_")) "foo [jp]" +#eval do + let name := mkName [.inl "foo", .inl "_jp_3"] + checkSome "jp indexed" (demangleSymbol (name.mangle "l_")) "foo [jp]" + #eval do let name := mkName [.inl "myConst", .inl "_closed", .inr 0] checkSome "closed" (demangleSymbol (name.mangle "l_")) "myConst [closed]" @@ -151,6 +298,20 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := checkSome "multiple suffixes" (demangleSymbol (name.mangle "l_")) "foo [boxed, arity↓]" +#eval do + -- _lam_0 followed by _boxed + let name := mkName [.inl "Lean", .inl "Meta", .inl "Simp", .inl "simpLambda", + .inl "_lam_0", .inl "_boxed"] + checkSome "lambda + boxed" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.Simp.simpLambda [boxed, λ]" + +#eval do + -- _redArg followed by _lam_0 + let name := mkName [.inl "Lean", .inl "profileitIOUnsafe", + .inl "_redArg", .inl "_lam_0"] + checkSome "redArg + lam" (demangleSymbol (name.mangle "l_")) + "Lean.profileitIOUnsafe [λ, arity↓]" + -- ============================================================================ -- Postprocessing: private name stripping -- ============================================================================ @@ -201,6 +362,14 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := checkSome "spec context flags" (demangleSymbol (name.mangle "l_")) "Lean.Meta.foo spec at Lean.Meta.bar[λ, arity↓]" +#eval do + -- Duplicate flag labels are deduplicated: _lam_0 + _elam_1 both map to λ + let name := mkName [.inl "f", .inl "_at_", + .inl "g", .inl "_lam_0", .inl "_elam_1", .inl "_redArg", + .inl "_spec", .inr 1] + checkSome "spec context flags dedup" (demangleSymbol (name.mangle "l_")) + "f spec at g[λ, arity↓]" + #eval do let name := mkName [.inl "f", .inl "_at_", .inl "g", .inl "_spec", .inr 1, @@ -208,6 +377,44 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := checkSome "multiple at" (demangleSymbol (name.mangle "l_")) "f spec at g spec at h" +#eval do + -- Multiple spec at with flags on base and contexts + let name := mkName [.inl "f", + .inl "_at_", .inl "g", .inl "_redArg", .inl "_spec", .inr 1, + .inl "_at_", .inl "h", .inl "_lam_0", .inl "_spec", .inr 2, + .inl "_boxed"] + checkSome "multiple at with flags" (demangleSymbol (name.mangle "l_")) + "f [boxed] spec at g[arity↓] spec at h[λ]" + +#eval do + -- Base trailing suffix appearing after _spec N + let name := mkName [.inl "f", .inl "_at_", .inl "g", .inl "_spec", .inr 1, + .inl "_lam_0"] + checkSome "base flags after spec" (demangleSymbol (name.mangle "l_")) + "f [λ] spec at g" + +#eval do + -- spec_0 entries in context should be stripped + let name := mkName [.inl "Lean", .inl "Meta", .inl "transformWithCache", .inl "visit", + .inl "_at_", + .inl "_private", .inl "Lean", .inl "Meta", .inl "Transform", .inr 0, + .inl "Lean", .inl "Meta", .inl "transform", + .inl "Lean", .inl "Meta", .inl "Sym", .inl "unfoldReducible", + .inl "spec_0", .inl "spec_0", + .inl "_spec", .inr 1] + checkSome "spec context strip spec_0" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.transformWithCache.visit spec at Lean.Meta.transform.Lean.Meta.Sym.unfoldReducible" + +#eval do + -- _private in spec context should be stripped + let name := mkName [.inl "Array", .inl "mapMUnsafe", .inl "map", + .inl "_at_", + .inl "_private", .inl "Lean", .inl "Meta", .inl "Transform", .inr 0, + .inl "Lean", .inl "Meta", .inl "transformWithCache", .inl "visit", + .inl "_spec", .inr 1] + checkSome "spec context strip private" (demangleSymbol (name.mangle "l_")) + "Array.mapMUnsafe.map spec at Lean.Meta.transformWithCache.visit" + -- ============================================================================ -- Complex real-world name -- ============================================================================ @@ -237,6 +444,22 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := (demangleBtLine s!"./lean({sym}+0x10) [0x7fff]") "./lean(foo [boxed]+0x10) [0x7fff]" +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "foo"] + let sym := name.mangle "l_" + checkSome "bt linux private" + (demangleBtLine s!"./lean({sym}+0x10) [0x7fff]") + "./lean(Lean.Meta.foo [private]+0x10) [0x7fff]" + +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "foo", .inl "_redArg"] + let sym := name.mangle "l_" + checkSome "bt linux private + redArg" + (demangleBtLine s!"./lean({sym}+0x10) [0x7fff]") + "./lean(Lean.Meta.foo [arity↓, private]+0x10) [0x7fff]" + -- ============================================================================ -- Backtrace line parsing: macOS format -- ============================================================================ @@ -245,6 +468,13 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := (demangleBtLine "3 lean 0x00000001001234 l_Lean_Meta_foo + 42") "3 lean 0x00000001001234 Lean.Meta.foo + 42" +#eval do + let name := mkName [.inl "foo", .inl "_redArg"] + let sym := name.mangle "l_" + checkSome "bt macos redArg" + (demangleBtLine s!"3 lean 0x00000001001234 {sym} + 42") + "3 lean 0x00000001001234 foo [arity↓] + 42" + -- ============================================================================ -- Backtrace line parsing: non-Lean lines unchanged -- ============================================================================ @@ -252,6 +482,9 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := #eval checkNone "bt non-lean" (demangleBtLine "./lean(printf+0x10) [0x7fff]") +#eval checkNone "bt no parens" + (demangleBtLine "just some random text") + -- ============================================================================ -- Edge cases: never crashes -- ============================================================================ @@ -260,7 +493,12 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := let inputs := #[ "", "l_", "lp_", "lp_x", "_init_", "initialize_", "l_____", "lp____", "l_00", "l_0", - "some random string", "l_ space" + "some random string", "l_ space", + "_init_l_", "_init_lp_", "_init_lp_x", + "initialize_l_", "initialize_lp_", "initialize_lp_x", + "lean_apply_", "lean_apply_x", "lean_apply_0", + "l_x", "l_0", "l_00", "l___", "lp_a_b", + ".cold", ".cold.1", "l_.cold", "l_x.cold.99" ] inputs.forM fun inp => do let _ := demangleSymbol inp @@ -268,7 +506,7 @@ private def mkName (parts : List (String ⊕ Nat)) : Name := pure () -- ============================================================================ --- C export wrappers (test via getD "" since CStr functions are not public) +-- C export wrappers -- ============================================================================ #eval check "export symbol" From bee68d41aacb2df86f0cfac099f28ae0d7be4045 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Fri, 20 Feb 2026 04:46:15 +0000 Subject: [PATCH 4/7] fix: address further review feedback for name demangling This PR addresses review feedback from hargoniX and Kha: - add missing lean_dec(s) in the weak symbol stub (object.cpp) - add Char.isHexDigit to Init and use it instead of custom isHexChar - reuse Lean.NamePart instead of defining a local Component type - remove _lambda_N, _jp_N, _closed_N numbered suffix handling (compiler only produces _lam_N and _elam_N) Co-Authored-By: Claude Opus 4.6 --- src/Init/Data/Char/Basic.lean | 8 +++ src/Lean/Compiler/NameDemangling.lean | 76 ++++++++++++--------------- src/Lean/Data/NameTrie.lean | 1 + src/runtime/object.cpp | 1 + tests/lean/run/demangling.lean | 6 +-- 5 files changed, 46 insertions(+), 46 deletions(-) diff --git a/src/Init/Data/Char/Basic.lean b/src/Init/Data/Char/Basic.lean index f8aa9801aff0..2d52b6788e19 100644 --- a/src/Init/Data/Char/Basic.lean +++ b/src/Init/Data/Char/Basic.lean @@ -129,6 +129,14 @@ The ASCII digits are the following: `0123456789`. @[inline] def isDigit (c : Char) : Bool := c.val ≥ '0'.val && c.val ≤ '9'.val +/-- +Returns `true` if the character is an ASCII hexadecimal digit. + +The ASCII hexadecimal digits are the following: `0123456789abcdefABCDEF`. +-/ +@[inline] def isHexDigit (c : Char) : Bool := + c.isDigit || (c.val ≥ 'a'.val && c.val ≤ 'f'.val) || (c.val ≥ 'A'.val && c.val ≤ 'F'.val) + /-- Returns `true` if the character is an ASCII letter or digit. diff --git a/src/Lean/Compiler/NameDemangling.lean b/src/Lean/Compiler/NameDemangling.lean index 6442b961fecb..9c657acabcee 100644 --- a/src/Lean/Compiler/NameDemangling.lean +++ b/src/Lean/Compiler/NameDemangling.lean @@ -9,6 +9,7 @@ prelude import Init.While import Init.Data.String.TakeDrop import Init.Data.String.Search +import Lean.Data.NameTrie public import Lean.Compiler.NameMangling /-! Human-friendly demangling of Lean compiler symbol names, extending @@ -40,31 +41,23 @@ private def dropPrefix? (s : String) (pre : String) : Option String := private def isAllDigits (s : String) : Bool := !s.isEmpty && s.all (·.isDigit) -private def isHexChar (c : Char) : Bool := - c.isDigit || (c.val >= 0x61 && c.val <= 0x66) || (c.val >= 0x41 && c.val <= 0x46) - -private inductive Component where - | str (s : String) - | num (n : Nat) - deriving BEq, Repr, Inhabited - -private def nameToComponents (n : Name) : Array Component := +private def nameToNameParts (n : Name) : Array NamePart := go n [] |>.toArray where - go : Name → List Component → List Component + go : Name → List NamePart → List NamePart | .anonymous, acc => acc - | .str pre s, acc => go pre (Component.str s :: acc) - | .num pre n, acc => go pre (Component.num n :: acc) + | .str pre s, acc => go pre (NamePart.str s :: acc) + | .num pre n, acc => go pre (NamePart.num n :: acc) -private def formatComponents (comps : Array Component) : String := +private def formatNameParts (comps : Array NamePart) : String := comps.toList.map (fun - | Component.str s => s - | Component.num n => toString n) + | NamePart.str s => s + | NamePart.num n => toString n) |> String.intercalate "." -private def matchSuffix (c : Component) : Option String := +private def matchSuffix (c : NamePart) : Option String := match c with - | Component.str s => + | NamePart.str s => if s == "_redArg" then some "arity↓" else if s == "_boxed" then some "boxed" else if s == "_impl" then some "impl" @@ -72,24 +65,21 @@ private def matchSuffix (c : Component) : Option String := else if s == "_jp" then some "jp" else if s == "_closed" then some "closed" else if (dropPrefix? s "_lam_").any isAllDigits then some "λ" - else if (dropPrefix? s "_lambda_").any isAllDigits then some "λ" else if (dropPrefix? s "_elam_").any isAllDigits then some "λ" - else if (dropPrefix? s "_jp_").any isAllDigits then some "jp" - else if (dropPrefix? s "_closed_").any isAllDigits then some "closed" else none | _ => none -private def isSpecIndex (c : Component) : Bool := +private def isSpecIndex (c : NamePart) : Bool := match c with - | Component.str s => (dropPrefix? s "spec_").any isAllDigits + | NamePart.str s => (dropPrefix? s "spec_").any isAllDigits | _ => false -private def stripPrivate (comps : Array Component) (start stop : Nat) : +private def stripPrivate (comps : Array NamePart) (start stop : Nat) : Nat × Bool := - if stop - start >= 3 && comps[start]? == some (Component.str "_private") then + if stop - start >= 3 && comps[start]? == some (NamePart.str "_private") then Id.run do for i in [start + 1 : stop] do - if comps[i]? == some (Component.num 0) then + if comps[i]? == some (NamePart.num 0) then if i + 1 < stop then return (i + 1, true) else return (start, false) return (start, false) @@ -100,11 +90,11 @@ private structure SpecEntry where name : String flags : Array String -private def processSpecContext (comps : Array Component) : SpecEntry := Id.run do +private def processSpecContext (comps : Array NamePart) : SpecEntry := Id.run do let mut begin_ := 0 - if comps.size >= 3 && comps[0]? == some (Component.str "_private") then + if comps.size >= 3 && comps[0]? == some (NamePart.str "_private") then for i in [1:comps.size] do - if comps[i]? == some (Component.num 0) && i + 1 < comps.size then + if comps[i]? == some (NamePart.num 0) && i + 1 < comps.size then begin_ := i + 1 break let mut nameParts : Array String := #[] @@ -118,11 +108,11 @@ private def processSpecContext (comps : Array Component) : SpecEntry := Id.run d | none => if isSpecIndex c then pure () else match c with - | Component.str s => nameParts := nameParts.push s - | Component.num n => nameParts := nameParts.push (toString n) + | NamePart.str s => nameParts := nameParts.push s + | NamePart.num n => nameParts := nameParts.push (toString n) { name := String.intercalate "." nameParts.toList, flags } -private def postprocessComponents (components : Array Component) : String := Id.run do +private def postprocessNameParts (components : Array NamePart) : String := Id.run do if components.isEmpty then return "" let (privStart, isPrivate) := stripPrivate components 0 components.size @@ -131,7 +121,7 @@ private def postprocessComponents (components : Array Component) : String := Id. -- Strip hygienic suffixes (_@ onward) for i in [:parts.size] do match parts[i]! with - | Component.str s => + | NamePart.str s => if s.startsWith "_@" then parts := parts.extract 0 i break @@ -141,7 +131,7 @@ private def postprocessComponents (components : Array Component) : String := Id. let mut specEntries : Array SpecEntry := #[] let mut firstAt : Option Nat := none for i in [:parts.size] do - if parts[i]! == Component.str "_at_" then + if parts[i]! == NamePart.str "_at_" then firstAt := some i break @@ -149,9 +139,9 @@ private def postprocessComponents (components : Array Component) : String := Id. let base := parts.extract 0 fa let rest := parts.extract fa parts.size - let mut entries : Array (Array Component) := #[] - let mut currentCtx : Option (Array Component) := none - let mut remaining : Array Component := #[] + let mut entries : Array (Array NamePart) := #[] + let mut currentCtx : Option (Array NamePart) := none + let mut remaining : Array NamePart := #[] let mut skipNext := false for i in [:rest.size] do @@ -159,16 +149,16 @@ private def postprocessComponents (components : Array Component) : String := Id. skipNext := false continue let p := rest[i]! - if p == Component.str "_at_" then + if p == NamePart.str "_at_" then if let some ctx := currentCtx then entries := entries.push ctx currentCtx := some #[] - else if p == Component.str "_spec" then + else if p == NamePart.str "_spec" then if let some ctx := currentCtx then entries := entries.push ctx currentCtx := none skipNext := true - else if match p with | Component.str s => s.startsWith "_spec" | _ => false then + else if match p with | NamePart.str s => s.startsWith "_spec" | _ => false then if let some ctx := currentCtx then entries := entries.push ctx currentCtx := none @@ -199,7 +189,7 @@ private def postprocessComponents (components : Array Component) : String := Id. parts := parts.pop | none => match last with - | Component.num _ => + | NamePart.num _ => if parts.size >= 2 then match matchSuffix parts[parts.size - 2]! with | some flag => @@ -212,7 +202,7 @@ private def postprocessComponents (components : Array Component) : String := Id. if isPrivate then flags := flags.push "private" - let name := if parts.isEmpty then "?" else formatComponents parts + let name := if parts.isEmpty then "?" else formatNameParts parts let mut result := name if !flags.isEmpty then @@ -288,7 +278,7 @@ private def stripColdSuffix (s : String) : String × String := Id.run do private def demangleBody (body : String) : String := let name := Name.demangle body - postprocessComponents (nameToComponents name) + postprocessNameParts (nameToNameParts name) private def demangleCore (s : String) : Option String := do -- _init_l_ @@ -381,7 +371,7 @@ private def extractSymbol (line : String) : -- Skip hex digits let mut j := rawNext line pos1 for _ in [:len] do - if rawAtEnd line j || !isHexChar (rawGet line j) then break + if rawAtEnd line j || !(rawGet line j).isHexDigit then break j := rawNext line j -- Skip spaces for _ in [:len] do diff --git a/src/Lean/Data/NameTrie.lean b/src/Lean/Data/NameTrie.lean index f58f7d150a30..11823d0c40f8 100644 --- a/src/Lean/Data/NameTrie.lean +++ b/src/Lean/Data/NameTrie.lean @@ -16,6 +16,7 @@ namespace Lean inductive NamePart | str (s : String) | num (n : Nat) + deriving BEq, Inhabited instance : ToString NamePart where toString diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index 4baea2913571..48229b6d4f4b 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -35,6 +35,7 @@ Author: Leonardo de Moura // Declared as a weak symbol so leanrt doesn't require libLean at link time. // When the Lean demangler is linked in, it overrides this stub. extern "C" __attribute__((weak)) lean_object * lean_demangle_bt_line_cstr(lean_object * s) { + lean_dec(s); return lean_mk_string(""); } #endif diff --git a/tests/lean/run/demangling.lean b/tests/lean/run/demangling.lean index 6a536474dd21..8d53585eed14 100644 --- a/tests/lean/run/demangling.lean +++ b/tests/lean/run/demangling.lean @@ -267,7 +267,7 @@ private def checkRoundTrip (label : String) (parts : List (String ⊕ Nat)) : IO #eval do let name := mkName [.inl "foo", .inl "_lambda_2"] - checkSome "lambda_2" (demangleSymbol (name.mangle "l_")) "foo [λ]" + checkSome "lambda_2" (demangleSymbol (name.mangle "l_")) "foo._lambda_2" #eval do let name := mkName [.inl "foo", .inl "_elam"] @@ -283,7 +283,7 @@ private def checkRoundTrip (label : String) (parts : List (String ⊕ Nat)) : IO #eval do let name := mkName [.inl "foo", .inl "_jp_3"] - checkSome "jp indexed" (demangleSymbol (name.mangle "l_")) "foo [jp]" + checkSome "jp indexed" (demangleSymbol (name.mangle "l_")) "foo._jp_3" #eval do let name := mkName [.inl "myConst", .inl "_closed", .inr 0] @@ -291,7 +291,7 @@ private def checkRoundTrip (label : String) (parts : List (String ⊕ Nat)) : IO #eval do let name := mkName [.inl "myConst", .inl "_closed_0"] - checkSome "closed indexed" (demangleSymbol (name.mangle "l_")) "myConst [closed]" + checkSome "closed indexed" (demangleSymbol (name.mangle "l_")) "myConst._closed_0" #eval do let name := mkName [.inl "foo", .inl "_redArg", .inl "_boxed"] From 8b27fdb2d670ccf903cdae0fbb96687c9168f8f3 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Fri, 20 Feb 2026 05:42:49 +0000 Subject: [PATCH 5/7] refactor: replace shadow string API with modern String API in name demangling This PR removes the custom raw-position string helpers (rawGet, rawNext, rawAtEnd, rawExtract, rawEnd) and rewrites all string traversal to use the modern Lean 4 String API: s.positions iterator, s.Pos with proofs, s.find? with char/predicate/substring patterns, s.extract, and a skipWhile helper with termination proof. Net reduction of 52 lines. Co-Authored-By: Claude Opus 4.6 --- src/Lean/Compiler/NameDemangling.lean | 173 +++++++++----------------- 1 file changed, 59 insertions(+), 114 deletions(-) diff --git a/src/Lean/Compiler/NameDemangling.lean b/src/Lean/Compiler/NameDemangling.lean index 9c657acabcee..6f8881a616ac 100644 --- a/src/Lean/Compiler/NameDemangling.lean +++ b/src/Lean/Compiler/NameDemangling.lean @@ -9,6 +9,7 @@ prelude import Init.While import Init.Data.String.TakeDrop import Init.Data.String.Search +import Init.Data.String.Iterate import Lean.Data.NameTrie public import Lean.Compiler.NameMangling @@ -18,22 +19,6 @@ line parsing. Called from the C runtime via `@[export]` for backtrace display. - namespace Lean.Name.Demangle -private abbrev RawPos := String.Pos.Raw - -private def rawGet (s : String) (i : RawPos) : Char := - String.Pos.Raw.get s i - -private def rawNext (s : String) (i : RawPos) : RawPos := - String.Pos.Raw.next s i - -private def rawAtEnd (s : String) (i : RawPos) : Bool := - i.byteIdx >= s.utf8ByteSize - -private def rawExtract (s : String) (b e : RawPos) : String := - String.Pos.Raw.extract s b e - -private def rawEnd (s : String) : RawPos := ⟨s.utf8ByteSize⟩ - /-- `String.dropPrefix?` returning a `String` instead of a `Slice`. -/ private def dropPrefix? (s : String) (pre : String) : Option String := (s.dropPrefix? pre).map (·.toString) @@ -218,37 +203,30 @@ private def postprocessNameParts (components : Array NamePart) : String := Id.ru return result -private def hasUpperStart (s : String) : Bool := Id.run do - if s.isEmpty then return false - let mut pos : RawPos := ⟨0⟩ - -- Skip 00 disambiguation - if s.utf8ByteSize >= 2 && rawGet s ⟨0⟩ == '0' && rawGet s ⟨1⟩ == '0' then - pos := ⟨2⟩ - -- Skip leading underscores - for _ in [:s.utf8ByteSize] do - if rawAtEnd s pos || rawGet s pos != '_' then break - pos := rawNext s pos - if rawAtEnd s pos then return false - return (rawGet s pos).isUpper +private def hasUpperStart (s : String) : Bool := + let s := ((s.dropPrefix? "00").map (·.toString)).getD s + go s s.startPos +where + go (s : String) (pos : s.Pos) : Bool := + if h : pos = s.endPos then false + else if pos.get h == '_' then go s (pos.next h) + else (pos.get h).isUpper + termination_by pos private def findLpSplit (s : String) : Option (String × String) := Id.run do - let endPos := rawEnd s let mut validSplits : Array (String × String × Bool) := #[] - let mut pos : RawPos := ⟨0⟩ - for _ in [:s.utf8ByteSize] do - if rawAtEnd s pos then break - if rawGet s pos == '_' && pos.byteIdx > 0 then - let nextByte := rawNext s pos - if !rawAtEnd s nextByte then - let pkg := rawExtract s ⟨0⟩ pos - let body := rawExtract s nextByte endPos - -- Package must be a valid single-component mangled name - let validPkg := match Name.demangle? pkg with - | some (.str .anonymous _) => true - | _ => false - if validPkg && (Name.demangle? body).isSome then - validSplits := validSplits.push (pkg, body, hasUpperStart body) - pos := rawNext s pos + for ⟨pos, h⟩ in s.positions do + if pos.get h == '_' && pos ≠ s.startPos then + let nextPos := pos.next h + if nextPos == s.endPos then continue + let pkg := s.extract s.startPos pos + let body := s.extract nextPos s.endPos + -- Package must be a valid single-component mangled name + let validPkg := match Name.demangle? pkg with + | some (.str .anonymous _) => true + | _ => false + if validPkg && (Name.demangle? body).isSome then + validSplits := validSplits.push (pkg, body, hasUpperStart body) if validSplits.isEmpty then return none -- Prefer: shortest valid package (first split point). -- Among splits where body starts uppercase, pick the first. @@ -264,17 +242,10 @@ private def unmanglePkg (s : String) : String := | .str .anonymous s => s | _ => s -private def stripColdSuffix (s : String) : String × String := Id.run do - let endPos := rawEnd s - let mut pos : RawPos := ⟨0⟩ - for _ in [:s.utf8ByteSize] do - if rawAtEnd s pos then break - if rawGet s pos == '.' then - let rest := rawExtract s pos endPos - if rest.startsWith ".cold" then - return (rawExtract s ⟨0⟩ pos, rest) - pos := rawNext s pos - return (s, "") +private def stripColdSuffix (s : String) : String × String := + match s.find? ".cold" with + | some pos => (s.extract s.startPos pos, s.extract pos s.endPos) + | none => (s, "") private def demangleBody (body : String) : String := let name := Name.demangle body @@ -336,67 +307,41 @@ public def demangleSymbol (symbol : String) : Option String := do if coldSuffix.isEmpty then return result else return s!"{result} {coldSuffix}" +private def skipWhile (s : String) (pos : s.Pos) (pred : Char → Bool) : s.Pos := + if h : pos = s.endPos then pos + else if pred (pos.get h) then skipWhile s (pos.next h) pred + else pos +termination_by pos + +private def splitAt₂ (s : String) (p₁ p₂ : s.Pos) : String × String × String := + (s.extract s.startPos p₁, s.extract p₁ p₂, s.extract p₂ s.endPos) + /-- Extract the symbol from a backtrace line (Linux glibc or macOS format). -/ private def extractSymbol (line : String) : - Option (String × String × String) := Id.run do - let endPos := rawEnd line - let len := line.utf8ByteSize - -- Try Linux glibc: ./lean(SYMBOL+0x2a) [0x555...] - let mut pos : RawPos := ⟨0⟩ - for _ in [:len] do - if rawAtEnd line pos then break - if rawGet line pos == '(' then - let symStart := rawNext line pos - let mut j := symStart - for _ in [:len] do - if rawAtEnd line j then break - let c := rawGet line j - if c == '+' || c == ')' then - if j.byteIdx > symStart.byteIdx then - return some (rawExtract line ⟨0⟩ symStart, - rawExtract line symStart j, - rawExtract line j endPos) - return none - j := rawNext line j - return none - pos := rawNext line pos - - -- Try macOS: N lib 0xADDR SYMBOL + offset - pos := ⟨0⟩ - for _ in [:len] do - if rawAtEnd line pos then break - if rawGet line pos == '0' then - let pos1 := rawNext line pos - if !rawAtEnd line pos1 && rawGet line pos1 == 'x' then - -- Skip hex digits - let mut j := rawNext line pos1 - for _ in [:len] do - if rawAtEnd line j || !(rawGet line j).isHexDigit then break - j := rawNext line j - -- Skip spaces - for _ in [:len] do - if rawAtEnd line j || rawGet line j != ' ' then break - j := rawNext line j - if rawAtEnd line j then return none - let symStart := j - -- Find " + " or end - for _ in [:len] do - if rawAtEnd line j then break - if rawGet line j == ' ' then - let j1 := rawNext line j - if !rawAtEnd line j1 && rawGet line j1 == '+' then - let j2 := rawNext line j1 - if !rawAtEnd line j2 && rawGet line j2 == ' ' then - break - j := rawNext line j - if j.byteIdx > symStart.byteIdx then - return some (rawExtract line ⟨0⟩ symStart, - rawExtract line symStart j, - rawExtract line j endPos) - return none - pos := rawNext line pos - - return none + Option (String × String × String) := + tryLinux line |>.orElse (fun _ => tryMacOS line) +where + -- Linux glibc: ./lean(SYMBOL+0x2a) [0x555...] + tryLinux (line : String) : Option (String × String × String) := do + let parenPos ← line.find? '(' + if h : parenPos = line.endPos then none else + let symStart := parenPos.next h + let delimPos ← symStart.find? (fun c => c == '+' || c == ')') + if delimPos == symStart then none else + some (splitAt₂ line symStart delimPos) + -- macOS: N lib 0xADDR SYMBOL + offset + tryMacOS (line : String) : Option (String × String × String) := do + let zxPos ← line.find? "0x" + if h : zxPos = line.endPos then none else + let afterZero := zxPos.next h + if h2 : afterZero = line.endPos then none else + let afterX := afterZero.next h2 + let afterHex := skipWhile line afterX (·.isHexDigit) + let symStart := skipWhile line afterHex (· == ' ') + if symStart == line.endPos then none else + let symEnd := (symStart.find? " + ").getD line.endPos + if symEnd == symStart then none else + some (splitAt₂ line symStart symEnd) public def demangleBtLine (line : String) : Option String := do let (pfx, sym, sfx) ← extractSymbol line From 1c37c3b73433cb428ea525dfde6c9f36d0a045e9 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Fri, 20 Feb 2026 10:03:36 +0000 Subject: [PATCH 6/7] fix: remove reentrancy guard from backtrace demangler The g_in_demangle guard was preventing recursive demangling if the demangler itself panicked. The demangler is pure string manipulation that should not panic, and if it does we'd rather discover the bug than silently fall back to raw output. Co-Authored-By: Claude Opus 4.6 --- src/runtime/object.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index 48229b6d4f4b..aefb0064abe1 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -136,8 +136,6 @@ static void panic_eprintln(char const * line, bool force_stderr) { panic_eprintln(line, strlen(line), force_stderr); } -static bool g_in_demangle = false; - static void print_backtrace(bool force_stderr) { #if LEAN_SUPPORTS_BACKTRACE void * bt_buf[100]; @@ -145,12 +143,10 @@ static void print_backtrace(bool force_stderr) { if (char ** symbols = backtrace_symbols(bt_buf, nptrs)) { bool raw = getenv("LEAN_BACKTRACE_RAW"); for (int i = 0; i < nptrs; i++) { - if (!raw && !g_in_demangle) { - g_in_demangle = true; + if (!raw) { lean_object * line_obj = lean_mk_string(symbols[i]); lean_object * result = lean_demangle_bt_line_cstr(line_obj); char const * result_str = lean_string_cstr(result); - g_in_demangle = false; if (result_str[0] != '\0') { panic_eprintln(result_str, force_stderr); lean_dec(result); From 25e8e043b8898bfde4b6a12efb6e290363df04cd Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Fri, 6 Mar 2026 05:05:38 +0000 Subject: [PATCH 7/7] fix: address review feedback for name demangling (round 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR addresses three issues raised in review: 1. Use `Name.toString` for correct escaping of demangled names (#9): replace manual `String.intercalate "."` formatting with `Name.toString`, which handles `«»` escaping for names containing special characters. Add `namePartsToName` helper and guard `formatNameParts` against empty input. 2. Use `Name.demangle?` for robust `lp_` package splitting (#8): replace `findLpSplit`/`hasUpperStart`/`unmanglePkg` with a simpler `demangleWithPkg` that tries each underscore as a split point and validates both package and body via `Name.demangle?` round-trip. 3. Fix suffix flags lost after hygienic sections: move suffix flag collection (`_boxed`, `_lam`, etc.) before `_@` hygienic suffix stripping, so flags appearing after hygienic sections are recognized. Co-Authored-By: Claude Opus 4.6 --- src/Lean/Compiler/NameDemangling.lean | 126 +++++++++++--------------- tests/lean/run/demangling.lean | 20 ++++ 2 files changed, 75 insertions(+), 71 deletions(-) diff --git a/src/Lean/Compiler/NameDemangling.lean b/src/Lean/Compiler/NameDemangling.lean index 6f8881a616ac..235a6006d46d 100644 --- a/src/Lean/Compiler/NameDemangling.lean +++ b/src/Lean/Compiler/NameDemangling.lean @@ -34,11 +34,15 @@ where | .str pre s, acc => go pre (NamePart.str s :: acc) | .num pre n, acc => go pre (NamePart.num n :: acc) +private def namePartsToName (parts : Array NamePart) : Name := + parts.foldl (fun acc p => + match p with + | .str s => acc.mkStr s + | .num n => acc.mkNum n) .anonymous + +/-- Format name parts using `Name.toString` for correct escaping. -/ private def formatNameParts (comps : Array NamePart) : String := - comps.toList.map (fun - | NamePart.str s => s - | NamePart.num n => toString n) - |> String.intercalate "." + if comps.isEmpty then "" else (namePartsToName comps).toString private def matchSuffix (c : NamePart) : Option String := match c with @@ -82,7 +86,7 @@ private def processSpecContext (comps : Array NamePart) : SpecEntry := Id.run do if comps[i]? == some (NamePart.num 0) && i + 1 < comps.size then begin_ := i + 1 break - let mut nameParts : Array String := #[] + let mut parts : Array NamePart := #[] let mut flags : Array String := #[] for i in [begin_:comps.size] do let c := comps[i]! @@ -92,10 +96,8 @@ private def processSpecContext (comps : Array NamePart) : SpecEntry := Id.run do flags := flags.push flag | none => if isSpecIndex c then pure () - else match c with - | NamePart.str s => nameParts := nameParts.push s - | NamePart.num n => nameParts := nameParts.push (toString n) - { name := String.intercalate "." nameParts.toList, flags } + else parts := parts.push c + { name := formatNameParts parts, flags } private def postprocessNameParts (components : Array NamePart) : String := Id.run do if components.isEmpty then return "" @@ -103,6 +105,31 @@ private def postprocessNameParts (components : Array NamePart) : String := Id.ru let (privStart, isPrivate) := stripPrivate components 0 components.size let mut parts := components.extract privStart components.size + -- Collect suffix flags from the end first, before stripping hygienic suffixes, + -- since _boxed etc. may appear after _@ sections. + let mut flags : Array String := #[] + let mut cont := true + while cont && !parts.isEmpty do + let last := parts.back! + match matchSuffix last with + | some flag => + flags := flags.push flag + parts := parts.pop + | none => + match last with + | NamePart.num _ => + if parts.size >= 2 then + match matchSuffix parts[parts.size - 2]! with + | some flag => + flags := flags.push flag + parts := parts.pop.pop + | none => cont := false + else cont := false + | _ => cont := false + + if isPrivate then + flags := flags.push "private" + -- Strip hygienic suffixes (_@ onward) for i in [:parts.size] do match parts[i]! with @@ -163,30 +190,6 @@ private def postprocessNameParts (components : Array NamePart) : String := Id.ru parts := base ++ remaining - -- Collect suffix flags from the end - let mut flags : Array String := #[] - let mut cont := true - while cont && !parts.isEmpty do - let last := parts.back! - match matchSuffix last with - | some flag => - flags := flags.push flag - parts := parts.pop - | none => - match last with - | NamePart.num _ => - if parts.size >= 2 then - match matchSuffix parts[parts.size - 2]! with - | some flag => - flags := flags.push flag - parts := parts.pop.pop - | none => cont := false - else cont := false - | _ => cont := false - - if isPrivate then - flags := flags.push "private" - let name := if parts.isEmpty then "?" else formatNameParts parts let mut result := name @@ -203,54 +206,35 @@ private def postprocessNameParts (components : Array NamePart) : String := Id.ru return result -private def hasUpperStart (s : String) : Bool := - let s := ((s.dropPrefix? "00").map (·.toString)).getD s - go s s.startPos -where - go (s : String) (pos : s.Pos) : Bool := - if h : pos = s.endPos then false - else if pos.get h == '_' then go s (pos.next h) - else (pos.get h).isUpper - termination_by pos - -private def findLpSplit (s : String) : Option (String × String) := Id.run do - let mut validSplits : Array (String × String × Bool) := #[] +private def demangleBody (body : String) : String := + let name := Name.demangle body + postprocessNameParts (nameToNameParts name) + +/-- Split a `lp_`-prefixed symbol into (demangled body, package name). +Tries each underscore as a split point; the first valid split (shortest single-component +package where the remainder is a valid mangled name) is correct. -/ +private def demangleWithPkg (s : String) : Option (String × String) := do for ⟨pos, h⟩ in s.positions do if pos.get h == '_' && pos ≠ s.startPos then let nextPos := pos.next h if nextPos == s.endPos then continue let pkg := s.extract s.startPos pos let body := s.extract nextPos s.endPos - -- Package must be a valid single-component mangled name let validPkg := match Name.demangle? pkg with | some (.str .anonymous _) => true | _ => false if validPkg && (Name.demangle? body).isSome then - validSplits := validSplits.push (pkg, body, hasUpperStart body) - if validSplits.isEmpty then return none - -- Prefer: shortest valid package (first split point). - -- Among splits where body starts uppercase, pick the first. - -- If no uppercase, still pick the first. - let upperSplits := validSplits.filter (·.2.2) - if !upperSplits.isEmpty then - return some (upperSplits[0]!.1, upperSplits[0]!.2.1) - else - return some (validSplits[0]!.1, validSplits[0]!.2.1) - -private def unmanglePkg (s : String) : String := - match Name.demangle s with - | .str .anonymous s => s - | _ => s + let pkgName := match Name.demangle pkg with + | .str .anonymous s => s + | _ => pkg + return (demangleBody body, pkgName) + none private def stripColdSuffix (s : String) : String × String := match s.find? ".cold" with | some pos => (s.extract s.startPos pos, s.extract pos s.endPos) | none => (s, "") -private def demangleBody (body : String) : String := - let name := Name.demangle body - postprocessNameParts (nameToNameParts name) - private def demangleCore (s : String) : Option String := do -- _init_l_ if let some body := dropPrefix? s "_init_l_" then @@ -258,8 +242,8 @@ private def demangleCore (s : String) : Option String := do -- _init_lp_ if let some after := dropPrefix? s "_init_lp_" then - if let some (pkg, body) := findLpSplit after then - if !body.isEmpty then return s!"[init] {demangleBody body} ({unmanglePkg pkg})" + if let some (name, pkg) := demangleWithPkg after then + return s!"[init] {name} ({pkg})" -- initialize_l_ if let some body := dropPrefix? s "initialize_l_" then @@ -267,8 +251,8 @@ private def demangleCore (s : String) : Option String := do -- initialize_lp_ if let some after := dropPrefix? s "initialize_lp_" then - if let some (pkg, body) := findLpSplit after then - if !body.isEmpty then return s!"[module_init] {demangleBody body} ({unmanglePkg pkg})" + if let some (name, pkg) := demangleWithPkg after then + return s!"[module_init] {name} ({pkg})" -- initialize_ (bare module init) if let some body := dropPrefix? s "initialize_" then @@ -280,8 +264,8 @@ private def demangleCore (s : String) : Option String := do -- lp_ if let some after := dropPrefix? s "lp_" then - if let some (pkg, body) := findLpSplit after then - if !body.isEmpty then return s!"{demangleBody body} ({unmanglePkg pkg})" + if let some (name, pkg) := demangleWithPkg after then + return s!"{name} ({pkg})" none diff --git a/tests/lean/run/demangling.lean b/tests/lean/run/demangling.lean index 8d53585eed14..8a5f59b13e1f 100644 --- a/tests/lean/run/demangling.lean +++ b/tests/lean/run/demangling.lean @@ -171,6 +171,11 @@ private def checkRoundTrip (label : String) (parts : List (String ⊕ Nat)) : IO let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "my_pkg"}_") checkSome "lp_ underscore pkg" (demangleSymbol mangled) "Lean.Meta.foo (my_pkg)" +#eval do + -- Package with escaped chars (hyphen → _x2d): split must not break mid-escape + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "my-pkg"}_") + checkSome "lp_ escaped pkg" (demangleSymbol mangled) "Lean.Meta.foo (my-pkg)" + #eval do let name := mkName [.inl "_private", .inl "X", .inr 0, .inl "Y", .inl "foo"] let mangled := name.mangle (s!"lp_{String.mangle "pkg"}_") @@ -338,6 +343,21 @@ private def checkRoundTrip (label : String) (parts : List (String ⊕ Nat)) : IO checkSome "hygienic strip" (demangleSymbol (name.mangle "l_")) "Lean.Meta.foo" +#eval do + -- _boxed after _@ hygienic section should still be recognized + let name := mkName [.inl "Lean", .inl "Meta", .inl "foo", .inl "_@", + .inl "Lean", .inl "Meta", .inl "_hyg", .inr 42, .inl "_boxed"] + checkSome "hygienic + boxed" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo [boxed]" + +#eval do + -- _lam + _boxed after _@ should both be recognized + let name := mkName [.inl "Lean", .inl "initFn", .inl "_@", + .inl "Lean", .inl "Elab", .inl "_hyg", .inr 42, + .inl "_lam", .inr 0, .inl "_boxed"] + checkSome "hygienic + lam + boxed" (demangleSymbol (name.mangle "l_")) + "Lean.initFn [boxed, λ]" + -- ============================================================================ -- Postprocessing: specialization contexts -- ============================================================================