diff --git a/script/profiler/lean_demangle.py b/script/profiler/lean_demangle.py index a637a2f7b390..5599940517ca 100644 --- a/script/profiler/lean_demangle.py +++ b/script/profiler/lean_demangle.py @@ -1,9 +1,11 @@ #!/usr/bin/env python3 """ -Lean name demangler. +Lean name demangler — thin wrapper around the Lean CLI tool. -Demangles C symbol names produced by the Lean 4 compiler back into -readable Lean hierarchical names. +Spawns ``lean --run lean_demangle_cli.lean`` as a persistent subprocess +and communicates via stdin/stdout pipes. This ensures a single source +of truth for demangling logic (the Lean implementation in +``Lean.Compiler.NameDemangling``). Usage as a filter (like c++filt): echo "l_Lean_Meta_Sym_main" | python lean_demangle.py @@ -13,767 +15,68 @@ print(demangle_lean_name("l_Lean_Meta_Sym_main")) """ +import atexit +import os +import subprocess import sys - -# --------------------------------------------------------------------------- -# String.mangle / unmangle -# --------------------------------------------------------------------------- - -def _is_ascii_alnum(ch): - """Check if ch is an ASCII letter or digit (matching Lean's isAlpha/isDigit).""" - return ('a' <= ch <= 'z') or ('A' <= ch <= 'Z') or ('0' <= ch <= '9') - - -def mangle_string(s): - """Port of Lean's String.mangle: escape a single string for C identifiers.""" - result = [] - for ch in s: - if _is_ascii_alnum(ch): - result.append(ch) - elif ch == '_': - result.append('__') - else: - code = ord(ch) - if code < 0x100: - result.append('_x' + format(code, '02x')) - elif code < 0x10000: - result.append('_u' + format(code, '04x')) - else: - result.append('_U' + format(code, '08x')) - return ''.join(result) - - -def _parse_hex(s, pos, n): - """Parse n lowercase hex digits at pos. Returns (new_pos, value) or None.""" - if pos + n > len(s): - return None - val = 0 - for i in range(n): - c = s[pos + i] - if '0' <= c <= '9': - val = (val << 4) | (ord(c) - ord('0')) - elif 'a' <= c <= 'f': - val = (val << 4) | (ord(c) - ord('a') + 10) - else: - return None - return (pos + n, val) - - -# --------------------------------------------------------------------------- -# Name mangling (for round-trip verification) -# --------------------------------------------------------------------------- - -def _check_disambiguation(m): - """Port of Lean's checkDisambiguation: does mangled string m need a '00' prefix?""" - pos = 0 - while pos < len(m): - ch = m[pos] - if ch == '_': - pos += 1 - continue - if ch == 'x': - return _parse_hex(m, pos + 1, 2) is not None - if ch == 'u': - return _parse_hex(m, pos + 1, 4) is not None - if ch == 'U': - return _parse_hex(m, pos + 1, 8) is not None - if '0' <= ch <= '9': - return True - return False - # all underscores or empty - return True - - -def _need_disambiguation(prev_component, mangled_next): - """Port of Lean's needDisambiguation.""" - # Check if previous component (as a string) ends with '_' - prev_ends_underscore = (isinstance(prev_component, str) and - len(prev_component) > 0 and - prev_component[-1] == '_') - return prev_ends_underscore or _check_disambiguation(mangled_next) - - -def mangle_name(components, prefix="l_"): - """ - Mangle a list of name components (str or int) into a C symbol. - Port of Lean's Name.mangle. - """ - if not components: - return prefix - - parts = [] - prev = None - for i, comp in enumerate(components): - if isinstance(comp, int): - if i == 0: - parts.append(str(comp) + '_') - else: - parts.append('_' + str(comp) + '_') - else: - m = mangle_string(comp) - if i == 0: - if _check_disambiguation(m): - parts.append('00' + m) - else: - parts.append(m) - else: - if _need_disambiguation(prev, m): - parts.append('_00' + m) - else: - parts.append('_' + m) - prev = comp - - return prefix + ''.join(parts) - - -# --------------------------------------------------------------------------- -# Name demangling -# --------------------------------------------------------------------------- - -def demangle_body(s): - """ - Demangle a string produced by Name.mangleAux (without prefix). - Returns a list of components (str or int). - - This is a faithful port of Lean's Name.demangleAux from NameMangling.lean. - """ - components = [] - length = len(s) - - def emit(comp): - components.append(comp) - - def decode_num(pos, n): - """Parse remaining digits, emit numeric component, continue.""" - while pos < length: - ch = s[pos] - if '0' <= ch <= '9': - n = n * 10 + (ord(ch) - ord('0')) - pos += 1 - else: - # Expect '_' (trailing underscore of numeric encoding) - pos += 1 # skip '_' - emit(n) - if pos >= length: - return pos - # Skip separator '_' and go to name_start - pos += 1 - return name_start(pos) - # End of string - emit(n) - return pos - - def name_start(pos): - """Start parsing a new name component.""" - if pos >= length: - return pos - ch = s[pos] - pos += 1 - if '0' <= ch <= '9': - # Check for '00' disambiguation - if ch == '0' and pos < length and s[pos] == '0': - pos += 1 - return demangle_main(pos, "", 0) - else: - return decode_num(pos, ord(ch) - ord('0')) - elif ch == '_': - return demangle_main(pos, "", 1) - else: - return demangle_main(pos, ch, 0) - - def demangle_main(pos, acc, ucount): - """Main demangling loop.""" - while pos < length: - ch = s[pos] - pos += 1 - - if ch == '_': - ucount += 1 - continue - - if ucount % 2 == 0: - # Even underscores: literal underscores in component name - acc += '_' * (ucount // 2) + ch - ucount = 0 - continue - - # Odd ucount: separator or escape - if '0' <= ch <= '9': - # End current str component, start number - emit(acc + '_' * (ucount // 2)) - if ch == '0' and pos < length and s[pos] == '0': - pos += 1 - return demangle_main(pos, "", 0) - else: - return decode_num(pos, ord(ch) - ord('0')) - - # Try hex escapes - if ch == 'x': - result = _parse_hex(s, pos, 2) - if result is not None: - new_pos, val = result - acc += '_' * (ucount // 2) + chr(val) - pos = new_pos - ucount = 0 - continue - - if ch == 'u': - result = _parse_hex(s, pos, 4) - if result is not None: - new_pos, val = result - acc += '_' * (ucount // 2) + chr(val) - pos = new_pos - ucount = 0 - continue - - if ch == 'U': - result = _parse_hex(s, pos, 8) - if result is not None: - new_pos, val = result - acc += '_' * (ucount // 2) + chr(val) - pos = new_pos - ucount = 0 - continue - - # Name separator - emit(acc) - acc = '_' * (ucount // 2) + ch - ucount = 0 - - # End of string - acc += '_' * (ucount // 2) - if acc: - emit(acc) - return pos - - name_start(0) - return components - - -# --------------------------------------------------------------------------- -# Prefix handling for lp_ (package prefix) -# --------------------------------------------------------------------------- - -def _is_valid_string_mangle(s): - """Check if s is a valid output of String.mangle (no trailing bare _).""" - pos = 0 - length = len(s) - while pos < length: - ch = s[pos] - if _is_ascii_alnum(ch): - pos += 1 - elif ch == '_': - if pos + 1 >= length: - return False # trailing bare _ - nch = s[pos + 1] - if nch == '_': - pos += 2 - elif nch == 'x' and _parse_hex(s, pos + 2, 2) is not None: - pos = _parse_hex(s, pos + 2, 2)[0] - elif nch == 'u' and _parse_hex(s, pos + 2, 4) is not None: - pos = _parse_hex(s, pos + 2, 4)[0] - elif nch == 'U' and _parse_hex(s, pos + 2, 8) is not None: - pos = _parse_hex(s, pos + 2, 8)[0] - else: - return False - else: - return False - return True - - -def _skip_string_mangle(s, pos): - """ - Skip past a String.mangle output in s starting at pos. - Returns the position after the mangled string (where we expect the separator '_'). - This is a greedy scan. - """ - length = len(s) - while pos < length: - ch = s[pos] - if _is_ascii_alnum(ch): - pos += 1 - elif ch == '_': - if pos + 1 < length: - nch = s[pos + 1] - if nch == '_': - pos += 2 - elif nch == 'x' and _parse_hex(s, pos + 2, 2) is not None: - pos = _parse_hex(s, pos + 2, 2)[0] - elif nch == 'u' and _parse_hex(s, pos + 2, 4) is not None: - pos = _parse_hex(s, pos + 2, 4)[0] - elif nch == 'U' and _parse_hex(s, pos + 2, 8) is not None: - pos = _parse_hex(s, pos + 2, 8)[0] - else: - return pos # bare '_': separator - else: - return pos - else: - return pos - return pos - - -def _find_lp_body(s): - """ - Given s = everything after 'lp_' in a symbol, find where the declaration - body (Name.mangleAux output) starts. - Returns the start index of the body within s, or None. - - Strategy: try all candidate split points where the package part is a valid - String.mangle output and the body round-trips. Prefer the longest valid - package name (most specific match). - """ - length = len(s) - - # Collect candidate split positions: every '_' that could be the separator - candidates = [] - pos = 0 - while pos < length: - if s[pos] == '_': - candidates.append(pos) - pos += 1 - - # Try each candidate; collect all valid splits - valid_splits = [] - for split_pos in candidates: - pkg_part = s[:split_pos] - if not pkg_part: - continue - if not _is_valid_string_mangle(pkg_part): - continue - body = s[split_pos + 1:] - if not body: - continue - components = demangle_body(body) - if not components: - continue - remangled = mangle_name(components, prefix="") - if remangled == body: - first = components[0] - # Score: prefer first component starting with uppercase - has_upper = isinstance(first, str) and first and first[0].isupper() - valid_splits.append((split_pos, has_upper)) - - if valid_splits: - # Among splits where first decl component starts uppercase, pick longest pkg. - # Otherwise pick shortest pkg. - upper_splits = [s for s in valid_splits if s[1]] - if upper_splits: - best = max(upper_splits, key=lambda x: x[0]) - else: - best = min(valid_splits, key=lambda x: x[0]) - return best[0] + 1 - - # Fallback: greedy String.mangle scan - greedy_pos = _skip_string_mangle(s, 0) - if greedy_pos < length and s[greedy_pos] == '_': - return greedy_pos + 1 - - return None - - -# --------------------------------------------------------------------------- -# Format name components for display -# --------------------------------------------------------------------------- - -def format_name(components): - """Format a list of name components as a dot-separated string.""" - return '.'.join(str(c) for c in components) - - -# --------------------------------------------------------------------------- -# Human-friendly postprocessing -# --------------------------------------------------------------------------- - -# Compiler-generated suffix components — exact match -_SUFFIX_FLAGS_EXACT = { - '_redArg': 'arity\u2193', - '_boxed': 'boxed', - '_impl': 'impl', -} - -# Compiler-generated suffix prefixes — match with optional _N index -# e.g., _lam, _lam_0, _lam_3, _lambda_0, _closed_2 -_SUFFIX_FLAGS_PREFIX = { - '_lam': '\u03bb', - '_lambda': '\u03bb', - '_elam': '\u03bb', - '_jp': 'jp', - '_closed': 'closed', -} - - -def _match_suffix(component): - """ - Check if a string component is a compiler-generated suffix. - Returns the flag label or None. - - Handles both exact matches (_redArg, _boxed) and indexed suffixes - (_lam_0, _lambda_2, _closed_0) produced by appendIndexAfter. - """ - if not isinstance(component, str): - return None - if component in _SUFFIX_FLAGS_EXACT: - return _SUFFIX_FLAGS_EXACT[component] - if component in _SUFFIX_FLAGS_PREFIX: - return _SUFFIX_FLAGS_PREFIX[component] - # Check for indexed suffix: prefix + _N - for prefix, label in _SUFFIX_FLAGS_PREFIX.items(): - if component.startswith(prefix + '_'): - rest = component[len(prefix) + 1:] - if rest.isdigit(): - return label - return None - - -def _strip_private(components): - """Strip _private.Module.0. prefix. Returns (stripped_parts, is_private).""" - if (len(components) >= 3 and isinstance(components[0], str) and - components[0] == '_private'): - for i in range(1, len(components)): - if components[i] == 0: - if i + 1 < len(components): - return components[i + 1:], True - break - return components, False - - -def _strip_spec_suffixes(components): - """Strip trailing spec_N components (from appendIndexAfter).""" - parts = list(components) - while parts and isinstance(parts[-1], str) and parts[-1].startswith('spec_'): - rest = parts[-1][5:] - if rest.isdigit(): - parts.pop() - else: - break - return parts - - -def _is_spec_index(component): - """Check if a component is a spec_N index (from appendIndexAfter).""" - return (isinstance(component, str) and - component.startswith('spec_') and component[5:].isdigit()) - - -def _parse_spec_entries(rest): - """Parse _at_..._spec pairs into separate spec context entries. - - Given components starting from the first _at_, returns: - - entries: list of component lists, one per _at_..._spec block - - remaining: components after the last _spec N (trailing suffixes) - """ - entries = [] - current_ctx = None - remaining = [] - skip_next = False - - for p in rest: - if skip_next: - skip_next = False - continue - if isinstance(p, str) and p == '_at_': - if current_ctx is not None: - entries.append(current_ctx) - current_ctx = [] - continue - if isinstance(p, str) and p == '_spec': - if current_ctx is not None: - entries.append(current_ctx) - current_ctx = None - skip_next = True - continue - if isinstance(p, str) and p.startswith('_spec'): - if current_ctx is not None: - entries.append(current_ctx) - current_ctx = None - continue - if current_ctx is not None: - current_ctx.append(p) - else: - remaining.append(p) - - if current_ctx is not None: - entries.append(current_ctx) - - return entries, remaining - - -def _process_spec_context(components): - """Process a spec context into a clean name and its flags. - - Returns (name_parts, flags) where name_parts are the cleaned components - and flags is a deduplicated list of flag labels from compiler suffixes. - """ - parts = list(components) - parts, _ = _strip_private(parts) - - name_parts = [] - ctx_flags = [] - seen = set() - - for p in parts: - flag = _match_suffix(p) - if flag is not None: - if flag not in seen: - ctx_flags.append(flag) - seen.add(flag) - elif _is_spec_index(p): - pass - else: - name_parts.append(p) - - return name_parts, ctx_flags - - -def postprocess_name(components): - """ - Transform raw demangled components into a human-friendly display string. - - Applies: - - Private name cleanup: _private.Module.0.Name.foo -> Name.foo [private] - - Hygienic name cleanup: strips _@.module._hygCtx._hyg.N - - Suffix folding: _redArg, _boxed, _lam_0, etc. -> [flags] - - Specialization: f._at_.g._spec.N -> f spec at g - Shown after base [flags], with context flags: spec at g[ctx_flags] - """ - if not components: - return "" - - parts = list(components) - flags = [] - spec_entries = [] - - # --- Strip _private prefix --- - parts, is_private = _strip_private(parts) - - # --- Strip hygienic suffixes: everything from _@ onward --- - at_idx = None - for i, p in enumerate(parts): - if isinstance(p, str) and p.startswith('_@'): - at_idx = i - break - if at_idx is not None: - parts = parts[:at_idx] - - # --- Handle specialization: _at_ ... _spec N --- - at_positions = [i for i, p in enumerate(parts) - if isinstance(p, str) and p == '_at_'] - if at_positions: - first_at = at_positions[0] - base = parts[:first_at] - rest = parts[first_at:] - - entries, remaining = _parse_spec_entries(rest) - for ctx_components in entries: - ctx_name, ctx_flags = _process_spec_context(ctx_components) - if ctx_name or ctx_flags: - spec_entries.append((ctx_name, ctx_flags)) - - parts = base + remaining - - # --- Collect suffix flags from the end --- - while parts: - last = parts[-1] - flag = _match_suffix(last) - if flag is not None: - flags.append(flag) - parts.pop() - elif isinstance(last, int) and len(parts) >= 2: - prev_flag = _match_suffix(parts[-2]) - if prev_flag is not None: - flags.append(prev_flag) - parts.pop() # remove the number - parts.pop() # remove the suffix - else: - break - else: - break - - if is_private: - flags.append('private') - - # --- Format result --- - name = '.'.join(str(c) for c in parts) if parts else '?' - result = name - if flags: - flag_str = ', '.join(flags) - result += f' [{flag_str}]' - - for ctx_name, ctx_flags in spec_entries: - ctx_str = '.'.join(str(c) for c in ctx_name) if ctx_name else '?' - if ctx_flags: - ctx_flag_str = ', '.join(ctx_flags) - result += f' spec at {ctx_str}[{ctx_flag_str}]' - else: - result += f' spec at {ctx_str}' - - return result - - -# --------------------------------------------------------------------------- -# Main demangling entry point -# --------------------------------------------------------------------------- - -def demangle_lean_name_raw(mangled): - """ - Demangle a Lean C symbol, preserving all internal name components. - - Returns the exact demangled name with all compiler-generated suffixes - intact. Use demangle_lean_name() for human-friendly output. - """ - try: - return _demangle_lean_name_inner(mangled, human_friendly=False) - except Exception: - return mangled +_process = None +_script_dir = os.path.dirname(os.path.abspath(__file__)) +_cli_script = os.path.join(_script_dir, "lean_demangle_cli.lean") + + +def _get_process(): + """Get or create the persistent Lean demangler subprocess.""" + global _process + if _process is not None and _process.poll() is None: + return _process + + lean = os.environ.get("LEAN", "lean") + _process = subprocess.Popen( + [lean, "--run", _cli_script], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + bufsize=1, # line buffered + ) + atexit.register(_cleanup) + return _process + + +def _cleanup(): + global _process + if _process is not None: + try: + _process.stdin.close() + _process.wait(timeout=5) + except Exception: + _process.kill() + _process = None def demangle_lean_name(mangled): """ Demangle a C symbol name produced by the Lean 4 compiler. - Returns a human-friendly demangled name with compiler suffixes folded - into readable flags. Use demangle_lean_name_raw() to preserve all - internal components. + Returns a human-friendly demangled name, or the original string + if it is not a Lean symbol. """ try: - return _demangle_lean_name_inner(mangled, human_friendly=True) + proc = _get_process() + proc.stdin.write(mangled + "\n") + proc.stdin.flush() + result = proc.stdout.readline().rstrip("\n") + return result if result else mangled except Exception: return mangled -def _demangle_lean_name_inner(mangled, human_friendly=True): - """Inner demangle that may raise on malformed input.""" - - if mangled == "_lean_main": - return "[lean] main" - - # Handle lean_ runtime functions - if human_friendly and mangled.startswith("lean_apply_"): - rest = mangled[11:] - if rest.isdigit(): - return f"" - - # Strip .cold.N suffix (LLVM linker cold function clones) - cold_suffix = "" - core = mangled - dot_pos = core.find('.cold.') - if dot_pos >= 0: - cold_suffix = " " + core[dot_pos:] - core = core[:dot_pos] - elif core.endswith('.cold'): - cold_suffix = " .cold" - core = core[:-5] - - result = _demangle_core(core, human_friendly) - if result is None: - return mangled - return result + cold_suffix - - -def _demangle_core(mangled, human_friendly=True): - """Demangle a symbol without .cold suffix. Returns None if not a Lean name.""" - - fmt = postprocess_name if human_friendly else format_name - - # _init_ prefix - if mangled.startswith("_init_"): - rest = mangled[6:] - body, pkg_display = _strip_lean_prefix(rest) - if body is None: - return None - components = demangle_body(body) - if not components: - return None - name = fmt(components) - if pkg_display: - return f"[init] {name} ({pkg_display})" - return f"[init] {name}" - - # initialize_ prefix (module init functions) - if mangled.startswith("initialize_"): - rest = mangled[11:] - # With package: initialize_lp_{pkg}_{body} or initialize_l_{body} - body, pkg_display = _strip_lean_prefix(rest) - if body is not None: - components = demangle_body(body) - if components: - name = fmt(components) - if pkg_display: - return f"[module_init] {name} ({pkg_display})" - return f"[module_init] {name}" - # Without package: initialize_{Name.mangleAux(moduleName)} - if rest: - components = demangle_body(rest) - if components: - return f"[module_init] {fmt(components)}" - return None - - # l_ or lp_ prefix - body, pkg_display = _strip_lean_prefix(mangled) - if body is None: - return None - components = demangle_body(body) - if not components: - return None - name = fmt(components) - if pkg_display: - return f"{name} ({pkg_display})" - return name - - -def _strip_lean_prefix(s): - """ - Strip the l_ or lp_ prefix from a mangled symbol. - Returns (body, pkg_display) where body is the Name.mangleAux output - and pkg_display is None or a string describing the package. - Returns (None, None) if the string doesn't have a recognized prefix. - """ - if s.startswith("l_"): - return (s[2:], None) - - if s.startswith("lp_"): - after_lp = s[3:] - body_start = _find_lp_body(after_lp) - if body_start is not None: - pkg_mangled = after_lp[:body_start - 1] - # Unmangle the package name - pkg_components = demangle_body(pkg_mangled) - if pkg_components and len(pkg_components) == 1 and isinstance(pkg_components[0], str): - pkg_display = pkg_components[0] - else: - pkg_display = pkg_mangled - return (after_lp[body_start:], pkg_display) - # Fallback: treat everything after lp_ as body - return (after_lp, "?") - - return (None, None) - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - def main(): - """Filter stdin or arguments, demangling Lean names.""" - import argparse - parser = argparse.ArgumentParser( - description="Demangle Lean 4 C symbol names (like c++filt for Lean)") - parser.add_argument('names', nargs='*', - help='Names to demangle (reads stdin if none given)') - parser.add_argument('--raw', action='store_true', - help='Output exact demangled names without postprocessing') - args = parser.parse_args() - - demangle = demangle_lean_name_raw if args.raw else demangle_lean_name - - if args.names: - for name in args.names: - print(demangle(name)) - else: - for line in sys.stdin: - print(demangle(line.rstrip('\n'))) + """Filter stdin, demangling Lean names.""" + for line in sys.stdin: + print(demangle_lean_name(line.rstrip("\n"))) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/script/profiler/lean_demangle_cli.lean b/script/profiler/lean_demangle_cli.lean new file mode 100644 index 000000000000..3221b1822ee1 --- /dev/null +++ b/script/profiler/lean_demangle_cli.lean @@ -0,0 +1,32 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Kim Morrison +-/ +module + +import Lean.Compiler.NameDemangling + +/-! +Lean name demangler CLI tool. Reads mangled symbol names from stdin (one per +line) and writes demangled names to stdout. Non-Lean symbols pass through +unchanged. Like `c++filt` but for Lean names. + +Usage: + echo "l_Lean_Meta_foo" | lean --run lean_demangle_cli.lean + cat symbols.txt | lean --run lean_demangle_cli.lean +-/ + +open Lean.Name.Demangle + +def main : IO Unit := do + let stdin ← IO.getStdin + let stdout ← IO.getStdout + repeat do + let line ← stdin.getLine + if line.isEmpty then break + let sym := line.trimRight + match demangleSymbol sym with + | some s => stdout.putStrLn s + | none => stdout.putStrLn sym + stdout.flush diff --git a/script/profiler/test_demangle.py b/script/profiler/test_demangle.py deleted file mode 100644 index effffaf0846c..000000000000 --- a/script/profiler/test_demangle.py +++ /dev/null @@ -1,670 +0,0 @@ -#!/usr/bin/env python3 -"""Tests for the Lean name demangler.""" - -import unittest -import json -import gzip -import tempfile -import os - -from lean_demangle import ( - mangle_string, mangle_name, demangle_body, format_name, - demangle_lean_name, demangle_lean_name_raw, postprocess_name, - _parse_hex, _check_disambiguation, -) - - -class TestStringMangle(unittest.TestCase): - """Test String.mangle (character-level escaping).""" - - def test_alphanumeric(self): - self.assertEqual(mangle_string("hello"), "hello") - self.assertEqual(mangle_string("abc123"), "abc123") - - def test_underscore(self): - self.assertEqual(mangle_string("a_b"), "a__b") - self.assertEqual(mangle_string("_"), "__") - self.assertEqual(mangle_string("__"), "____") - - def test_special_chars(self): - self.assertEqual(mangle_string("."), "_x2e") - self.assertEqual(mangle_string("a.b"), "a_x2eb") - - def test_unicode(self): - self.assertEqual(mangle_string("\u03bb"), "_u03bb") - self.assertEqual(mangle_string("\U0001d55c"), "_U0001d55c") - - def test_empty(self): - self.assertEqual(mangle_string(""), "") - - -class TestNameMangle(unittest.TestCase): - """Test Name.mangle (hierarchical name mangling).""" - - def test_simple(self): - self.assertEqual(mangle_name(["Lean", "Meta", "Sym", "main"]), - "l_Lean_Meta_Sym_main") - - def test_single_component(self): - self.assertEqual(mangle_name(["main"]), "l_main") - - def test_numeric_component(self): - self.assertEqual( - mangle_name(["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "withMVarContextImp"]), - "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp") - - def test_component_with_underscore(self): - self.assertEqual(mangle_name(["a_b"]), "l_a__b") - self.assertEqual(mangle_name(["a_b", "c"]), "l_a__b_c") - - def test_disambiguation_digit_start(self): - self.assertEqual(mangle_name(["0foo"]), "l_000foo") - - def test_disambiguation_escape_start(self): - self.assertEqual(mangle_name(["a", "x27"]), "l_a_00x27") - - def test_numeric_root(self): - self.assertEqual(mangle_name([42]), "l_42_") - self.assertEqual(mangle_name([42, "foo"]), "l_42__foo") - - def test_component_ending_with_underscore(self): - self.assertEqual(mangle_name(["a_", "b"]), "l_a___00b") - - def test_custom_prefix(self): - self.assertEqual(mangle_name(["foo"], prefix="lp_pkg_"), - "lp_pkg_foo") - - -class TestDemangleBody(unittest.TestCase): - """Test demangle_body (the core Name.demangleAux algorithm).""" - - def test_simple(self): - self.assertEqual(demangle_body("Lean_Meta_Sym_main"), - ["Lean", "Meta", "Sym", "main"]) - - def test_single(self): - self.assertEqual(demangle_body("main"), ["main"]) - - def test_empty(self): - self.assertEqual(demangle_body(""), []) - - def test_underscore_in_component(self): - self.assertEqual(demangle_body("a__b"), ["a_b"]) - self.assertEqual(demangle_body("a__b_c"), ["a_b", "c"]) - - def test_numeric_component(self): - self.assertEqual(demangle_body("foo_42__bar"), ["foo", 42, "bar"]) - - def test_numeric_root(self): - self.assertEqual(demangle_body("42_"), [42]) - - def test_numeric_at_end(self): - self.assertEqual(demangle_body("foo_42_"), ["foo", 42]) - - def test_disambiguation_00(self): - self.assertEqual(demangle_body("a_00x27"), ["a", "x27"]) - - def test_disambiguation_00_at_root(self): - self.assertEqual(demangle_body("000foo"), ["0foo"]) - - def test_hex_escape_x(self): - self.assertEqual(demangle_body("a_x2eb"), ["a.b"]) - - def test_hex_escape_u(self): - self.assertEqual(demangle_body("_u03bb"), ["\u03bb"]) - - def test_hex_escape_U(self): - self.assertEqual(demangle_body("_U0001d55c"), ["\U0001d55c"]) - - def test_private_name(self): - body = "__private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp" - self.assertEqual(demangle_body(body), - ["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "withMVarContextImp"]) - - def test_boxed_suffix(self): - body = "foo___boxed" - self.assertEqual(demangle_body(body), ["foo", "_boxed"]) - - def test_redArg_suffix(self): - body = "foo_bar___redArg" - self.assertEqual(demangle_body(body), ["foo", "bar", "_redArg"]) - - def test_component_ending_underscore_disambiguation(self): - self.assertEqual(demangle_body("a___00b"), ["a_", "b"]) - - -class TestRoundTrip(unittest.TestCase): - """Test that mangle(demangle(x)) == x for various names.""" - - def _check_roundtrip(self, components): - mangled = mangle_name(components, prefix="") - demangled = demangle_body(mangled) - self.assertEqual(demangled, components, - f"Round-trip failed: {components} -> '{mangled}' -> {demangled}") - mangled_with_prefix = mangle_name(components, prefix="l_") - self.assertTrue(mangled_with_prefix.startswith("l_")) - body = mangled_with_prefix[2:] - demangled2 = demangle_body(body) - self.assertEqual(demangled2, components) - - def test_simple_names(self): - self._check_roundtrip(["Lean", "Meta", "main"]) - self._check_roundtrip(["a"]) - self._check_roundtrip(["Foo", "Bar", "baz"]) - - def test_numeric(self): - self._check_roundtrip(["foo", 0, "bar"]) - self._check_roundtrip([42]) - self._check_roundtrip(["a", 1, "b", 2, "c"]) - - def test_underscores(self): - self._check_roundtrip(["_private"]) - self._check_roundtrip(["a_b", "c_d"]) - self._check_roundtrip(["_at_", "_spec"]) - - def test_private_name(self): - self._check_roundtrip(["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "withMVarContextImp"]) - - def test_boxed(self): - self._check_roundtrip(["Lean", "Meta", "foo", "_boxed"]) - - def test_redArg(self): - self._check_roundtrip(["Lean", "Meta", "foo", "_redArg"]) - - def test_specialization(self): - self._check_roundtrip(["List", "map", "_at_", "Foo", "bar", "_spec", 3]) - - def test_lambda(self): - self._check_roundtrip(["Foo", "bar", "_lambda", 0]) - self._check_roundtrip(["Foo", "bar", "_lambda", 2]) - - def test_closed(self): - self._check_roundtrip(["myConst", "_closed", 0]) - - def test_special_chars(self): - self._check_roundtrip(["a.b"]) - self._check_roundtrip(["\u03bb"]) - self._check_roundtrip(["a", "b\u2192c"]) - - def test_disambiguation_cases(self): - self._check_roundtrip(["a", "x27"]) - self._check_roundtrip(["0foo"]) - self._check_roundtrip(["a_", "b"]) - - def test_complex_real_names(self): - """Names modeled after real Lean compiler output.""" - self._check_roundtrip( - ["Lean", "MVarId", "withContext", "_at_", - "_private", "Lean", "Meta", "Sym", 0, - "Lean", "Meta", "Sym", "BackwardRule", "apply", - "_spec", 2, "_redArg", "_lambda", 0, "_boxed"]) - - -class TestDemangleRaw(unittest.TestCase): - """Test demangle_lean_name_raw (exact demangling, no postprocessing).""" - - def test_l_prefix(self): - self.assertEqual( - demangle_lean_name_raw("l_Lean_Meta_Sym_main"), - "Lean.Meta.Sym.main") - - def test_l_prefix_private(self): - result = demangle_lean_name_raw( - "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp") - self.assertEqual(result, - "_private.Lean.Meta.Basic.0.Lean.Meta.withMVarContextImp") - - def test_l_prefix_boxed(self): - result = demangle_lean_name_raw("l_foo___boxed") - self.assertEqual(result, "foo._boxed") - - def test_l_prefix_redArg(self): - result = demangle_lean_name_raw( - "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp___redArg") - self.assertEqual( - result, - "_private.Lean.Meta.Basic.0.Lean.Meta.withMVarContextImp._redArg") - - def test_lean_main(self): - self.assertEqual(demangle_lean_name_raw("_lean_main"), "[lean] main") - - def test_non_lean_names(self): - self.assertEqual(demangle_lean_name_raw("printf"), "printf") - self.assertEqual(demangle_lean_name_raw("malloc"), "malloc") - self.assertEqual(demangle_lean_name_raw("lean_apply_5"), "lean_apply_5") - self.assertEqual(demangle_lean_name_raw(""), "") - - def test_init_prefix(self): - result = demangle_lean_name_raw("_init_l_Lean_Meta_foo") - self.assertEqual(result, "[init] Lean.Meta.foo") - - def test_lp_prefix_simple(self): - mangled = mangle_name(["Lean", "Meta", "foo"], prefix="lp_std_") - self.assertEqual(mangled, "lp_std_Lean_Meta_foo") - result = demangle_lean_name_raw(mangled) - self.assertEqual(result, "Lean.Meta.foo (std)") - - def test_lp_prefix_underscore_pkg(self): - pkg_mangled = mangle_string("my_pkg") - self.assertEqual(pkg_mangled, "my__pkg") - mangled = mangle_name(["Lean", "Meta", "foo"], - prefix=f"lp_{pkg_mangled}_") - self.assertEqual(mangled, "lp_my__pkg_Lean_Meta_foo") - result = demangle_lean_name_raw(mangled) - self.assertEqual(result, "Lean.Meta.foo (my_pkg)") - - def test_lp_prefix_private_decl(self): - mangled = mangle_name( - ["_private", "X", 0, "Y", "foo"], prefix="lp_pkg_") - self.assertEqual(mangled, "lp_pkg___private_X_0__Y_foo") - result = demangle_lean_name_raw(mangled) - self.assertEqual(result, "_private.X.0.Y.foo (pkg)") - - def test_complex_specialization(self): - components = [ - "Lean", "MVarId", "withContext", "_at_", - "_private", "Lean", "Meta", "Sym", 0, - "Lean", "Meta", "Sym", "BackwardRule", "apply", - "_spec", 2, "_redArg", "_lambda", 0, "_boxed" - ] - mangled = mangle_name(components) - result = demangle_lean_name_raw(mangled) - expected = format_name(components) - self.assertEqual(result, expected) - - def test_cold_suffix(self): - result = demangle_lean_name_raw("l_Lean_Meta_foo___redArg.cold.1") - self.assertEqual(result, "Lean.Meta.foo._redArg .cold.1") - - def test_cold_suffix_plain(self): - result = demangle_lean_name_raw("l_Lean_Meta_foo.cold") - self.assertEqual(result, "Lean.Meta.foo .cold") - - def test_initialize_no_pkg(self): - result = demangle_lean_name_raw("initialize_Init_Control_Basic") - self.assertEqual(result, "[module_init] Init.Control.Basic") - - def test_initialize_with_l_prefix(self): - result = demangle_lean_name_raw("initialize_l_Lean_Meta_foo") - self.assertEqual(result, "[module_init] Lean.Meta.foo") - - def test_never_crashes(self): - """Demangling should never raise, just return the original.""" - weird_inputs = [ - "", "l_", "lp_", "lp_x", "_init_", "initialize_", - "l_____", "lp____", "l_00", "l_0", - "some random string", "l_ space", - ] - for inp in weird_inputs: - result = demangle_lean_name_raw(inp) - self.assertIsInstance(result, str) - - -class TestPostprocess(unittest.TestCase): - """Test postprocess_name (human-friendly suffix folding, etc.).""" - - def test_no_change(self): - self.assertEqual(postprocess_name(["Lean", "Meta", "main"]), - "Lean.Meta.main") - - def test_boxed(self): - self.assertEqual(postprocess_name(["foo", "_boxed"]), - "foo [boxed]") - - def test_redArg(self): - self.assertEqual(postprocess_name(["foo", "bar", "_redArg"]), - "foo.bar [arity\u2193]") - - def test_lambda_separate(self): - # _lam as separate component + numeric index - self.assertEqual(postprocess_name(["foo", "_lam", 0]), - "foo [\u03bb]") - - def test_lambda_indexed(self): - # _lam_0 as single string (appendIndexAfter) - self.assertEqual(postprocess_name(["foo", "_lam_0"]), - "foo [\u03bb]") - self.assertEqual(postprocess_name(["foo", "_lambda_2"]), - "foo [\u03bb]") - - def test_lambda_boxed(self): - # _lam_0 followed by _boxed - self.assertEqual( - postprocess_name(["Lean", "Meta", "Simp", "simpLambda", - "_lam_0", "_boxed"]), - "Lean.Meta.Simp.simpLambda [boxed, \u03bb]") - - def test_closed(self): - self.assertEqual(postprocess_name(["myConst", "_closed", 3]), - "myConst [closed]") - - def test_closed_indexed(self): - self.assertEqual(postprocess_name(["myConst", "_closed_0"]), - "myConst [closed]") - - def test_multiple_suffixes(self): - self.assertEqual(postprocess_name(["foo", "_redArg", "_boxed"]), - "foo [boxed, arity\u2193]") - - def test_redArg_lam(self): - # _redArg followed by _lam_0 (issue #4) - self.assertEqual( - postprocess_name(["Lean", "profileitIOUnsafe", - "_redArg", "_lam_0"]), - "Lean.profileitIOUnsafe [\u03bb, arity\u2193]") - - def test_private_name(self): - self.assertEqual( - postprocess_name(["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "withMVarContextImp"]), - "Lean.Meta.withMVarContextImp [private]") - - def test_private_with_suffix(self): - self.assertEqual( - postprocess_name(["_private", "Lean", "Meta", "Basic", 0, - "Lean", "Meta", "foo", "_redArg"]), - "Lean.Meta.foo [arity\u2193, private]") - - def test_hygienic_strip(self): - self.assertEqual( - postprocess_name(["Lean", "Meta", "foo", "_@", "Lean", "Meta", - "_hyg", 42]), - "Lean.Meta.foo") - - def test_specialization(self): - self.assertEqual( - postprocess_name(["List", "map", "_at_", "Foo", "bar", - "_spec", 3]), - "List.map spec at Foo.bar") - - def test_specialization_with_suffix(self): - # Base suffix _boxed appears in [flags] before spec at - self.assertEqual( - postprocess_name(["Lean", "MVarId", "withContext", "_at_", - "Foo", "bar", "_spec", 2, "_boxed"]), - "Lean.MVarId.withContext [boxed] spec at Foo.bar") - - def test_spec_context_with_flags(self): - # Compiler suffixes in spec context become context flags - self.assertEqual( - postprocess_name(["Lean", "Meta", "foo", "_at_", - "Lean", "Meta", "bar", "_elam_1", "_redArg", - "_spec", 2]), - "Lean.Meta.foo spec at Lean.Meta.bar[\u03bb, arity\u2193]") - - def test_spec_context_flags_dedup(self): - # Duplicate flag labels are deduplicated - self.assertEqual( - postprocess_name(["f", "_at_", - "g", "_lam_0", "_elam_1", "_redArg", - "_spec", 1]), - "f spec at g[\u03bb, arity\u2193]") - - def test_multiple_at(self): - # Multiple _at_ entries become separate spec at clauses - self.assertEqual( - postprocess_name(["f", "_at_", "g", "_spec", 1, - "_at_", "h", "_spec", 2]), - "f spec at g spec at h") - - def test_multiple_at_with_flags(self): - # Multiple spec at with flags on base and contexts - self.assertEqual( - postprocess_name(["f", "_at_", "g", "_redArg", "_spec", 1, - "_at_", "h", "_lam_0", "_spec", 2, - "_boxed"]), - "f [boxed] spec at g[arity\u2193] spec at h[\u03bb]") - - def test_base_flags_before_spec(self): - # Base trailing suffixes appear in [flags] before spec at - self.assertEqual( - postprocess_name(["f", "_at_", "g", "_spec", 1, "_lam_0"]), - "f [\u03bb] spec at g") - - def test_spec_context_strip_spec_suffixes(self): - # spec_0 in context should be stripped - self.assertEqual( - postprocess_name(["Lean", "Meta", "transformWithCache", "visit", - "_at_", - "_private", "Lean", "Meta", "Transform", 0, - "Lean", "Meta", "transform", - "Lean", "Meta", "Sym", "unfoldReducible", - "spec_0", "spec_0", - "_spec", 1]), - "Lean.Meta.transformWithCache.visit " - "spec at Lean.Meta.transform.Lean.Meta.Sym.unfoldReducible") - - def test_spec_context_strip_private(self): - # _private in spec context should be stripped - self.assertEqual( - postprocess_name(["Array", "mapMUnsafe", "map", "_at_", - "_private", "Lean", "Meta", "Transform", 0, - "Lean", "Meta", "transformWithCache", "visit", - "_spec", 1]), - "Array.mapMUnsafe.map " - "spec at Lean.Meta.transformWithCache.visit") - - def test_empty(self): - self.assertEqual(postprocess_name([]), "") - - -class TestDemangleHumanFriendly(unittest.TestCase): - """Test demangle_lean_name (human-friendly output).""" - - def test_simple(self): - self.assertEqual(demangle_lean_name("l_Lean_Meta_main"), - "Lean.Meta.main") - - def test_boxed(self): - self.assertEqual(demangle_lean_name("l_foo___boxed"), - "foo [boxed]") - - def test_redArg(self): - self.assertEqual(demangle_lean_name("l_foo___redArg"), - "foo [arity\u2193]") - - def test_private(self): - self.assertEqual( - demangle_lean_name( - "l___private_Lean_Meta_Basic_0__Lean_Meta_foo"), - "Lean.Meta.foo [private]") - - def test_private_with_redArg(self): - self.assertEqual( - demangle_lean_name( - "l___private_Lean_Meta_Basic_0__Lean_Meta_foo___redArg"), - "Lean.Meta.foo [arity\u2193, private]") - - def test_cold_with_suffix(self): - self.assertEqual( - demangle_lean_name("l_Lean_Meta_foo___redArg.cold.1"), - "Lean.Meta.foo [arity\u2193] .cold.1") - - def test_lean_apply(self): - self.assertEqual(demangle_lean_name("lean_apply_5"), "") - self.assertEqual(demangle_lean_name("lean_apply_12"), "") - - def test_lean_apply_raw_unchanged(self): - self.assertEqual(demangle_lean_name_raw("lean_apply_5"), - "lean_apply_5") - - def test_init_private(self): - self.assertEqual( - demangle_lean_name( - "_init_l___private_X_0__Y_foo"), - "[init] Y.foo [private]") - - def test_complex_specialization(self): - components = [ - "Lean", "MVarId", "withContext", "_at_", - "_private", "Lean", "Meta", "Sym", 0, - "Lean", "Meta", "Sym", "BackwardRule", "apply", - "_spec", 2, "_redArg", "_lambda", 0, "_boxed" - ] - mangled = mangle_name(components) - result = demangle_lean_name(mangled) - # Base: Lean.MVarId.withContext with trailing _redArg, _lambda 0, _boxed - # Spec context: Lean.Meta.Sym.BackwardRule.apply (private stripped) - self.assertEqual( - result, - "Lean.MVarId.withContext [boxed, \u03bb, arity\u2193] " - "spec at Lean.Meta.Sym.BackwardRule.apply") - - def test_non_lean_unchanged(self): - self.assertEqual(demangle_lean_name("printf"), "printf") - self.assertEqual(demangle_lean_name("malloc"), "malloc") - self.assertEqual(demangle_lean_name(""), "") - - -class TestDemangleProfile(unittest.TestCase): - """Test the profile rewriter.""" - - def _make_profile_shared(self, strings): - """Create a profile with shared.stringArray (newer format).""" - return { - "meta": {"version": 28}, - "libs": [], - "shared": { - "stringArray": list(strings), - }, - "threads": [{ - "name": "main", - "pid": "1", - "tid": 1, - "funcTable": { - "name": list(range(len(strings))), - "isJS": [False] * len(strings), - "relevantForJS": [False] * len(strings), - "resource": [-1] * len(strings), - "fileName": [None] * len(strings), - "lineNumber": [None] * len(strings), - "columnNumber": [None] * len(strings), - "length": len(strings), - }, - "frameTable": {"length": 0}, - "stackTable": {"length": 0}, - "samples": {"length": 0}, - "markers": {"length": 0}, - "resourceTable": {"length": 0}, - "nativeSymbols": {"length": 0}, - }], - "pages": [], - "counters": [], - } - - def _make_profile_per_thread(self, strings): - """Create a profile with per-thread stringArray (samply format).""" - return { - "meta": {"version": 28}, - "libs": [], - "threads": [{ - "name": "main", - "pid": "1", - "tid": 1, - "stringArray": list(strings), - "funcTable": { - "name": list(range(len(strings))), - "isJS": [False] * len(strings), - "relevantForJS": [False] * len(strings), - "resource": [-1] * len(strings), - "fileName": [None] * len(strings), - "lineNumber": [None] * len(strings), - "columnNumber": [None] * len(strings), - "length": len(strings), - }, - "frameTable": {"length": 0}, - "stackTable": {"length": 0}, - "samples": {"length": 0}, - "markers": {"length": 0}, - "resourceTable": {"length": 0}, - "nativeSymbols": {"length": 0}, - }], - "pages": [], - "counters": [], - } - - def test_profile_rewrite_shared(self): - from lean_demangle_profile import rewrite_profile - strings = [ - "l_Lean_Meta_Sym_main", - "printf", - "lean_apply_5", - "l___private_Lean_Meta_Basic_0__Lean_Meta_foo", - ] - profile = self._make_profile_shared(strings) - rewrite_profile(profile) - sa = profile["shared"]["stringArray"] - self.assertEqual(sa[0], "Lean.Meta.Sym.main") - self.assertEqual(sa[1], "printf") - self.assertEqual(sa[2], "") - self.assertEqual(sa[3], "Lean.Meta.foo [private]") - - def test_profile_rewrite_per_thread(self): - from lean_demangle_profile import rewrite_profile - strings = [ - "l_Lean_Meta_Sym_main", - "printf", - "lean_apply_5", - "l___private_Lean_Meta_Basic_0__Lean_Meta_foo", - ] - profile = self._make_profile_per_thread(strings) - count = rewrite_profile(profile) - sa = profile["threads"][0]["stringArray"] - self.assertEqual(sa[0], "Lean.Meta.Sym.main") - self.assertEqual(sa[1], "printf") - self.assertEqual(sa[2], "") - self.assertEqual(sa[3], "Lean.Meta.foo [private]") - self.assertEqual(count, 3) - - def test_profile_json_roundtrip(self): - from lean_demangle_profile import process_profile_file - strings = ["l_Lean_Meta_main", "malloc"] - profile = self._make_profile_shared(strings) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.json', - delete=False) as f: - json.dump(profile, f) - inpath = f.name - - outpath = inpath.replace('.json', '-demangled.json') - try: - process_profile_file(inpath, outpath) - with open(outpath) as f: - result = json.load(f) - self.assertEqual(result["shared"]["stringArray"][0], - "Lean.Meta.main") - self.assertEqual(result["shared"]["stringArray"][1], "malloc") - finally: - os.unlink(inpath) - if os.path.exists(outpath): - os.unlink(outpath) - - def test_profile_gzip_roundtrip(self): - from lean_demangle_profile import process_profile_file - strings = ["l_Lean_Meta_main", "malloc"] - profile = self._make_profile_shared(strings) - - with tempfile.NamedTemporaryFile(suffix='.json.gz', - delete=False) as f: - with gzip.open(f, 'wt') as gz: - json.dump(profile, gz) - inpath = f.name - - outpath = inpath.replace('.json.gz', '-demangled.json.gz') - try: - process_profile_file(inpath, outpath) - with gzip.open(outpath, 'rt') as f: - result = json.load(f) - self.assertEqual(result["shared"]["stringArray"][0], - "Lean.Meta.main") - finally: - os.unlink(inpath) - if os.path.exists(outpath): - os.unlink(outpath) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/Init/Data/Char/Basic.lean b/src/Init/Data/Char/Basic.lean index f8aa9801aff0..2d52b6788e19 100644 --- a/src/Init/Data/Char/Basic.lean +++ b/src/Init/Data/Char/Basic.lean @@ -129,6 +129,14 @@ The ASCII digits are the following: `0123456789`. @[inline] def isDigit (c : Char) : Bool := c.val ≥ '0'.val && c.val ≤ '9'.val +/-- +Returns `true` if the character is an ASCII hexadecimal digit. + +The ASCII hexadecimal digits are the following: `0123456789abcdefABCDEF`. +-/ +@[inline] def isHexDigit (c : Char) : Bool := + c.isDigit || (c.val ≥ 'a'.val && c.val ≤ 'f'.val) || (c.val ≥ 'A'.val && c.val ≤ 'F'.val) + /-- Returns `true` if the character is an ASCII letter or digit. diff --git a/src/Lean/Compiler.lean b/src/Lean/Compiler.lean index 570c698042a4..216c41629174 100644 --- a/src/Lean/Compiler.lean +++ b/src/Lean/Compiler.lean @@ -18,4 +18,5 @@ public import Lean.Compiler.FFI public import Lean.Compiler.MetaAttr public import Lean.Compiler.NoncomputableAttr public import Lean.Compiler.Main +public import Lean.Compiler.NameDemangling public import Lean.Compiler.Old -- TODO: delete after we port code generator to Lean diff --git a/src/Lean/Compiler/NameDemangling.lean b/src/Lean/Compiler/NameDemangling.lean new file mode 100644 index 000000000000..235a6006d46d --- /dev/null +++ b/src/Lean/Compiler/NameDemangling.lean @@ -0,0 +1,343 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Kim Morrison +-/ +module + +prelude +import Init.While +import Init.Data.String.TakeDrop +import Init.Data.String.Search +import Init.Data.String.Iterate +import Lean.Data.NameTrie +public import Lean.Compiler.NameMangling + +/-! Human-friendly demangling of Lean compiler symbol names, extending +`Name.demangle` with prefix handling, compiler suffix folding, and backtrace +line parsing. Called from the C runtime via `@[export]` for backtrace display. -/ + +namespace Lean.Name.Demangle + +/-- `String.dropPrefix?` returning a `String` instead of a `Slice`. -/ +private def dropPrefix? (s : String) (pre : String) : Option String := + (s.dropPrefix? pre).map (·.toString) + +private def isAllDigits (s : String) : Bool := + !s.isEmpty && s.all (·.isDigit) + +private def nameToNameParts (n : Name) : Array NamePart := + go n [] |>.toArray +where + go : Name → List NamePart → List NamePart + | .anonymous, acc => acc + | .str pre s, acc => go pre (NamePart.str s :: acc) + | .num pre n, acc => go pre (NamePart.num n :: acc) + +private def namePartsToName (parts : Array NamePart) : Name := + parts.foldl (fun acc p => + match p with + | .str s => acc.mkStr s + | .num n => acc.mkNum n) .anonymous + +/-- Format name parts using `Name.toString` for correct escaping. -/ +private def formatNameParts (comps : Array NamePart) : String := + if comps.isEmpty then "" else (namePartsToName comps).toString + +private def matchSuffix (c : NamePart) : Option String := + match c with + | NamePart.str s => + if s == "_redArg" then some "arity↓" + else if s == "_boxed" then some "boxed" + else if s == "_impl" then some "impl" + else if s == "_lam" || s == "_lambda" || s == "_elam" then some "λ" + else if s == "_jp" then some "jp" + else if s == "_closed" then some "closed" + else if (dropPrefix? s "_lam_").any isAllDigits then some "λ" + else if (dropPrefix? s "_elam_").any isAllDigits then some "λ" + else none + | _ => none + +private def isSpecIndex (c : NamePart) : Bool := + match c with + | NamePart.str s => (dropPrefix? s "spec_").any isAllDigits + | _ => false + +private def stripPrivate (comps : Array NamePart) (start stop : Nat) : + Nat × Bool := + if stop - start >= 3 && comps[start]? == some (NamePart.str "_private") then + Id.run do + for i in [start + 1 : stop] do + if comps[i]? == some (NamePart.num 0) then + if i + 1 < stop then return (i + 1, true) + else return (start, false) + return (start, false) + else + (start, false) + +private structure SpecEntry where + name : String + flags : Array String + +private def processSpecContext (comps : Array NamePart) : SpecEntry := Id.run do + let mut begin_ := 0 + if comps.size >= 3 && comps[0]? == some (NamePart.str "_private") then + for i in [1:comps.size] do + if comps[i]? == some (NamePart.num 0) && i + 1 < comps.size then + begin_ := i + 1 + break + let mut parts : Array NamePart := #[] + let mut flags : Array String := #[] + for i in [begin_:comps.size] do + let c := comps[i]! + match matchSuffix c with + | some flag => + if !flags.contains flag then + flags := flags.push flag + | none => + if isSpecIndex c then pure () + else parts := parts.push c + { name := formatNameParts parts, flags } + +private def postprocessNameParts (components : Array NamePart) : String := Id.run do + if components.isEmpty then return "" + + let (privStart, isPrivate) := stripPrivate components 0 components.size + let mut parts := components.extract privStart components.size + + -- Collect suffix flags from the end first, before stripping hygienic suffixes, + -- since _boxed etc. may appear after _@ sections. + let mut flags : Array String := #[] + let mut cont := true + while cont && !parts.isEmpty do + let last := parts.back! + match matchSuffix last with + | some flag => + flags := flags.push flag + parts := parts.pop + | none => + match last with + | NamePart.num _ => + if parts.size >= 2 then + match matchSuffix parts[parts.size - 2]! with + | some flag => + flags := flags.push flag + parts := parts.pop.pop + | none => cont := false + else cont := false + | _ => cont := false + + if isPrivate then + flags := flags.push "private" + + -- Strip hygienic suffixes (_@ onward) + for i in [:parts.size] do + match parts[i]! with + | NamePart.str s => + if s.startsWith "_@" then + parts := parts.extract 0 i + break + | _ => pure () + + -- Handle specialization contexts (_at_ ... _spec N) + let mut specEntries : Array SpecEntry := #[] + let mut firstAt : Option Nat := none + for i in [:parts.size] do + if parts[i]! == NamePart.str "_at_" then + firstAt := some i + break + + if let some fa := firstAt then + let base := parts.extract 0 fa + let rest := parts.extract fa parts.size + + let mut entries : Array (Array NamePart) := #[] + let mut currentCtx : Option (Array NamePart) := none + let mut remaining : Array NamePart := #[] + let mut skipNext := false + + for i in [:rest.size] do + if skipNext then + skipNext := false + continue + let p := rest[i]! + if p == NamePart.str "_at_" then + if let some ctx := currentCtx then + entries := entries.push ctx + currentCtx := some #[] + else if p == NamePart.str "_spec" then + if let some ctx := currentCtx then + entries := entries.push ctx + currentCtx := none + skipNext := true + else if match p with | NamePart.str s => s.startsWith "_spec" | _ => false then + if let some ctx := currentCtx then + entries := entries.push ctx + currentCtx := none + else + match currentCtx with + | some ctx => currentCtx := some (ctx.push p) + | none => remaining := remaining.push p + + if let some ctx := currentCtx then + if !ctx.isEmpty then + entries := entries.push ctx + + for entry in entries do + let se := processSpecContext entry + if !se.name.isEmpty || !se.flags.isEmpty then + specEntries := specEntries.push se + + parts := base ++ remaining + + let name := if parts.isEmpty then "?" else formatNameParts parts + let mut result := name + + if !flags.isEmpty then + result := result ++ " [" ++ String.intercalate ", " flags.toList ++ "]" + + for entry in specEntries do + let ctxStr := if entry.name.isEmpty then "?" else entry.name + if !entry.flags.isEmpty then + result := result ++ " spec at " ++ ctxStr ++ "[" + ++ String.intercalate ", " entry.flags.toList ++ "]" + else + result := result ++ " spec at " ++ ctxStr + + return result + +private def demangleBody (body : String) : String := + let name := Name.demangle body + postprocessNameParts (nameToNameParts name) + +/-- Split a `lp_`-prefixed symbol into (demangled body, package name). +Tries each underscore as a split point; the first valid split (shortest single-component +package where the remainder is a valid mangled name) is correct. -/ +private def demangleWithPkg (s : String) : Option (String × String) := do + for ⟨pos, h⟩ in s.positions do + if pos.get h == '_' && pos ≠ s.startPos then + let nextPos := pos.next h + if nextPos == s.endPos then continue + let pkg := s.extract s.startPos pos + let body := s.extract nextPos s.endPos + let validPkg := match Name.demangle? pkg with + | some (.str .anonymous _) => true + | _ => false + if validPkg && (Name.demangle? body).isSome then + let pkgName := match Name.demangle pkg with + | .str .anonymous s => s + | _ => pkg + return (demangleBody body, pkgName) + none + +private def stripColdSuffix (s : String) : String × String := + match s.find? ".cold" with + | some pos => (s.extract s.startPos pos, s.extract pos s.endPos) + | none => (s, "") + +private def demangleCore (s : String) : Option String := do + -- _init_l_ + if let some body := dropPrefix? s "_init_l_" then + if !body.isEmpty then return s!"[init] {demangleBody body}" + + -- _init_lp_ + if let some after := dropPrefix? s "_init_lp_" then + if let some (name, pkg) := demangleWithPkg after then + return s!"[init] {name} ({pkg})" + + -- initialize_l_ + if let some body := dropPrefix? s "initialize_l_" then + if !body.isEmpty then return s!"[module_init] {demangleBody body}" + + -- initialize_lp_ + if let some after := dropPrefix? s "initialize_lp_" then + if let some (name, pkg) := demangleWithPkg after then + return s!"[module_init] {name} ({pkg})" + + -- initialize_ (bare module init) + if let some body := dropPrefix? s "initialize_" then + if !body.isEmpty then return s!"[module_init] {demangleBody body}" + + -- l_ + if let some body := dropPrefix? s "l_" then + if !body.isEmpty then return demangleBody body + + -- lp_ + if let some after := dropPrefix? s "lp_" then + if let some (name, pkg) := demangleWithPkg after then + return s!"{name} ({pkg})" + + none + +public def demangleSymbol (symbol : String) : Option String := do + if symbol.isEmpty then none + + -- Strip .cold suffix first + let (core, coldSuffix) := stripColdSuffix symbol + + -- Handle lean_apply_N + if let some rest := dropPrefix? core "lean_apply_" then + if isAllDigits rest then + let r := s!"" + if coldSuffix.isEmpty then return r else return s!"{r} {coldSuffix}" + + -- Handle _lean_main + if core == "_lean_main" then + if coldSuffix.isEmpty then return "[lean] main" + else return s!"[lean] main {coldSuffix}" + + -- Try prefix-based demangling + let result ← demangleCore core + if coldSuffix.isEmpty then return result + else return s!"{result} {coldSuffix}" + +private def skipWhile (s : String) (pos : s.Pos) (pred : Char → Bool) : s.Pos := + if h : pos = s.endPos then pos + else if pred (pos.get h) then skipWhile s (pos.next h) pred + else pos +termination_by pos + +private def splitAt₂ (s : String) (p₁ p₂ : s.Pos) : String × String × String := + (s.extract s.startPos p₁, s.extract p₁ p₂, s.extract p₂ s.endPos) + +/-- Extract the symbol from a backtrace line (Linux glibc or macOS format). -/ +private def extractSymbol (line : String) : + Option (String × String × String) := + tryLinux line |>.orElse (fun _ => tryMacOS line) +where + -- Linux glibc: ./lean(SYMBOL+0x2a) [0x555...] + tryLinux (line : String) : Option (String × String × String) := do + let parenPos ← line.find? '(' + if h : parenPos = line.endPos then none else + let symStart := parenPos.next h + let delimPos ← symStart.find? (fun c => c == '+' || c == ')') + if delimPos == symStart then none else + some (splitAt₂ line symStart delimPos) + -- macOS: N lib 0xADDR SYMBOL + offset + tryMacOS (line : String) : Option (String × String × String) := do + let zxPos ← line.find? "0x" + if h : zxPos = line.endPos then none else + let afterZero := zxPos.next h + if h2 : afterZero = line.endPos then none else + let afterX := afterZero.next h2 + let afterHex := skipWhile line afterX (·.isHexDigit) + let symStart := skipWhile line afterHex (· == ' ') + if symStart == line.endPos then none else + let symEnd := (symStart.find? " + ").getD line.endPos + if symEnd == symStart then none else + some (splitAt₂ line symStart symEnd) + +public def demangleBtLine (line : String) : Option String := do + let (pfx, sym, sfx) ← extractSymbol line + let demangled ← demangleSymbol sym + return pfx ++ demangled ++ sfx + +@[export lean_demangle_bt_line_cstr] +def demangleBtLineCStr (line : @& String) : String := + (demangleBtLine line).getD "" + +@[export lean_demangle_symbol_cstr] +def demangleSymbolCStr (symbol : @& String) : String := + (demangleSymbol symbol).getD "" + +end Lean.Name.Demangle diff --git a/src/Lean/Data/NameTrie.lean b/src/Lean/Data/NameTrie.lean index f58f7d150a30..11823d0c40f8 100644 --- a/src/Lean/Data/NameTrie.lean +++ b/src/Lean/Data/NameTrie.lean @@ -16,6 +16,7 @@ namespace Lean inductive NamePart | str (s : String) | num (n : Nat) + deriving BEq, Inhabited instance : ToString NamePart where toString diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 7bce946380ab..b49ca7c0e458 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -21,7 +21,6 @@ set( sharecommon.cpp stack_overflow.cpp process.cpp - demangle.cpp object_ref.cpp mpn.cpp mutex.cpp diff --git a/src/runtime/demangle.cpp b/src/runtime/demangle.cpp deleted file mode 100644 index 4849f219565a..000000000000 --- a/src/runtime/demangle.cpp +++ /dev/null @@ -1,792 +0,0 @@ -/* -Copyright (c) 2025 Microsoft Corporation. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. - -Author: Leonardo de Moura - -C++ port of the Lean name demangling algorithm (Name.demangleAux from -NameMangling.lean) and human-friendly postprocessing. Used to make -runtime backtraces readable. -*/ -#include -#include -#include -#include -#include "runtime/demangle.h" - -namespace { - -// --------------------------------------------------------------------------- -// Name component: either a string or a number -// --------------------------------------------------------------------------- - -struct Component { - bool is_num; - std::string str; - unsigned num; - Component() : is_num(false), num(0) {} - static Component mk_str(std::string s) { Component c; c.str = std::move(s); return c; } - static Component mk_str(char ch) { Component c; c.str = std::string(1, ch); return c; } - static Component mk_num(unsigned n) { Component c; c.is_num = true; c.num = n; return c; } -}; - -using Components = std::vector; - -// --------------------------------------------------------------------------- -// Hex parsing and UTF-8 encoding -// --------------------------------------------------------------------------- - -int parse_hex(const char * s, int pos, int len, int n_digits, unsigned & out_val) { - if (pos + n_digits > len) return 0; - unsigned val = 0; - for (int i = 0; i < n_digits; i++) { - char c = s[pos + i]; - if (c >= '0' && c <= '9') - val = (val << 4) | (unsigned)(c - '0'); - else if (c >= 'a' && c <= 'f') - val = (val << 4) | (unsigned)(c - 'a' + 10); - else - return 0; - } - out_val = val; - return n_digits; -} - -void append_utf8(std::string & out, unsigned cp) { - if (cp < 0x80) { - out += (char)cp; - } else if (cp < 0x800) { - out += (char)(0xC0 | (cp >> 6)); - out += (char)(0x80 | (cp & 0x3F)); - } else if (cp < 0x10000) { - out += (char)(0xE0 | (cp >> 12)); - out += (char)(0x80 | ((cp >> 6) & 0x3F)); - out += (char)(0x80 | (cp & 0x3F)); - } else if (cp < 0x110000) { - out += (char)(0xF0 | (cp >> 18)); - out += (char)(0x80 | ((cp >> 12) & 0x3F)); - out += (char)(0x80 | ((cp >> 6) & 0x3F)); - out += (char)(0x80 | (cp & 0x3F)); - } -} - -// --------------------------------------------------------------------------- -// Core demangling: produces a component list -// Port of Name.demangleAux from NameMangling.lean -// --------------------------------------------------------------------------- - -void demangle_main(const char * s, int pos, int len, - std::string & acc, int ucount, Components & out); -void name_start(const char * s, int pos, int len, Components & out); - -void decode_num(const char * s, int pos, int len, - unsigned n, Components & out) { - while (pos < len) { - char ch = s[pos]; - if (ch >= '0' && ch <= '9') { - n = n * 10 + (unsigned)(ch - '0'); - pos++; - } else { - pos++; // skip trailing '_' - out.push_back(Component::mk_num(n)); - if (pos >= len) return; - pos++; // skip separator '_' - name_start(s, pos, len, out); - return; - } - } - out.push_back(Component::mk_num(n)); -} - -void name_start(const char * s, int pos, int len, Components & out) { - if (pos >= len) return; - char ch = s[pos]; - pos++; - if (ch >= '0' && ch <= '9') { - if (ch == '0' && pos < len && s[pos] == '0') { - pos++; - std::string acc; - demangle_main(s, pos, len, acc, 0, out); - } else { - decode_num(s, pos, len, (unsigned)(ch - '0'), out); - } - } else if (ch == '_') { - std::string acc; - demangle_main(s, pos, len, acc, 1, out); - } else { - std::string acc(1, ch); - demangle_main(s, pos, len, acc, 0, out); - } -} - -void demangle_main(const char * s, int pos, int len, - std::string & acc, int ucount, Components & out) { - while (pos < len) { - char ch = s[pos]; - pos++; - - if (ch == '_') { ucount++; continue; } - - if (ucount % 2 == 0) { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - acc += ch; - ucount = 0; - continue; - } - - // Odd ucount: separator or escape - if (ch >= '0' && ch <= '9') { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - out.push_back(Component::mk_str(std::move(acc))); - if (ch == '0' && pos < len && s[pos] == '0') { - pos++; - acc.clear(); - demangle_main(s, pos, len, acc, 0, out); - return; - } else { - decode_num(s, pos, len, (unsigned)(ch - '0'), out); - return; - } - } - - unsigned hex_val; - int consumed; - if (ch == 'x') { - consumed = parse_hex(s, pos, len, 2, hex_val); - if (consumed > 0) { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - append_utf8(acc, hex_val); - pos += consumed; ucount = 0; continue; - } - } - if (ch == 'u') { - consumed = parse_hex(s, pos, len, 4, hex_val); - if (consumed > 0) { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - append_utf8(acc, hex_val); - pos += consumed; ucount = 0; continue; - } - } - if (ch == 'U') { - consumed = parse_hex(s, pos, len, 8, hex_val); - if (consumed > 0) { - for (int i = 0; i < ucount / 2; i++) acc += '_'; - append_utf8(acc, hex_val); - pos += consumed; ucount = 0; continue; - } - } - - // Name separator - out.push_back(Component::mk_str(std::move(acc))); - acc.clear(); - for (int i = 0; i < ucount / 2; i++) acc += '_'; - acc += ch; - ucount = 0; - } - - for (int i = 0; i < ucount / 2; i++) acc += '_'; - if (!acc.empty()) - out.push_back(Component::mk_str(std::move(acc))); -} - -bool demangle_body(const char * s, int len, Components & out) { - if (len == 0) return false; - name_start(s, 0, len, out); - return !out.empty(); -} - -// Convenience: demangle to flat dot-separated string (used for lp_ validation) -bool demangle_body_flat(const char * s, int len, std::string & out) { - Components comps; - if (!demangle_body(s, len, comps)) return false; - for (size_t i = 0; i < comps.size(); i++) { - if (i > 0) out += '.'; - if (comps[i].is_num) out += std::to_string(comps[i].num); - else out += comps[i].str; - } - return true; -} - -// --------------------------------------------------------------------------- -// Format components as dot-separated string -// --------------------------------------------------------------------------- - -std::string format_name(const Components & comps) { - std::string out; - for (size_t i = 0; i < comps.size(); i++) { - if (i > 0) out += '.'; - if (comps[i].is_num) out += std::to_string(comps[i].num); - else out += comps[i].str; - } - return out; -} - -// --------------------------------------------------------------------------- -// Human-friendly postprocessing -// Port of postprocess_name from lean_demangle.py -// --------------------------------------------------------------------------- - -// Suffix flag labels (UTF-8 encoded) -static const char * FLAG_ARITY_DOWN = "arity\xe2\x86\x93"; // arity↓ -static const char * FLAG_BOXED = "boxed"; -static const char * FLAG_IMPL = "impl"; -static const char * FLAG_LAMBDA = "\xce\xbb"; // λ -static const char * FLAG_JP = "jp"; -static const char * FLAG_CLOSED = "closed"; - -// Check if a string consists entirely of ASCII digits. -bool is_all_digits(const char * s) { - if (!*s) return false; - while (*s) { if (*s < '0' || *s > '9') return false; s++; } - return true; -} - -bool starts_with_str(const std::string & s, const char * prefix) { - size_t plen = strlen(prefix); - return s.size() >= plen && s.compare(0, plen, prefix) == 0; -} - -// Match a compiler-generated suffix component. Returns flag label or nullptr. -const char * match_suffix(const Component & c) { - if (c.is_num) return nullptr; - const std::string & s = c.str; - // Exact matches - if (s == "_redArg") return FLAG_ARITY_DOWN; - if (s == "_boxed") return FLAG_BOXED; - if (s == "_impl") return FLAG_IMPL; - // Exact or indexed prefix matches - if (s == "_lam" || s == "_lambda" || s == "_elam") return FLAG_LAMBDA; - if (s == "_jp") return FLAG_JP; - if (s == "_closed") return FLAG_CLOSED; - // Indexed: _lam_N, _lambda_N, _elam_N, _jp_N, _closed_N - struct { const char * prefix; size_t len; const char * flag; } indexed[] = { - {"_lam_", 5, FLAG_LAMBDA}, - {"_lambda_", 8, FLAG_LAMBDA}, - {"_elam_", 6, FLAG_LAMBDA}, - {"_jp_", 4, FLAG_JP}, - {"_closed_", 8, FLAG_CLOSED}, - }; - for (auto & e : indexed) { - if (s.size() > e.len && s.compare(0, e.len, e.prefix) == 0 && - is_all_digits(s.c_str() + e.len)) - return e.flag; - } - return nullptr; -} - -// Check if component is a spec_N index. -bool is_spec_index(const Component & c) { - if (c.is_num) return false; - return starts_with_str(c.str, "spec_") && c.str.size() > 5 && - is_all_digits(c.str.c_str() + 5); -} - -// Strip _private.Module.0. prefix. Returns (begin index past the strip, is_private). -struct StripResult { size_t begin; bool is_private; }; - -StripResult strip_private(const Components & parts, size_t begin, size_t end) { - if (end - begin >= 3 && !parts[begin].is_num && parts[begin].str == "_private") { - for (size_t i = begin + 1; i < end; i++) { - if (parts[i].is_num && parts[i].num == 0) { - if (i + 1 < end) - return {i + 1, true}; - break; - } - } - } - return {begin, false}; -} - -// Spec context entry: name components + context flags -struct SpecEntry { - std::string name; - std::vector flags; -}; - -// Process a spec context: strip private, collect flags, format name -SpecEntry process_spec_context(const Components & comps, size_t begin, size_t end) { - SpecEntry entry; - auto sr = strip_private(comps, begin, end); - - std::vector seen_flags; - std::string name; - bool first = true; - - for (size_t i = sr.begin; i < end; i++) { - const char * flag = match_suffix(comps[i]); - if (flag) { - // Deduplicate - bool dup = false; - for (auto f : entry.flags) { if (f == flag) { dup = true; break; } } - if (!dup) entry.flags.push_back(flag); - } else if (is_spec_index(comps[i])) { - // skip - } else { - if (!first) name += '.'; - if (comps[i].is_num) name += std::to_string(comps[i].num); - else name += comps[i].str; - first = false; - } - } - entry.name = std::move(name); - return entry; -} - -std::string postprocess_name(const Components & components) { - if (components.empty()) return ""; - - size_t n = components.size(); - - // --- Strip _private prefix --- - auto sr = strip_private(components, 0, n); - size_t begin = sr.begin; - bool is_private = sr.is_private; - - // Copy relevant range into a working vector - Components parts(components.begin() + begin, components.begin() + n); - - // --- Strip hygienic suffixes: everything from _@ onward --- - { - size_t cut = parts.size(); - for (size_t i = 0; i < parts.size(); i++) { - if (!parts[i].is_num && starts_with_str(parts[i].str, "_@")) { - cut = i; - break; - } - } - parts.resize(cut); - } - - // --- Handle specialization: _at_ ... _spec N --- - std::vector spec_entries; - { - // Find first _at_ - int first_at = -1; - for (size_t i = 0; i < parts.size(); i++) { - if (!parts[i].is_num && parts[i].str == "_at_") { - first_at = (int)i; - break; - } - } - - if (first_at >= 0) { - Components base(parts.begin(), parts.begin() + first_at); - // Parse _at_..._spec entries - Components current_ctx; - bool in_ctx = false; - Components remaining; - bool skip_next = false; - - for (size_t i = first_at; i < parts.size(); i++) { - if (skip_next) { skip_next = false; continue; } - if (!parts[i].is_num && parts[i].str == "_at_") { - if (in_ctx) { - auto entry = process_spec_context(current_ctx, 0, current_ctx.size()); - if (!entry.name.empty() || !entry.flags.empty()) - spec_entries.push_back(std::move(entry)); - current_ctx.clear(); - } - in_ctx = true; - continue; - } - if (!parts[i].is_num && parts[i].str == "_spec") { - if (in_ctx) { - auto entry = process_spec_context(current_ctx, 0, current_ctx.size()); - if (!entry.name.empty() || !entry.flags.empty()) - spec_entries.push_back(std::move(entry)); - current_ctx.clear(); - in_ctx = false; - } - skip_next = true; - continue; - } - if (!parts[i].is_num && starts_with_str(parts[i].str, "_spec")) { - if (in_ctx) { - auto entry = process_spec_context(current_ctx, 0, current_ctx.size()); - if (!entry.name.empty() || !entry.flags.empty()) - spec_entries.push_back(std::move(entry)); - current_ctx.clear(); - in_ctx = false; - } - continue; - } - if (in_ctx) - current_ctx.push_back(parts[i]); - else - remaining.push_back(parts[i]); - } - if (in_ctx && !current_ctx.empty()) { - auto entry = process_spec_context(current_ctx, 0, current_ctx.size()); - if (!entry.name.empty() || !entry.flags.empty()) - spec_entries.push_back(std::move(entry)); - } - - parts = base; - parts.insert(parts.end(), remaining.begin(), remaining.end()); - } - } - - // --- Collect suffix flags from the end --- - std::vector flags; - while (!parts.empty()) { - const Component & last = parts.back(); - const char * flag = match_suffix(last); - if (flag) { - flags.push_back(flag); - parts.pop_back(); - } else if (last.is_num && parts.size() >= 2) { - const char * prev_flag = match_suffix(parts[parts.size() - 2]); - if (prev_flag) { - flags.push_back(prev_flag); - parts.pop_back(); // number - parts.pop_back(); // suffix - } else { - break; - } - } else { - break; - } - } - - if (is_private) flags.push_back("private"); - - // --- Format result --- - std::string result = parts.empty() ? "?" : format_name(parts); - - if (!flags.empty()) { - result += " ["; - for (size_t i = 0; i < flags.size(); i++) { - if (i > 0) result += ", "; - result += flags[i]; - } - result += ']'; - } - - for (auto & entry : spec_entries) { - std::string ctx_str = entry.name.empty() ? "?" : entry.name; - if (!entry.flags.empty()) { - result += " spec at " + ctx_str + "["; - for (size_t i = 0; i < entry.flags.size(); i++) { - if (i > 0) result += ", "; - result += entry.flags[i]; - } - result += ']'; - } else { - result += " spec at " + ctx_str; - } - } - - return result; -} - -// --------------------------------------------------------------------------- -// Prefix handling and lp_ splitting -// --------------------------------------------------------------------------- - -const char * starts_with(const char * s, const char * prefix) { - size_t plen = strlen(prefix); - if (strncmp(s, prefix, plen) == 0) return s + plen; - return nullptr; -} - -bool has_upper_start(const char * s, int len) { - if (len == 0) return false; - int pos = 0; - if (pos + 1 < len && s[pos] == '0' && s[pos + 1] == '0') pos += 2; - while (pos < len && s[pos] == '_') pos++; - if (pos >= len) return false; - return s[pos] >= 'A' && s[pos] <= 'Z'; -} - -bool is_valid_string_mangle(const char * s, int end) { - int pos = 0; - while (pos < end) { - char ch = s[pos]; - if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')) { - pos++; - } else if (ch == '_') { - if (pos + 1 >= end) return false; - char nch = s[pos + 1]; - unsigned v; - if (nch == '_') { pos += 2; } - else if (nch == 'x' && parse_hex(s, pos + 2, end, 2, v)) { pos += 4; } - else if (nch == 'u' && parse_hex(s, pos + 2, end, 4, v)) { pos += 6; } - else if (nch == 'U' && parse_hex(s, pos + 2, end, 8, v)) { pos += 10; } - else return false; - } else { - return false; - } - } - return true; -} - -int find_lp_body(const char * s, int len) { - int best = -1; - bool best_has_upper = false; - - for (int i = 0; i < len; i++) { - if (s[i] != '_') continue; - if (i == 0) continue; - if (!is_valid_string_mangle(s, i)) continue; - int body_start = i + 1; - int body_len = len - body_start; - if (body_len <= 0) continue; - - Components test; - if (!demangle_body(s + body_start, body_len, test)) continue; - - bool upper = has_upper_start(s + body_start, body_len); - if (upper) { - if (!best_has_upper || i > best - 1) { - best = body_start; - best_has_upper = true; - } - } else if (!best_has_upper) { - if (best == -1) best = body_start; - } - } - return best; -} - -void unmangle_pkg(const char * s, int len, std::string & out) { - std::string tmp; - if (demangle_body_flat(s, len, tmp) && tmp.find('.') == std::string::npos) { - out += tmp; - } else { - out.append(s, len); - } -} - -// --------------------------------------------------------------------------- -// Helper to produce final malloc'd string -// --------------------------------------------------------------------------- - -char * to_malloc_str(const std::string & s) { - char * ret = (char *)malloc(s.size() + 1); - if (ret) memcpy(ret, s.c_str(), s.size() + 1); - return ret; -} - -// Demangle body and postprocess to human-friendly string. -bool demangle_and_postprocess(const char * body, int body_len, std::string & out) { - Components comps; - if (!demangle_body(body, body_len, comps)) return false; - out = postprocess_name(comps); - return true; -} - -} // anonymous namespace - -// --------------------------------------------------------------------------- -// Public API -// --------------------------------------------------------------------------- - -char * lean_demangle_symbol(const char * symbol) { - if (!symbol || !symbol[0]) return nullptr; - - int slen = (int)strlen(symbol); - - // Handle lean_apply_N -> - { - const char * rest = starts_with(symbol, "lean_apply_"); - if (rest && is_all_digits(rest)) { - std::string result = " 0 && demangle_and_postprocess(rest, body_len, out)) { - std::string result = "[init] " + out; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - - // _init_lp_ prefix - if ((rest = starts_with(cs, "_init_lp_")) != nullptr) { - int after_len = core_len - (int)(rest - cs); - int body_idx = find_lp_body(rest, after_len); - if (body_idx >= 0) { - std::string pkg_out; - unmangle_pkg(rest, body_idx - 1, pkg_out); - if (demangle_and_postprocess(rest + body_idx, after_len - body_idx, out)) { - std::string result = "[init] " + out + " (" + pkg_out + ")"; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - } - - // initialize_l_ prefix - if ((rest = starts_with(cs, "initialize_l_")) != nullptr) { - int body_len = core_len - (int)(rest - cs); - if (body_len > 0 && demangle_and_postprocess(rest, body_len, out)) { - std::string result = "[module_init] " + out; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - - // initialize_lp_ prefix - if ((rest = starts_with(cs, "initialize_lp_")) != nullptr) { - int after_len = core_len - (int)(rest - cs); - int body_idx = find_lp_body(rest, after_len); - if (body_idx >= 0) { - std::string pkg_out; - unmangle_pkg(rest, body_idx - 1, pkg_out); - if (demangle_and_postprocess(rest + body_idx, after_len - body_idx, out)) { - std::string result = "[module_init] " + out + " (" + pkg_out + ")"; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - } - - // initialize_ (bare module init) - if ((rest = starts_with(cs, "initialize_")) != nullptr) { - int body_len = core_len - (int)(rest - cs); - if (body_len > 0 && demangle_and_postprocess(rest, body_len, out)) { - std::string result = "[module_init] " + out; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - - // l_ prefix - if ((rest = starts_with(cs, "l_")) != nullptr) { - int body_len = core_len - (int)(rest - cs); - if (body_len > 0 && demangle_and_postprocess(rest, body_len, out)) { - if (cold_suffix) { out += ' '; out.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(out); - } - } - - // lp_ prefix - if ((rest = starts_with(cs, "lp_")) != nullptr) { - int after_len = core_len - (int)(rest - cs); - int body_idx = find_lp_body(rest, after_len); - if (body_idx >= 0) { - std::string pkg_out; - unmangle_pkg(rest, body_idx - 1, pkg_out); - if (demangle_and_postprocess(rest + body_idx, after_len - body_idx, out)) { - std::string result = out + " (" + pkg_out + ")"; - if (cold_suffix) { result += ' '; result.append(cold_suffix, cold_suffix_len); } - return to_malloc_str(result); - } - } - } - - return nullptr; -} - -// --------------------------------------------------------------------------- -// Backtrace line parsing -// --------------------------------------------------------------------------- - -static const char * extract_symbol(const char * line, int * sym_len) { - int len = (int)strlen(line); - - // Linux (glibc): ./lean(l_Lean_Meta_foo+0x2a) [0x555...] - for (int i = 0; i < len; i++) { - if (line[i] == '(') { - int start = i + 1; - for (int j = start; j < len; j++) { - if (line[j] == '+' || line[j] == ')') { - if (j > start) { *sym_len = j - start; return line + start; } - break; - } - } - break; - } - } - - // macOS: 2 lean 0x100abc123 l_Lean_Meta_foo + 42 - for (int i = 0; i + 1 < len; i++) { - if (line[i] == '0' && line[i + 1] == 'x') { - int j = i + 2; - while (j < len && ((line[j] >= '0' && line[j] <= '9') || - (line[j] >= 'a' && line[j] <= 'f') || - (line[j] >= 'A' && line[j] <= 'F'))) j++; - while (j < len && line[j] == ' ') j++; - if (j >= len) return nullptr; - int start = j; - while (j < len) { - if (j + 2 < len && line[j] == ' ' && line[j + 1] == '+' && line[j + 2] == ' ') - break; - j++; - } - if (j > start) { *sym_len = j - start; return line + start; } - return nullptr; - } - } - - return nullptr; -} - -char * lean_demangle_bt_line(const char * line) { - if (!line) return nullptr; - - int sym_len = 0; - const char * sym = extract_symbol(line, &sym_len); - if (!sym || sym_len == 0) return nullptr; - - // Make null-terminated copy - char * sym_copy = (char *)malloc(sym_len + 1); - if (!sym_copy) return nullptr; - memcpy(sym_copy, sym, sym_len); - sym_copy[sym_len] = '\0'; - - char * demangled = lean_demangle_symbol(sym_copy); - free(sym_copy); - if (!demangled) return nullptr; - - // Reconstruct line with demangled name - int line_len = (int)strlen(line); - int dem_len = (int)strlen(demangled); - int prefix_len = (int)(sym - line); - int suffix_start = prefix_len + sym_len; - int suffix_len = line_len - suffix_start; - - int new_len = prefix_len + dem_len + suffix_len; - char * result = (char *)malloc(new_len + 1); - if (!result) { free(demangled); return nullptr; } - - memcpy(result, line, prefix_len); - memcpy(result + prefix_len, demangled, dem_len); - memcpy(result + prefix_len + dem_len, line + suffix_start, suffix_len); - result[new_len] = '\0'; - - free(demangled); - return result; -} diff --git a/src/runtime/demangle.h b/src/runtime/demangle.h deleted file mode 100644 index 0af19df94c4f..000000000000 --- a/src/runtime/demangle.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -Copyright (c) 2025 Microsoft Corporation. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. - -Author: Leonardo de Moura -*/ -#pragma once - -/* - * Demangle Lean symbol names in backtrace lines. - * - * lean_demangle_symbol(symbol): - * Given a mangled C symbol name (e.g. "l_Lean_Meta_Grind_foo"), - * returns a malloc'd string with the demangled name (e.g. "Lean.Meta.Grind.foo"), - * or nullptr if the symbol is not a Lean name. - * - * lean_demangle_bt_line(line): - * Given a backtrace line from backtrace_symbols(), extracts the symbol, - * demangles it, and returns a malloc'd string with the demangled name - * substituted in. Returns nullptr if nothing was demangled. - * - * Callers must free() non-null return values. - */ - -char * lean_demangle_symbol(const char * symbol); -char * lean_demangle_bt_line(const char * line); diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index c031e8a0bcf9..aefb0064abe1 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -31,7 +31,13 @@ Author: Leonardo de Moura #if LEAN_SUPPORTS_BACKTRACE #include #include -#include "runtime/demangle.h" +// Lean-exported demangler from Lean.Compiler.NameDemangling. +// Declared as a weak symbol so leanrt doesn't require libLean at link time. +// When the Lean demangler is linked in, it overrides this stub. +extern "C" __attribute__((weak)) lean_object * lean_demangle_bt_line_cstr(lean_object * s) { + lean_dec(s); + return lean_mk_string(""); +} #endif // HACK: for unknown reasons, std::isnan(x) fails on msys64 because math.h @@ -137,13 +143,20 @@ static void print_backtrace(bool force_stderr) { if (char ** symbols = backtrace_symbols(bt_buf, nptrs)) { bool raw = getenv("LEAN_BACKTRACE_RAW"); for (int i = 0; i < nptrs; i++) { - char * demangled = raw ? nullptr : lean_demangle_bt_line(symbols[i]); - if (demangled) { - panic_eprintln(demangled, force_stderr); - free(demangled); - } else { - panic_eprintln(symbols[i], force_stderr); + if (!raw) { + lean_object * line_obj = lean_mk_string(symbols[i]); + lean_object * result = lean_demangle_bt_line_cstr(line_obj); + char const * result_str = lean_string_cstr(result); + if (result_str[0] != '\0') { + panic_eprintln(result_str, force_stderr); + lean_dec(result); + lean_dec(line_obj); + continue; + } + lean_dec(result); + lean_dec(line_obj); } + panic_eprintln(symbols[i], force_stderr); } // According to `man backtrace`, each `symbols[i]` should NOT be freed free(symbols); diff --git a/tests/lean/run/demangling.lean b/tests/lean/run/demangling.lean new file mode 100644 index 000000000000..8a5f59b13e1f --- /dev/null +++ b/tests/lean/run/demangling.lean @@ -0,0 +1,539 @@ +module +import Lean.Compiler.NameMangling +import Lean.Compiler.NameDemangling + +/-! +# Tests for Lean Name Demangling + +Tests the full demangling pipeline from `NameDemangling.lean`. +-/ + +open Lean.Name.Demangle +open Lean (Name) + +def check (label : String) (got expected : String) : IO Unit := do + if got != expected then + throw <| .userError s!"FAIL [{label}]: got {repr got}, expected {repr expected}" + +def checkSome (label : String) (got : Option String) (expected : String) : IO Unit := do + match got with + | some v => check label v expected + | none => throw <| .userError s!"FAIL [{label}]: got none, expected {repr expected}" + +def checkNone (label : String) (got : Option String) : IO Unit := do + match got with + | some v => throw <| .userError s!"FAIL [{label}]: got {repr v}, expected none" + | none => pure () + +def checkName (label : String) (got expected : Name) : IO Unit := do + if got != expected then + throw <| .userError s!"FAIL [{label}]: got {repr got}, expected {repr expected}" + +-- Helper to build name with compiler suffixes +private def mkName (parts : List (String ⊕ Nat)) : Name := + parts.foldl (fun n p => + match p with + | .inl s => n.str s + | .inr k => n.num k) .anonymous + +-- Verify mangle → demangle round-trips: demangle(mangle(name)) == name +private def checkRoundTrip (label : String) (parts : List (String ⊕ Nat)) : IO Unit := do + let name := mkName parts + let mangled := name.mangle "" + let demangled := Name.demangle mangled + checkName s!"roundtrip {label}" demangled name + +-- ============================================================================ +-- String.mangle +-- ============================================================================ + +#eval check "mangle alphanumeric" (String.mangle "hello") "hello" +#eval check "mangle underscore" (String.mangle "a_b") "a__b" +#eval check "mangle double underscore" (String.mangle "__") "____" +#eval check "mangle dot" (String.mangle ".") "_x2e" +#eval check "mangle a.b" (String.mangle "a.b") "a_x2eb" +#eval check "mangle lambda" (String.mangle "\u03bb") "_u03bb" +#eval check "mangle astral" (String.mangle (String.singleton (Char.ofNat 0x1d55c))) "_U0001d55c" +#eval check "mangle empty" (String.mangle "") "" + +-- ============================================================================ +-- Name.mangle +-- ============================================================================ + +#eval check "mangle simple name" (Name.mangle `Lean.Meta.Sym.main) "l_Lean_Meta_Sym_main" +#eval check "mangle single" (Name.mangle `main) "l_main" +#eval check "mangle underscore comp" (Name.mangle `a_b) "l_a__b" +#eval check "mangle underscore comp.c" (Name.mangle `a_b.c) "l_a__b_c" +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "withMVarContextImp"] + check "mangle private" (name.mangle "l_") + "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp" +#eval do + let name := mkName [.inr 42] + check "mangle numeric root" (name.mangle "l_") "l_42_" +#eval do + let name := mkName [.inr 42, .inl "foo"] + check "mangle numeric root + str" (name.mangle "l_") "l_42__foo" +#eval do + let name := mkName [.inl "a", .inl "x27"] + check "mangle disambiguation" (name.mangle "l_") "l_a_00x27" +#eval do + let name := mkName [.inl "0foo"] + check "mangle digit start disambiguation" (name.mangle "l_") "l_000foo" +#eval do + check "mangle custom prefix" (Name.mangle `foo "lp_pkg_") "lp_pkg_foo" + +-- ============================================================================ +-- Name.demangle (core algorithm, no postprocessing) +-- ============================================================================ + +#eval checkName "demangle simple" (Name.demangle "Lean_Meta_Sym_main") `Lean.Meta.Sym.main +#eval checkName "demangle single" (Name.demangle "main") `main +#eval checkName "demangle underscore" (Name.demangle "a__b") `a_b +#eval checkName "demangle underscore multi" (Name.demangle "a__b_c") `a_b.c +#eval checkName "demangle numeric" + (Name.demangle "foo_42__bar") (mkName [.inl "foo", .inr 42, .inl "bar"]) +#eval checkName "demangle numeric root" + (Name.demangle "42_") (mkName [.inr 42]) +#eval checkName "demangle numeric at end" + (Name.demangle "foo_42_") (mkName [.inl "foo", .inr 42]) +#eval checkName "demangle disambiguation 00" + (Name.demangle "a_00x27") (mkName [.inl "a", .inl "x27"]) +#eval checkName "demangle disambiguation root" + (Name.demangle "000foo") (mkName [.inl "0foo"]) +#eval checkName "demangle hex x" + (Name.demangle "a_x2eb") (mkName [.inl "a.b"]) +#eval checkName "demangle hex u" + (Name.demangle "_u03bb") (mkName [.inl "\u03bb"]) +#eval checkName "demangle hex U" + (Name.demangle "_U0001d55c") (mkName [.inl (String.singleton (Char.ofNat 0x1d55c))]) +#eval checkName "demangle private" + (Name.demangle "__private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp") + (mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "withMVarContextImp"]) +#eval checkName "demangle boxed suffix" + (Name.demangle "foo___boxed") (mkName [.inl "foo", .inl "_boxed"]) +#eval checkName "demangle underscore-ending component" + (Name.demangle "a___00b") (mkName [.inl "a_", .inl "b"]) + +-- ============================================================================ +-- Mangle/demangle round-trips +-- ============================================================================ + +#eval checkRoundTrip "simple" [.inl "Lean", .inl "Meta", .inl "main"] +#eval checkRoundTrip "single" [.inl "a"] +#eval checkRoundTrip "three" [.inl "Foo", .inl "Bar", .inl "baz"] +#eval checkRoundTrip "numeric" [.inl "foo", .inr 0, .inl "bar"] +#eval checkRoundTrip "numeric root" [.inr 42] +#eval checkRoundTrip "numeric multi" [.inl "a", .inr 1, .inl "b", .inr 2, .inl "c"] +#eval checkRoundTrip "_private" [.inl "_private"] +#eval checkRoundTrip "underscore" [.inl "a_b", .inl "c_d"] +#eval checkRoundTrip "_at_ _spec" [.inl "_at_", .inl "_spec"] +#eval checkRoundTrip "private name" + [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", .inr 0, + .inl "Lean", .inl "Meta", .inl "withMVarContextImp"] +#eval checkRoundTrip "boxed" [.inl "Lean", .inl "Meta", .inl "foo", .inl "_boxed"] +#eval checkRoundTrip "redArg" [.inl "Lean", .inl "Meta", .inl "foo", .inl "_redArg"] +#eval checkRoundTrip "specialization" + [.inl "List", .inl "map", .inl "_at_", .inl "Foo", .inl "bar", .inl "_spec", .inr 3] +#eval checkRoundTrip "lambda" [.inl "Foo", .inl "bar", .inl "_lambda", .inr 0] +#eval checkRoundTrip "lambda 2" [.inl "Foo", .inl "bar", .inl "_lambda", .inr 2] +#eval checkRoundTrip "closed" [.inl "myConst", .inl "_closed", .inr 0] +#eval checkRoundTrip "dot char" [.inl "a.b"] +#eval checkRoundTrip "unicode" [.inl "\u03bb"] +#eval checkRoundTrip "unicode arrow" [.inl "a", .inl "b\u2192c"] +#eval checkRoundTrip "disambiguation x27" [.inl "a", .inl "x27"] +#eval checkRoundTrip "disambiguation 0foo" [.inl "0foo"] +#eval checkRoundTrip "disambiguation underscore end" [.inl "a_", .inl "b"] +#eval checkRoundTrip "complex" + [.inl "Lean", .inl "MVarId", .inl "withContext", .inl "_at_", + .inl "_private", .inl "Lean", .inl "Meta", .inl "Sym", .inr 0, + .inl "Lean", .inl "Meta", .inl "Sym", .inl "BackwardRule", .inl "apply", + .inl "_spec", .inr 2, .inl "_redArg", .inl "_lambda", .inr 0, .inl "_boxed"] + +-- ============================================================================ +-- Basic l_ prefix demangling +-- ============================================================================ + +#eval checkSome "simple l_" (demangleSymbol "l_Lean_Meta_Sym_main") "Lean.Meta.Sym.main" +#eval checkSome "single l_" (demangleSymbol "l_main") "main" + +-- ============================================================================ +-- lp_ prefix with package names +-- ============================================================================ + +#eval do + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "std"}_") + checkSome "lp_ simple" (demangleSymbol mangled) "Lean.Meta.foo (std)" + +#eval do + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "my_pkg"}_") + checkSome "lp_ underscore pkg" (demangleSymbol mangled) "Lean.Meta.foo (my_pkg)" + +#eval do + -- Package with escaped chars (hyphen → _x2d): split must not break mid-escape + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "my-pkg"}_") + checkSome "lp_ escaped pkg" (demangleSymbol mangled) "Lean.Meta.foo (my-pkg)" + +#eval do + let name := mkName [.inl "_private", .inl "X", .inr 0, .inl "Y", .inl "foo"] + let mangled := name.mangle (s!"lp_{String.mangle "pkg"}_") + checkSome "lp_ private decl" (demangleSymbol mangled) "Y.foo [private] (pkg)" + +-- ============================================================================ +-- _init_ prefixes +-- ============================================================================ + +#eval checkSome "init l_" + (demangleSymbol "_init_l_Lean_Meta_foo") "[init] Lean.Meta.foo" + +#eval do + let name := mkName [.inl "_private", .inl "X", .inr 0, .inl "Y", .inl "foo"] + let mangled := "_init_" ++ name.mangle "l_" + checkSome "init l_ private" (demangleSymbol mangled) "[init] Y.foo [private]" + +#eval do + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "std"}_") + checkSome "init lp_" (demangleSymbol ("_init_" ++ mangled)) + "[init] Lean.Meta.foo (std)" + +-- ============================================================================ +-- initialize_ prefixes +-- ============================================================================ + +#eval checkSome "initialize bare" + (demangleSymbol "initialize_Init_Control_Basic") "[module_init] Init.Control.Basic" +#eval checkSome "initialize l_" + (demangleSymbol "initialize_l_Lean_Meta_foo") "[module_init] Lean.Meta.foo" + +#eval do + let mangled := Name.mangle `Lean.Meta.foo (s!"lp_{String.mangle "std"}_") + checkSome "initialize lp_" (demangleSymbol ("initialize_" ++ mangled)) + "[module_init] Lean.Meta.foo (std)" + +-- ============================================================================ +-- lean_apply_N and _lean_main +-- ============================================================================ + +#eval checkSome "lean_apply_5" (demangleSymbol "lean_apply_5") "" +#eval checkSome "lean_apply_12" (demangleSymbol "lean_apply_12") "" +#eval checkSome "lean_main" (demangleSymbol "_lean_main") "[lean] main" + +-- ============================================================================ +-- .cold.N suffix handling +-- ============================================================================ + +#eval do + let mangled := Name.mangle `Lean.Meta.foo._redArg "l_" + checkSome "cold suffix" (demangleSymbol (mangled ++ ".cold.1")) + "Lean.Meta.foo [arity↓] .cold.1" + +#eval checkSome "cold suffix plain" + (demangleSymbol "l_Lean_Meta_foo.cold") "Lean.Meta.foo .cold" + +#eval checkSome "cold lean_apply" + (demangleSymbol "lean_apply_5.cold.1") " .cold.1" + +#eval checkSome "cold lean_main" + (demangleSymbol "_lean_main.cold.1") "[lean] main .cold.1" + +-- ============================================================================ +-- Non-Lean symbols return none +-- ============================================================================ + +#eval checkNone "printf" (demangleSymbol "printf") +#eval checkNone "malloc" (demangleSymbol "malloc") +#eval checkNone "empty" (demangleSymbol "") + +-- ============================================================================ +-- Postprocessing: suffix folding +-- ============================================================================ + +#eval do + let name := mkName [.inl "foo", .inl "_boxed"] + checkSome "boxed" (demangleSymbol (name.mangle "l_")) "foo [boxed]" + +#eval do + let name := mkName [.inl "foo", .inl "_redArg"] + checkSome "redArg" (demangleSymbol (name.mangle "l_")) "foo [arity↓]" + +#eval do + let name := mkName [.inl "foo", .inl "_impl"] + checkSome "impl" (demangleSymbol (name.mangle "l_")) "foo [impl]" + +#eval do + let name := mkName [.inl "foo", .inl "_lam", .inr 0] + checkSome "lambda separate" (demangleSymbol (name.mangle "l_")) "foo [λ]" + +#eval do + let name := mkName [.inl "foo", .inl "_lam_0"] + checkSome "lambda indexed" (demangleSymbol (name.mangle "l_")) "foo [λ]" + +#eval do + let name := mkName [.inl "foo", .inl "_lambda_2"] + checkSome "lambda_2" (demangleSymbol (name.mangle "l_")) "foo._lambda_2" + +#eval do + let name := mkName [.inl "foo", .inl "_elam"] + checkSome "elam" (demangleSymbol (name.mangle "l_")) "foo [λ]" + +#eval do + let name := mkName [.inl "foo", .inl "_elam_1"] + checkSome "elam indexed" (demangleSymbol (name.mangle "l_")) "foo [λ]" + +#eval do + let name := mkName [.inl "foo", .inl "_jp"] + checkSome "jp" (demangleSymbol (name.mangle "l_")) "foo [jp]" + +#eval do + let name := mkName [.inl "foo", .inl "_jp_3"] + checkSome "jp indexed" (demangleSymbol (name.mangle "l_")) "foo._jp_3" + +#eval do + let name := mkName [.inl "myConst", .inl "_closed", .inr 0] + checkSome "closed" (demangleSymbol (name.mangle "l_")) "myConst [closed]" + +#eval do + let name := mkName [.inl "myConst", .inl "_closed_0"] + checkSome "closed indexed" (demangleSymbol (name.mangle "l_")) "myConst._closed_0" + +#eval do + let name := mkName [.inl "foo", .inl "_redArg", .inl "_boxed"] + checkSome "multiple suffixes" (demangleSymbol (name.mangle "l_")) + "foo [boxed, arity↓]" + +#eval do + -- _lam_0 followed by _boxed + let name := mkName [.inl "Lean", .inl "Meta", .inl "Simp", .inl "simpLambda", + .inl "_lam_0", .inl "_boxed"] + checkSome "lambda + boxed" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.Simp.simpLambda [boxed, λ]" + +#eval do + -- _redArg followed by _lam_0 + let name := mkName [.inl "Lean", .inl "profileitIOUnsafe", + .inl "_redArg", .inl "_lam_0"] + checkSome "redArg + lam" (demangleSymbol (name.mangle "l_")) + "Lean.profileitIOUnsafe [λ, arity↓]" + +-- ============================================================================ +-- Postprocessing: private name stripping +-- ============================================================================ + +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "foo"] + checkSome "private" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo [private]" + +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "foo", .inl "_redArg"] + checkSome "private + redArg" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo [arity↓, private]" + +-- ============================================================================ +-- Postprocessing: hygienic suffix stripping +-- ============================================================================ + +#eval do + let name := mkName [.inl "Lean", .inl "Meta", .inl "foo", .inl "_@", + .inl "Lean", .inl "Meta", .inl "_hyg", .inr 42] + checkSome "hygienic strip" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo" + +#eval do + -- _boxed after _@ hygienic section should still be recognized + let name := mkName [.inl "Lean", .inl "Meta", .inl "foo", .inl "_@", + .inl "Lean", .inl "Meta", .inl "_hyg", .inr 42, .inl "_boxed"] + checkSome "hygienic + boxed" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo [boxed]" + +#eval do + -- _lam + _boxed after _@ should both be recognized + let name := mkName [.inl "Lean", .inl "initFn", .inl "_@", + .inl "Lean", .inl "Elab", .inl "_hyg", .inr 42, + .inl "_lam", .inr 0, .inl "_boxed"] + checkSome "hygienic + lam + boxed" (demangleSymbol (name.mangle "l_")) + "Lean.initFn [boxed, λ]" + +-- ============================================================================ +-- Postprocessing: specialization contexts +-- ============================================================================ + +#eval do + let name := mkName [.inl "List", .inl "map", .inl "_at_", + .inl "Foo", .inl "bar", .inl "_spec", .inr 3] + checkSome "specialization" (demangleSymbol (name.mangle "l_")) + "List.map spec at Foo.bar" + +#eval do + let name := mkName [.inl "Lean", .inl "MVarId", .inl "withContext", + .inl "_at_", .inl "Foo", .inl "bar", .inl "_spec", .inr 2, + .inl "_boxed"] + checkSome "spec with base suffix" (demangleSymbol (name.mangle "l_")) + "Lean.MVarId.withContext [boxed] spec at Foo.bar" + +#eval do + let name := mkName [.inl "Lean", .inl "Meta", .inl "foo", + .inl "_at_", .inl "Lean", .inl "Meta", .inl "bar", + .inl "_elam_1", .inl "_redArg", .inl "_spec", .inr 2] + checkSome "spec context flags" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.foo spec at Lean.Meta.bar[λ, arity↓]" + +#eval do + -- Duplicate flag labels are deduplicated: _lam_0 + _elam_1 both map to λ + let name := mkName [.inl "f", .inl "_at_", + .inl "g", .inl "_lam_0", .inl "_elam_1", .inl "_redArg", + .inl "_spec", .inr 1] + checkSome "spec context flags dedup" (demangleSymbol (name.mangle "l_")) + "f spec at g[λ, arity↓]" + +#eval do + let name := mkName [.inl "f", + .inl "_at_", .inl "g", .inl "_spec", .inr 1, + .inl "_at_", .inl "h", .inl "_spec", .inr 2] + checkSome "multiple at" (demangleSymbol (name.mangle "l_")) + "f spec at g spec at h" + +#eval do + -- Multiple spec at with flags on base and contexts + let name := mkName [.inl "f", + .inl "_at_", .inl "g", .inl "_redArg", .inl "_spec", .inr 1, + .inl "_at_", .inl "h", .inl "_lam_0", .inl "_spec", .inr 2, + .inl "_boxed"] + checkSome "multiple at with flags" (demangleSymbol (name.mangle "l_")) + "f [boxed] spec at g[arity↓] spec at h[λ]" + +#eval do + -- Base trailing suffix appearing after _spec N + let name := mkName [.inl "f", .inl "_at_", .inl "g", .inl "_spec", .inr 1, + .inl "_lam_0"] + checkSome "base flags after spec" (demangleSymbol (name.mangle "l_")) + "f [λ] spec at g" + +#eval do + -- spec_0 entries in context should be stripped + let name := mkName [.inl "Lean", .inl "Meta", .inl "transformWithCache", .inl "visit", + .inl "_at_", + .inl "_private", .inl "Lean", .inl "Meta", .inl "Transform", .inr 0, + .inl "Lean", .inl "Meta", .inl "transform", + .inl "Lean", .inl "Meta", .inl "Sym", .inl "unfoldReducible", + .inl "spec_0", .inl "spec_0", + .inl "_spec", .inr 1] + checkSome "spec context strip spec_0" (demangleSymbol (name.mangle "l_")) + "Lean.Meta.transformWithCache.visit spec at Lean.Meta.transform.Lean.Meta.Sym.unfoldReducible" + +#eval do + -- _private in spec context should be stripped + let name := mkName [.inl "Array", .inl "mapMUnsafe", .inl "map", + .inl "_at_", + .inl "_private", .inl "Lean", .inl "Meta", .inl "Transform", .inr 0, + .inl "Lean", .inl "Meta", .inl "transformWithCache", .inl "visit", + .inl "_spec", .inr 1] + checkSome "spec context strip private" (demangleSymbol (name.mangle "l_")) + "Array.mapMUnsafe.map spec at Lean.Meta.transformWithCache.visit" + +-- ============================================================================ +-- Complex real-world name +-- ============================================================================ + +#eval do + let name := mkName [.inl "Lean", .inl "MVarId", .inl "withContext", + .inl "_at_", + .inl "_private", .inl "Lean", .inl "Meta", .inl "Sym", .inr 0, + .inl "Lean", .inl "Meta", .inl "Sym", .inl "BackwardRule", .inl "apply", + .inl "_spec", .inr 2, + .inl "_redArg", .inl "_lambda", .inr 0, .inl "_boxed"] + checkSome "complex" (demangleSymbol (name.mangle "l_")) + "Lean.MVarId.withContext [boxed, λ, arity↓] spec at Lean.Meta.Sym.BackwardRule.apply" + +-- ============================================================================ +-- Backtrace line parsing: Linux glibc format +-- ============================================================================ + +#eval checkSome "bt linux" + (demangleBtLine "./lean(l_Lean_Meta_foo+0x2a) [0x555555555555]") + "./lean(Lean.Meta.foo+0x2a) [0x555555555555]" + +#eval do + let name := mkName [.inl "foo", .inl "_boxed"] + let sym := name.mangle "l_" + checkSome "bt linux boxed" + (demangleBtLine s!"./lean({sym}+0x10) [0x7fff]") + "./lean(foo [boxed]+0x10) [0x7fff]" + +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "foo"] + let sym := name.mangle "l_" + checkSome "bt linux private" + (demangleBtLine s!"./lean({sym}+0x10) [0x7fff]") + "./lean(Lean.Meta.foo [private]+0x10) [0x7fff]" + +#eval do + let name := mkName [.inl "_private", .inl "Lean", .inl "Meta", .inl "Basic", + .inr 0, .inl "Lean", .inl "Meta", .inl "foo", .inl "_redArg"] + let sym := name.mangle "l_" + checkSome "bt linux private + redArg" + (demangleBtLine s!"./lean({sym}+0x10) [0x7fff]") + "./lean(Lean.Meta.foo [arity↓, private]+0x10) [0x7fff]" + +-- ============================================================================ +-- Backtrace line parsing: macOS format +-- ============================================================================ + +#eval checkSome "bt macos" + (demangleBtLine "3 lean 0x00000001001234 l_Lean_Meta_foo + 42") + "3 lean 0x00000001001234 Lean.Meta.foo + 42" + +#eval do + let name := mkName [.inl "foo", .inl "_redArg"] + let sym := name.mangle "l_" + checkSome "bt macos redArg" + (demangleBtLine s!"3 lean 0x00000001001234 {sym} + 42") + "3 lean 0x00000001001234 foo [arity↓] + 42" + +-- ============================================================================ +-- Backtrace line parsing: non-Lean lines unchanged +-- ============================================================================ + +#eval checkNone "bt non-lean" + (demangleBtLine "./lean(printf+0x10) [0x7fff]") + +#eval checkNone "bt no parens" + (demangleBtLine "just some random text") + +-- ============================================================================ +-- Edge cases: never crashes +-- ============================================================================ + +#eval do + let inputs := #[ + "", "l_", "lp_", "lp_x", "_init_", "initialize_", + "l_____", "lp____", "l_00", "l_0", + "some random string", "l_ space", + "_init_l_", "_init_lp_", "_init_lp_x", + "initialize_l_", "initialize_lp_", "initialize_lp_x", + "lean_apply_", "lean_apply_x", "lean_apply_0", + "l_x", "l_0", "l_00", "l___", "lp_a_b", + ".cold", ".cold.1", "l_.cold", "l_x.cold.99" + ] + inputs.forM fun inp => do + let _ := demangleSymbol inp + let _ := demangleBtLine inp + pure () + +-- ============================================================================ +-- C export wrappers +-- ============================================================================ + +#eval check "export symbol" + ((demangleSymbol "l_Lean_Meta_foo").getD "") "Lean.Meta.foo" +#eval check "export symbol none" + ((demangleSymbol "printf").getD "") "" +#eval check "export bt line" + ((demangleBtLine "./lean(l_foo+0x1) [0x2]").getD "") "./lean(foo+0x1) [0x2]" +#eval check "export bt line none" + ((demangleBtLine "no lean symbols here").getD "") ""