diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..c6c97c6 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,100 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +asm-parser is a C++ tool that categorizes and filters assembly code for Compiler Explorer. It processes both regular compiler assembly output and GNU objdump binary output, providing JSON or text output with filtered assembly suitable for display in CE. + +## Build Commands + +**Initial Setup:** +```bash +./setup.sh # Sets up Python venv, installs Conan2, configures C++20 profile +``` + +**Development Build:** +```bash +cd build +cmake .. -DCMAKE_BUILD_TYPE=Debug -G Ninja # Ninja preferred +cmake --build . --config Debug +``` + +**Production Build:** +```bash +mkdir -p build && cd build +cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. +cmake --build . --target asm-parser +``` + +**Running Tests:** +```bash +make test # Via CMake/CTest +./build/src/test/asm-parser-test # Direct execution +``` + +**Code Formatting:** +```bash +clang-format -i src/*/*.cpp src/*/*.hpp +``` + +## Architecture + +**Core Components:** +- `src/assembly/parser.{cpp,hpp}` - Parses compiler assembly text output +- `src/objdump/parser.{cpp,hpp}` - Parses GNU objdump binary output +- `src/types/` - Core interfaces (IParser) and data structures (Filter, Line) +- `src/utils/` - JSON output, regex wrappers, library detection utilities + +**Parser Architecture:** +Both parsers implement the `IParser` interface using state-machine-based parsing. The `Filter` struct controls which filtering operations are applied (directives, unused labels, comments, library functions, etc.). 
+ +**Key Features:** +- Binary mode: Processes objdump output with addresses, opcodes, relocations +- Assembly text mode: Processes compiler-generated assembly +- Label analysis: Identifies and removes unused labels/functions +- Library detection: Filters external library code based on file paths +- Multiple output formats: JSON (default) or filtered text + +## Testing Framework + +The test suite uses **Catch2** with **ApprovalTests** for regression testing. Test data in `resources/` includes real-world assembly examples and bug-reproduction cases from Compiler Explorer. + +**Test file naming convention:** +- Input: `resources/example.asm` +- Expected output: `resources/asmtext_filter_tests.example.approved.txt` + +Tests cover various architectures, compiler outputs, and edge cases from CE bug reports. + +## Dependencies + +Managed via **Conan 2.x**: +- Catch2 2.13.10 (testing) +- ApprovalTests.cpp 10.12.2 (golden master testing) +- fmt 11.0.0 (string formatting) +- ctre 3.7.1 (compile-time regex) + +## Development Notes + +- Requires GCC 12+ or equivalent with C++20 support +- Debug builds include sanitizers (AddressSanitizer, LeakSanitizer, UBSan) +- Release builds use -O3 with Link Time Optimization +- UTF-8 locale required for Unicode support +- Production deployment copies binary to `/usr/local/bin/asm-parser` + +## Common Usage Patterns + +**Binary objdump processing:** +```bash +objdump -d a.out -l --insn-width=16 | asm-parser -stdin -binary +``` + +**Assembly text filtering:** +```bash +asm-parser -directives -unused_labels -comment_only file.asm +``` + +**Text output mode:** +```bash +asm-parser -outputtext -library_functions input.asm +``` \ No newline at end of file diff --git a/resources/asmtext_filter_tests.ce-bug-1648.approved.txt b/resources/asmtext_filter_tests.ce-bug-1648.approved.txt new file mode 100644 index 0000000..55a3977 --- /dev/null +++ b/resources/asmtext_filter_tests.ce-bug-1648.approved.txt @@ -0,0 +1 @@ +{"asm": [],"labelDefinitions": {}, 
"filteredCount": 35} diff --git a/resources/ce-bug-1648.asm b/resources/ce-bug-1648.asm new file mode 100644 index 0000000..e026372 --- /dev/null +++ b/resources/ce-bug-1648.asm @@ -0,0 +1,36 @@ + +output.s: file format elf64-x86-64 + + +Disassembly of section .text: + +0000000000401000
: + 401000: 48 83 ec 08 sub rsp,0x8 + 401004: f2 0f 10 05 24 10 00 movsd xmm0,QWORD PTR [rip+0x1024] # 402030 <.rodata> + 40100b: 00 + 40100c: f2 0f 10 0d 1c 10 00 movsd xmm1,QWORD PTR [rip+0x101c] # 402030 <.rodata+0x8> + 401013: 00 + 401014: f2 0f 10 15 1c 10 00 movsd xmm2,QWORD PTR [rip+0x101c] # 402038 <.rodata+0x10> + 40101b: 00 + 40101c: f2 0f 10 1d 1c 10 00 movsd xmm3,QWORD PTR [rip+0x101c] # 402040 <.rodata+0x18> + 401023: 00 + 401024: e8 47 00 00 00 call 401070 <__divdc3> + 401029: 48 83 c4 08 add rsp,0x8 + 40102d: c3 ret + +000000000040102e <__unused_func>: + 40102e: 48 83 ec 08 sub rsp,0x8 + 401032: 48 83 c4 08 add rsp,0x8 + 401036: c3 ret + +0000000000401070 <__divdc3>: + 401070: 48 83 ec 08 sub rsp,0x8 + 401074: f2 0f 5e c2 divsd xmm0,xmm2 + 401078: f2 0f 5e cb divsd xmm1,xmm3 + 40107c: 48 83 c4 08 add rsp,0x8 + 401080: c3 ret + +0000000000401090 <__another_unused>: + 401090: 48 83 ec 08 sub rsp,0x8 + 401094: 48 83 c4 08 add rsp,0x8 + 401098: c3 ret \ No newline at end of file diff --git a/src/objdump/parser.cpp b/src/objdump/parser.cpp index 59d102b..d33a86a 100644 --- a/src/objdump/parser.cpp +++ b/src/objdump/parser.cpp @@ -134,7 +134,7 @@ void AsmParser::ObjDumpParser::label() if (this->filter.library_functions) this->maybe_remove_last_function(); - this->state.ignoreUntilNextLabel = AssemblyTextParserUtils::shouldIgnoreFunction(this->state.text, this->filter); + this->state.ignoreUntilNextLabel = this->shouldIgnoreFunction(this->state.text); if (this->state.ignoreUntilNextLabel) return; @@ -159,9 +159,10 @@ void AsmParser::ObjDumpParser::labelref() { this->state.currentLabelReference.name = this->state.text.substr(this->state.currentLabelReference.range.start_col); - if (!AssemblyTextParserUtils::shouldIgnoreFunction(this->state.currentLabelReference.name, this->filter)) + if (!AsmParser::AssemblyTextParserUtils::shouldIgnoreFunction(this->state.currentLabelReference.name, this->filter)) { 
this->state.currentLine.labels.push_back(this->state.currentLabelReference); + this->referenced_functions.insert(this->state.currentLabelReference.name); } } catch (...) @@ -297,6 +298,23 @@ void AsmParser::ObjDumpParser::address() this->state.text.clear(); } +bool AsmParser::ObjDumpParser::shouldIgnoreFunction(std::string_view name) const +{ + if (name == "main") + { + return false; + } + + // Don't filter if the function is referenced by a non-filtered function + if (this->referenced_functions.count(std::string(name)) > 0) + { + return false; + } + + // Apply the original filtering logic + return AssemblyTextParserUtils::shouldIgnoreFunction(name, this->filter); +} + void AsmParser::ObjDumpParser::setReproducible() { this->reproducible = true; diff --git a/src/objdump/parser.hpp b/src/objdump/parser.hpp index 2423e79..dea20cf 100644 --- a/src/objdump/parser.hpp +++ b/src/objdump/parser.hpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace AsmParser { @@ -49,12 +50,12 @@ class ObjDumpParser : public IParser LibraryDetection lib_detection; std::vector lines; std::vector labels; + std::unordered_set referenced_functions; bool reproducible; size_t total_lines{}; - // todo: bad names void actually_address(); void actually_filename(); void do_file_check(std::string_view filename); diff --git a/src/test/asmtext_filter_tests.cpp b/src/test/asmtext_filter_tests.cpp index 0228f43..0061ecd 100644 --- a/src/test/asmtext_filter_tests.cpp +++ b/src/test/asmtext_filter_tests.cpp @@ -269,3 +269,37 @@ TEST_CASE("example-llvm-objdump") ApprovalTests::Approvals::verify(ss.str()); } + +TEST_CASE("ce-bug-1648") +{ + AsmParser::Filter filter; + filter.binary = true; + filter.plt = true; + filter.library_functions = true; + filter.unused_labels = true; + filter.code_only = true; + + std::string asmpath; + if (std::filesystem::current_path().string().ends_with("test")) + { + asmpath = "../../../resources/ce-bug-1648.asm"; + } + else + { + asmpath = 
"../../resources/ce-bug-1648.asm"; + } + + AsmParser::ObjDumpParser parser(filter); + parser.setReproducible(); + + std::fstream fs; + fs.open(asmpath, std::fstream::in); + REQUIRE(fs.is_open() == true); + + parser.fromStream(fs); + + std::stringstream ss; + parser.outputJson(ss); + + ApprovalTests::Approvals::verify(ss.str()); +}