diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5c7dd7a..fc1d71b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,9 +16,6 @@ concurrency: cancel-in-progress: true jobs: - # ============================================================================ - # Build (CLI + Plugin) - # ============================================================================ build-linux: name: Build (Linux) runs-on: ubuntu-latest @@ -35,7 +32,7 @@ jobs: run: git clone --depth 1 --recurse-submodules https://github.com/HexRaysSA/ida-sdk ida-sdk - name: Configure - run: cmake -B build -DCMAKE_BUILD_TYPE=Release -DIDASQL_WITH_AI_AGENT=ON -DIDASQL_BUILD_EXAMPLES=ON + run: cmake -B build -DCMAKE_BUILD_TYPE=Release -DIDASQL_WITH_MCP=OFF -DIDASQL_BUILD_EXAMPLES=ON - name: Build run: cmake --build build --config Release @@ -68,7 +65,7 @@ jobs: run: git clone --depth 1 --recurse-submodules https://github.com/HexRaysSA/ida-sdk ida-sdk - name: Configure - run: cmake -B build -DCMAKE_BUILD_TYPE=Release -DIDASQL_WITH_AI_AGENT=ON -DIDASQL_BUILD_EXAMPLES=ON + run: cmake -B build -DCMAKE_BUILD_TYPE=Release -DIDASQL_WITH_MCP=OFF -DIDASQL_BUILD_EXAMPLES=ON - name: Build run: cmake --build build --config Release @@ -103,7 +100,7 @@ jobs: run: git clone --depth 1 --recurse-submodules https://github.com/HexRaysSA/ida-sdk ida-sdk - name: Configure - run: cmake -B build -DIDASQL_WITH_AI_AGENT=ON -DIDASQL_BUILD_EXAMPLES=ON + run: cmake -B build -DIDASQL_WITH_MCP=OFF -DIDASQL_BUILD_EXAMPLES=ON - name: Build run: cmake --build build --config Release @@ -120,117 +117,10 @@ jobs: name: cli-windows path: ${{ env.IDASDK }}/src/bin/idasql.exe - # ============================================================================ - # Build no-agent variant (CLI + Plugin) — lightweight HTTP-only, no AI deps - # ============================================================================ - build-linux-no-agent: - name: Build no-agent (Linux) - runs-on: ubuntu-latest - env: - IDASDK: ${{ github.workspace }}/ida-sdk - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Checkout submodules - run: git submodule update --init --recursive - - - name: Setup IDA SDK - run: git clone --depth 1 --recurse-submodules https://github.com/HexRaysSA/ida-sdk ida-sdk - - - name: Configure - run: cmake -B build -DCMAKE_BUILD_TYPE=Release -DIDASQL_WITH_AI_AGENT=OFF - - - name: Build - run: cmake --build build --config Release - - - name: Upload plugin (no-agent) - uses: actions/upload-artifact@v4 - with: - name: plugin-no-agent-linux - path: ${{ env.IDASDK }}/src/bin/plugins/idasql.so - - - name: Upload CLI (no-agent) - uses: actions/upload-artifact@v4 - with: - name: cli-no-agent-linux - path: ${{ env.IDASDK }}/src/bin/idasql - - build-macos-no-agent: - name: Build no-agent (macOS) - runs-on: macos-latest - env: - IDASDK: ${{ github.workspace }}/ida-sdk - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Checkout submodules - run: git submodule update --init --recursive - - - name: Setup IDA SDK - run: git clone --depth 1 --recurse-submodules https://github.com/HexRaysSA/ida-sdk ida-sdk - - - name: Configure - run: cmake -B build -DCMAKE_BUILD_TYPE=Release -DIDASQL_WITH_AI_AGENT=OFF - - - name: Build - run: cmake --build build --config Release - - - name: Upload plugin (no-agent) - uses: actions/upload-artifact@v4 - with: - name: plugin-no-agent-macos - path: ${{ env.IDASDK }}/src/bin/plugins/idasql.dylib - - - name: Upload CLI (no-agent) - uses: actions/upload-artifact@v4 - with: - name: cli-no-agent-macos - path: ${{ env.IDASDK }}/src/bin/idasql - - build-windows-no-agent: - name: Build no-agent (Windows) - runs-on: windows-latest - env: - IDASDK: ${{ github.workspace }}/ida-sdk - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Checkout submodules - shell: bash - run: git submodule update --init --recursive - - - name: Setup IDA SDK - shell: bash - run: git clone --depth 1 --recurse-submodules https://github.com/HexRaysSA/ida-sdk ida-sdk - - - name: Configure - run: cmake -B build -DIDASQL_WITH_AI_AGENT=OFF - - - name: Build - run: cmake --build build --config Release - - - name: Upload plugin (no-agent) - uses: actions/upload-artifact@v4 - with: - name: plugin-no-agent-windows - path: ${{ env.IDASDK }}/src/bin/plugins/idasql.dll - - - name: Upload CLI (no-agent) - uses: actions/upload-artifact@v4 - with: - name: cli-no-agent-windows - path: ${{ env.IDASDK }}/src/bin/idasql.exe - - # ============================================================================ - # Release - # ============================================================================ publish-release: name: Publish Release if: startsWith(github.ref, 'refs/tags/') - needs: [build-linux, build-macos, build-windows, build-linux-no-agent, build-macos-no-agent, build-windows-no-agent] + needs: [build-linux, build-macos, build-windows] runs-on: ubuntu-latest steps: - name: Checkout @@ -243,89 +133,48 @@ jobs: path: plugins merge-multiple: true - - name: Download CLI (with agent) + - name: Download CLI (Linux) uses: actions/download-artifact@v4 with: name: cli-linux path: cli-linux - - uses: actions/download-artifact@v4 - with: - name: cli-macos - path: cli-macos - - uses: actions/download-artifact@v4 - with: - name: cli-windows - path: cli-windows - - name: Download no-agent plugins + - name: Download CLI (macOS) uses: actions/download-artifact@v4 with: - pattern: plugin-no-agent-* - path: plugins-no-agent - merge-multiple: true + name: cli-macos + path: cli-macos - - name: Download CLI (no agent) + - name: Download CLI (Windows) uses: actions/download-artifact@v4 with: - name: cli-no-agent-linux - path: cli-no-agent-linux - - uses: actions/download-artifact@v4 - with: - name: cli-no-agent-macos - path: cli-no-agent-macos - - uses: actions/download-artifact@v4 - with: - name: cli-no-agent-windows - path: cli-no-agent-windows + name: cli-windows + path: cli-windows - name: Prepare release run: | - mkdir -p release/plugin release/plugin-no-agent + mkdir -p release/plugin mkdir -p release/cli/{windows,linux,macos} - mkdir -p release/cli-no-agent/{windows,linux,macos} - # Plugin with agent (flat, IDA-compatible) cp plugins/idasql.dll release/plugin/ || true cp plugins/idasql.so release/plugin/ || true cp plugins/idasql.dylib release/plugin/ || true cp ida-plugin.json release/plugin/ - # Plugin without agent (flat, IDA-compatible) - cp plugins-no-agent/idasql.dll release/plugin-no-agent/ || true - cp plugins-no-agent/idasql.so release/plugin-no-agent/ || true - cp plugins-no-agent/idasql.dylib release/plugin-no-agent/ || true - cp ida-plugin.json release/plugin-no-agent/ - # CLI with agent (organized by platform) cp cli-windows/idasql.exe release/cli/windows/ || true cp cli-linux/idasql release/cli/linux/ || true cp cli-macos/idasql release/cli/macos/ || true - # CLI without agent (organized by platform) - cp cli-no-agent-windows/idasql.exe release/cli-no-agent/windows/ || true - cp cli-no-agent-linux/idasql release/cli-no-agent/linux/ || true - cp cli-no-agent-macos/idasql release/cli-no-agent/macos/ || true echo "=== Plugin ===" && ls -la release/plugin/ - echo "=== Plugin (no-agent) ===" && ls -la release/plugin-no-agent/ - echo "=== CLI (agent) ===" && ls -laR release/cli/ - echo "=== CLI (no-agent) ===" && ls -laR release/cli-no-agent/ + echo "=== CLI ===" && ls -laR release/cli/ - name: Create plugin zip run: | cd release/plugin zip -9 ../idasql_plugin-${{ github.ref_name }}.zip * - - name: Create plugin zip (no agent) - run: | - cd release/plugin-no-agent - zip -9 ../idasql_plugin-no-agent-${{ github.ref_name }}.zip * - - - name: Create CLI zip (with agent) + - name: Create CLI zip run: | cd release/cli zip -r9 ../idasql_cli-${{ github.ref_name }}.zip windows linux macos - - name: Create CLI zip (no agent) - run: | - cd release/cli-no-agent - zip -r9 ../idasql_cli-no-agent-${{ github.ref_name }}.zip windows linux macos - - name: Create Release uses: softprops/action-gh-release@v2 with: @@ -334,6 +183,4 @@ jobs: generate_release_notes: true files: | release/idasql_plugin-${{ github.ref_name }}.zip - release/idasql_plugin-no-agent-${{ github.ref_name }}.zip release/idasql_cli-${{ github.ref_name }}.zip - release/idasql_cli-no-agent-${{ github.ref_name }}.zip diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a90244..b62c7e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.20) -project(idasql VERSION 0.0.7 LANGUAGES C CXX) +project(idasql LANGUAGES C CXX) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -69,76 +69,35 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/include) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/lib/include) # ============================================================================ -# AI Agent Support (libagents - optional) +# MCP Support (optional) # ============================================================================ -option(IDASQL_WITH_AI_AGENT "Build with AI agent support (libagents)" ON) - -if(IDASQL_WITH_AI_AGENT) - message(STATUS "idasql: Building with AI agent support via libagents") - - # Prompt header generation is required when AI agent support is enabled. - find_package(Python3 REQUIRED COMPONENTS Interpreter) - set(PROMPT_MD "${CMAKE_CURRENT_SOURCE_DIR}/prompts/idasql_agent.md") - set(IDASQL_GENERATED_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated" CACHE INTERNAL "") - set(PROMPT_HPP "${IDASQL_GENERATED_INCLUDE_DIR}/idasql_agent_prompt.hpp") - - file(MAKE_DIRECTORY ${IDASQL_GENERATED_INCLUDE_DIR}) - - add_custom_command( - OUTPUT ${PROMPT_HPP} - COMMAND ${Python3_EXECUTABLE} - ${CMAKE_CURRENT_SOURCE_DIR}/scripts/embed_prompt.py - ${PROMPT_MD} ${PROMPT_HPP} - DEPENDS ${PROMPT_MD} - COMMENT "Checking/regenerating idasql_agent_prompt.hpp" - ) - add_custom_target(generate_prompt DEPENDS ${PROMPT_HPP}) - - # libagents build options - set(LIBAGENTS_BUILD_TESTS OFF CACHE BOOL "" FORCE) - set(LIBAGENTS_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) - set(LIBAGENTS_BUILD_CLI OFF CACHE BOOL "" FORCE) - set(LIBAGENTS_BUILD_COPILOT ON CACHE BOOL "" FORCE) - - # FetchContent from GitHub (with recursive submodules for fastmcpp, SDKs) - message(STATUS "idasql: Fetching libagents from GitHub...") - FetchContent_Declare(libagents - GIT_REPOSITORY https://github.com/0xeb/libagents.git - GIT_TAG ae162192d49126da6328254aab73417d673379cc - GIT_SUBMODULES_RECURSE TRUE - ) - FetchContent_MakeAvailable(libagents) - - # ======================================================================== - # Build speed: enable MSVC parallel compilation for dependency targets. - # IDA SDK's /MP only applies to ida_add_plugin/ida_add_idalib targets; - # FetchContent dependencies compile their files sequentially without this. - # ======================================================================== - if(MSVC) - foreach(_dep_target fastmcpp_core copilot_sdk_cpp claude_sdk libagents) - if(TARGET ${_dep_target}) - target_compile_options(${_dep_target} PRIVATE /MP) - endif() - endforeach() +option(IDASQL_WITH_MCP "Build with MCP server support (fastmcpp)" OFF) + +if(IDASQL_WITH_MCP) + message(STATUS "idasql: Building with MCP support via fastmcpp") + + # Reuse an existing fastmcpp target when available in a parent build. + if(NOT TARGET fastmcpp_core) + # Keep the dependency surface minimal for idasql. + set(FASTMCPP_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(FASTMCPP_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + set(FASTMCPP_ENABLE_POST_STREAMING OFF CACHE BOOL "" FORCE) + set(FASTMCPP_ENABLE_SAMPLING_HTTP_HANDLERS OFF CACHE BOOL "" FORCE) + set(FASTMCPP_FETCH_CURL OFF CACHE BOOL "" FORCE) + + message(STATUS "idasql: Fetching fastmcpp from GitHub...") + FetchContent_Declare(fastmcpp + GIT_REPOSITORY https://github.com/0xeb/fastmcpp.git + GIT_TAG 4837e8e0a27d0011b73643fe02b3553cd3935c34 + GIT_SHALLOW TRUE + ) + FetchContent_MakeAvailable(fastmcpp) endif() - # Build speed: merge translation units to reduce header re-parsing. - # Note: fastmcpp_core is excluded — its server files use duplicate - # anonymous-namespace symbols that conflict under unity builds. - foreach(_dep_target copilot_sdk_cpp claude_sdk libagents) - if(TARGET ${_dep_target}) - set_target_properties(${_dep_target} PROPERTIES UNITY_BUILD ON) - endif() - endforeach() - - # Exclude unused targets from default build (e.g. claude-sdk debug tool) - foreach(_unused_target debug) - if(TARGET ${_unused_target}) - set_target_properties(${_unused_target} PROPERTIES EXCLUDE_FROM_ALL TRUE) - endif() - endforeach() - + if(MSVC AND TARGET fastmcpp_core) + target_compile_options(fastmcpp_core PRIVATE /MP) + endif() endif() # Add subdirectories diff --git a/CMakePresets.json b/CMakePresets.json index 84173c2..a7a6962 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -8,18 +8,7 @@ "binaryDir": "${sourceDir}/build", "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", - "IDASQL_WITH_AI_AGENT": "ON" - } - }, - { - "name": "release-no-agent", - "displayName": "Release (no AI agent)", - "description": "Build without AI agent support to reduce binary size. Excludes libagents, fastmcpp, claude_sdk, copilot_sdk_cpp, easywsclient, tiny-process-library.", - "generator": "Visual Studio 17 2022", - "binaryDir": "${sourceDir}/build-no-agent", - "cacheVariables": { - "CMAKE_BUILD_TYPE": "Release", - "IDASQL_WITH_AI_AGENT": "OFF" + "IDASQL_WITH_MCP": "OFF" } } ] diff --git a/README.md b/README.md index f339a74..9a7ef0a 100644 --- a/README.md +++ b/README.md @@ -1,55 +1,33 @@ # IDASQL -Talk to your IDA database. +Query IDA Pro databases with SQL. ``` -you: what's the most called function? -agent: The most called function is `__security_check_cookie` with 1,247 callers. - This is typical for MSVC-compiled binaries - it's the stack canary check. - -you: find functions that reference "password" strings -agent: Found 3 functions referencing password-related strings: - - validate_credentials (0x401240) - references "password incorrect" - - parse_config (0x402890) - references "password_hash" - - debug_dump (0x405120) - references "password: %s" - -you: decompile validate_credentials -agent: [displays pseudocode] +you: /idasql analyze this binary; tell me the most called function +assistant: __security_check_cookie has the highest fan-in (1,247 callers). + It is likely the compiler stack-cookie check used by MSVC. ``` -Ask complex questions that would take hours to script: +Use any workflow you prefer: +- Run SQL directly in `idasql` +- Use `.http`/`--http` for stateless automation +- Use `.mcp`/`--mcp` when built with `-DIDASQL_WITH_MCP=ON` +- Pair with your favorite coding CLI and the `/idasql` skill -``` -you: find functions that make at most 3 calls, where those callees - don't go deeper than 5 levels in the call graph +IDASQL exposes IDA Pro databases as SQL tables. -agent: Looking for shallow functions with bounded call depth... - - Found 47 functions matching your criteria: - - init_config (0x401000) - 2 calls, max depth 3 - - validate_checksum (0x401200) - 3 calls, max depth 4 - - parse_header (0x401400) - 1 call, max depth 2 - ... - - These are good candidates for isolated analysis - simple entry - points that don't spiral into deep call chains. -``` - -IDASQL exposes IDA Pro databases as SQL tables with a built-in AI agent. Ask questions in plain English. The agent writes SQL, runs queries, and explains results. - -Works as a **standalone CLI** (query `.i64` files directly) or as an **IDA plugin** (query the open database). No scripting. No IDAPython. Just ask. +Works as a **standalone CLI** (query `.i64` files directly) or as an **IDA plugin** (query the open database). No scripting. No IDAPython. Just SQL. **No indexing required.** IDA already has everything indexed. Queries run instantly against the live database. ## Features -- **AI Agent** - Natural language queries with Claude or GitHub Copilot - **SQL Interface** - Full SQL access to functions, strings, imports, xrefs, instructions, types - **Unified Entity Search** - `grep` table + `grep()` function search functions, labels, segments, types, members, and enums - **Standalone CLI** - Query `.i64` files without opening IDA GUI -- **IDA Plugin** - SQL/AI interface inside IDA's command line -- **Remote Server** - Query IDA from external tools, scripts, or coding agents -- **Zero Setup** - Uses your existing Claude Code or Copilot authentication +- **IDA Plugin** - SQL interface inside IDA's command line +- **Remote Server** - Query IDA from external tools via HTTP or MCP +- **Optional MCP** - Build-time flag (`-DIDASQL_WITH_MCP=ON`), off by default ## Screenshots @@ -77,39 +55,28 @@ idasql> SELECT name FROM funcs WHERE size > 1000; idasql> .tables -- list available tables idasql> .schema funcs -- show table schema idasql> .http start -- start HTTP server from REPL -idasql> .mcp start -- start MCP server from REPL +idasql> .mcp start -- start MCP server from REPL (if built with MCP) idasql> .quit -- exit ``` ![CLI Interactive](assets/idasql_cli_interactive_1.jpg) -### CLI - AI Agent - -Talk to the database in plain English. The agent translates to SQL, runs queries, and explains results. +### Skill Workflow (External CLI) ```bash -idasql -s database.i64 -i --agent +idasql -s database.i64 --http 8081 +# or, if built with -DIDASQL_WITH_MCP=ON: +idasql -s database.i64 --mcp ``` -Ask questions naturally: +In your favorite coding CLI, use the `/idasql` skill: ``` -idasql> how many functions are there? -idasql> which function is called the most? -idasql> find strings that look like URLs -idasql> what imports are related to file operations? -idasql> show me the largest functions +/idasql analyze this binary; tell me the most called functions. +/idasql find functions that reference "password" strings and rank by xrefs. +/idasql show callers of CreateFileW and summarize error handling. ``` -![CLI Agent](assets/idasql_cli_agent_2.jpg) - -The agent can generate visualizations like call graphs: - -``` -idasql> draw a call graph for the main function -idasql> visualize the call hierarchy of CreateFileW -``` - -![CLI Agent Call Graph](assets/idasql_cli_agent_3.jpg) +![CLI Skill Workflow](assets/idasql_cli_handoff_1.jpg) ### IDA Plugin @@ -117,29 +84,19 @@ Select `idasql` from the CLI dropdown at the bottom of IDA: ![Plugin CLI Select](assets/idasql_plugin_cli_select.jpg) -Type SQL or natural language questions directly. The agent has full access to IDA's capabilities: - -``` -idasql> what's the busiest function? -idasql> find functions that reference "error" -idasql> which structures have the most members? -``` - -![Plugin Agent](assets/idasql_plugin_agent_1.jpg) - -The agent can invoke the decompiler, analyze types, and trace cross-references: +Type SQL directly, or expose the open database through `.http` / `.mcp` for external tooling: ``` -idasql> decompile the free_base function -idasql> what does the _CONTEXT structure look like? -idasql> who calls VirtualAlloc and what do they do with it? +idasql> SELECT name, size FROM funcs ORDER BY size DESC LIMIT 10; +idasql> .http start +idasql> .mcp start ``` -![Plugin Decompile](assets/idasql_plugin_agent_2.jpg) +![Plugin Workflow](assets/idasql_plugin_handoff_1.jpg) ### HTTP Server -The plugin can run an HTTP server for scripting, tooling, and agent workflows: +The plugin can run an HTTP server for scripting and tooling workflows: ```bash idasql -s database.i64 --http 8081 --token @@ -157,12 +114,6 @@ idasql -s database.i64 -q "SELECT name, address FROM funcs LIMIT 10" # Interactive mode idasql -s database.i64 -i -# AI agent mode -idasql -s database.i64 -i --agent - -# One-shot natural language query -idasql -s database.i64 --prompt "find the largest function" - # Run SQL script idasql -s database.i64 -f queries.sql @@ -178,12 +129,20 @@ idasql -s database.i64 --export dump.sql 1. Build and install the plugin 2. Open a database in IDA 3. Select `idasql` from the command interpreter dropdown -4. Type SQL or natural language (in agent mode) +4. Type SQL directly ```sql SELECT name, printf('0x%X', address) as addr FROM funcs WHERE size > 1000; ``` +Plugin-only UI context query: + +```sql +SELECT get_ui_context_json(); +``` + +`get_ui_context_json()` is available in GUI plugin runtime only (not idalib/CLI mode). + ## Available Tables | Table | Description | @@ -201,6 +160,7 @@ SELECT name, printf('0x%X', address) as addr FROM funcs WHERE size > 1000; | `breakpoints` | Breakpoints - address, type, enabled, condition (full CRUD) | | `grep` | Unified entity search table (`pattern`, `name`, `kind`, `address`, `ordinal`, `parent_name`, `full_name`) | | `grep(pattern, limit, offset)` | Unified entity search function that returns JSON | +| `get_ui_context_json()` | Plugin-only UI context JSON (GUI runtime only) | ### Local Variable Mutation @@ -212,7 +172,6 @@ Hex-Rays-backed local variable surfaces: - `UPDATE ctree_lvars SET name/type ... WHERE func_addr = ... AND idx = ...` as SQL update path Use `idx`-based writes when possible. Some internal/decompiler temps can be hidden or non-nameable. -For full workflow and edge-case guidance, use `idasql/prompts/idasql_agent.md`. ```sql SELECT list_lvars(0x401000); @@ -453,61 +412,19 @@ INSERT INTO types_members (type_ordinal, member_name, member_type) VALUES (42, ' INSERT INTO types_enum_values (type_ordinal, value_name, value) VALUES (15, 'FLAG_ACTIVE', 1); ``` -## AI Agent - -The agent translates natural language to SQL, executes queries, and explains results. It has access to all tables and can perform multi-step analysis. - -``` -idasql> what functions have the most cross-references? -idasql> find strings that look like file paths -idasql> show me the call hierarchy of main -idasql> which imports are related to networking? -``` - -See an [example session](examples/agent_session_strings.md) analyzing string references in a binary. - -### Prerequisites for AI Features - -The AI agent requires one of these CLI tools installed and authenticated: +## Skill-Assisted Workflows -| Provider | CLI Tool | Install | Login | -|----------|----------|---------|-------| -| Claude (default) | [Claude Code](https://docs.anthropic.com/en/docs/claude-code) | `npm install -g @anthropic-ai/claude-code` | Run `claude`, then `/login` | -| GitHub Copilot | [Copilot CLI](https://github.com/features/copilot/cli/) | `npm install -g @github/copilot` | Run `copilot`, then `/login` | +Use IDASQL as the data plane and drive analysis from your preferred coding CLI with the `/idasql` skill. -**Important:** You must be logged in before using AI features. No API keys needed for basic usage. - -### Provider Configuration - -``` -.agent provider claude # Claude (default) -.agent provider copilot # GitHub Copilot -``` - -### Bring Your Own Key (BYOK) - -For direct API access or local inference, enable BYOK mode: - -``` -.agent byok enable -.agent byok type anthropic # or: openai, azure -.agent byok key sk-ant-... -.agent byok model claude-sonnet-4-20250514 -.agent byok endpoint https://api.anthropic.com # optional -``` - -With `copilot` provider and BYOK, you can point to local inference servers (Ollama, LM Studio, vLLM) using OpenAI-compatible endpoints: +Example prompts: ``` -.agent provider copilot -.agent byok enable -.agent byok type openai -.agent byok endpoint http://localhost:11434/v1 -.agent byok model llama3 -.agent byok key unused +/idasql analyze this binary; tell me the high-risk entry points and why. +/idasql find all callers of VirtualAlloc and summarize allocation patterns. +/idasql list functions touching registry APIs, then map related strings. ``` -Settings persist in `~/.idasql/agent_settings.json`. +The assistant can run focused SQL queries through IDASQL and then summarize findings in plain language. ## Building @@ -527,7 +444,7 @@ cmake --build build/cli --config Release ### Plugin ```bash -cmake -S src/plugin -B build/plugin -DIDASQL_WITH_AI_AGENT=ON +cmake -S src/plugin -B build/plugin -DIDASQL_WITH_MCP=OFF cmake --build build/plugin --config Release ``` @@ -590,6 +507,8 @@ The server uses a random port (8100-8199) to avoid conflicts with `--http`. For MCP-compatible clients (Claude Desktop, etc.): +`--mcp` and `.mcp` are available only when built with `-DIDASQL_WITH_MCP=ON` (default is `OFF`). + ```bash # Standalone mode idasql -s database.i64 --mcp @@ -610,75 +529,32 @@ Configure your MCP client: } ``` -Tools: `idasql_query` (direct SQL), `idasql_agent` (natural language) +Tools: `idasql_query` (direct SQL) -## Integration with Coding Agents +## Integration with Your Favorite CLI -The CLI is designed for integration with coding agents (Claude Code, Cursor, Aider, Cline, etc.). Agents can query IDA databases directly without writing IDAPython or understanding IDA's API. +Use IDASQL with any coding CLI that supports a `/idasql` skill. ### Setup -1. Open your target binary in IDA Pro (plugin loads automatically) -2. Start HTTP mode in CLI or REPL (`idasql -s --http 8081` or `.http start`) -3. Use `/query` to execute SQL from scripts or coding agents - -### Instructing an Agent +1. Open your target in IDA Pro, or point CLI mode at an `.i64` file. +2. Start HTTP mode (`idasql -s --http 8081`) or MCP mode (`idasql -s --mcp`) if compiled with MCP. +3. In your coding CLI, run `/idasql` prompts against that backend. -When working with a coding agent on reverse engineering tasks, provide these instructions: +### Example Prompts ``` -IDASQL HTTP server is running on http://127.0.0.1:8081 with token . - -To query the IDA database, use HTTP POST: - - curl -X POST http://127.0.0.1:8081/query -H "Authorization: Bearer " -d "SQL QUERY" - -Available tables: funcs, segments, names, imports, entries, strings, xrefs, instructions, blocks, types - -Example queries: - # List functions - curl -X POST http://127.0.0.1:8081/query -H "Authorization: Bearer " -d "SELECT name, printf('0x%X', address) as addr FROM funcs LIMIT 20" - - # Find strings containing a keyword - curl -X POST http://127.0.0.1:8081/query -H "Authorization: Bearer " -d "SELECT * FROM strings WHERE content LIKE '%error%'" - - # Find callers of a function - curl -X POST http://127.0.0.1:8081/query -H "Authorization: Bearer " -d "SELECT printf('0x%X', from_ea) as caller FROM xrefs WHERE to_ea = 0x401000" - - # Search for any identifier - curl -X POST http://127.0.0.1:8081/query -H "Authorization: Bearer " -d "SELECT name, kind, address FROM grep WHERE pattern = 'CreateFile%' LIMIT 10" -``` - -### Agent Workflow Example - -``` -User: "Find all functions that call CreateFileW and check if they handle errors" - -Agent thinks: I'll query IDASQL to find the callers - -Agent runs: -$ curl -X POST http://127.0.0.1:8081/query -H "Authorization: Bearer abc123" -d " - SELECT DISTINCT func_at(x.from_ea) as caller, printf('0x%X', x.from_ea) as call_site - FROM xrefs x - JOIN imports i ON x.to_ea = i.address - WHERE i.name = 'CreateFileW' -" - -Agent receives: -| caller | call_site | -|---------------------|------------| -| ReadConfigFile | 0x401234 | -| SaveDocument | 0x401890 | -| ... | ... | - -Agent then queries for error handling patterns in those functions... +/idasql analyze this binary; tell me the top 10 largest functions and likely responsibilities. +/idasql find all callers of CreateFileW and summarize error handling behavior. +/idasql identify suspicious hardcoded URLs and the functions that reference them. +/idasql map imports related to crypto and show nearest string evidence. ``` -The agent never needs to write IDAPython. SQL queries are self-contained and portable. +The `/idasql` skill can execute SQL, iterate, and summarize results without requiring IDAPython scripting. ## Claude Code Plugin -IDASQL is available as a Claude Code plugin, allowing Claude to query IDA databases directly within your coding workflow. +IDASQL is available as a Claude Code plugin with 13 topic-focused skills for reverse engineering workflows. ### Prerequisites @@ -689,27 +565,35 @@ IDASQL is available as a Claude Code plugin, allowing Claude to query IDA databa ### Installation ```bash -# Add the marketplace (one-time) -/plugin marketplace add 0xeb/anthropic-xsql-tools-plugin - -# Install idasql plugin -/plugin install idasql@0xeb-tools +claude /install-plugin https://github.com/allthingsida/idasql-skills ``` -### Usage +### Skills -Once installed, the skill is automatically available: +| Skill | Description | +|-------|-------------| +| `connect` | Connection, CLI, HTTP, UI context, routing index | +| `disassembly` | Functions, segments, instructions, blocks | +| `data` | Strings, bytes, string cross-references | +| `xrefs` | Cross-references, imports, entity search | +| `decompiler` | Full decompiler reference (ctree, lvars, union selection) | +| `annotations` | Edit and annotate decompilation and disassembly | +| `types` | Type system mechanics (structs, unions, enums, parse_decls) | +| `debugger` | Breakpoints and byte patching | +| `storage` | Persistent key-value storage (netnode) | +| `idapython` | Python execution via SQL | +| `functions` | SQL functions reference | +| `analysis` | Analysis workflows, security audits, advanced SQL | +| `resource` | Recursive source recovery methodology | -``` -"Using idasql, count functions in myfile.i64" -"Using idasql, decompile main in test.i64" -"Using idasql, find strings containing 'password'" -``` +### Usage -### Updating +Once installed, skills are automatically available: -```bash -/plugin update idasql +``` +/idasql analyze this binary; tell me what it does first. +/idasql count functions in myfile.i64 and list the largest 20. +/idasql find strings containing 'password' and map referencing functions. ``` ### Troubleshooting @@ -726,7 +610,7 @@ git config --global url."https://github.com/".insteadOf "git@github.com:" - **[libxsql](https://github.com/0xeb/libxsql)** - Header-only C++17 library for exposing C++ data structures as SQLite virtual tables. Provides the fluent builder API for defining tables, constraint pushdown, and HTTP thinclient support. -- **[libagents](https://github.com/0xeb/libagents)** - C++ library for building AI agents with tool use. Powers the natural language interface with support for Claude (Anthropic) and GitHub Copilot providers. +- **[fastmcpp](https://github.com/0xeb/fastmcpp)** - Optional MCP server implementation used when building with `-DIDASQL_WITH_MCP=ON`. ## Author diff --git a/benchmark.md b/benchmark.md deleted file mode 100644 index 5da72ed..0000000 --- a/benchmark.md +++ /dev/null @@ -1,47 +0,0 @@ -# Build Performance Benchmark - -## Environment - -- **OS**: Windows 11 -- **CPU**: 24 logical processors -- **Compiler**: MSVC (Visual Studio 17.14) -- **CMake**: 4.0.3 -- **Config**: Release -- **Features**: All ON (AI Agent, HTTP, Plugin, CLI, Examples) - -## Results (clean rebuild, deps already fetched) - -| Branch | Build Time | Speedup | -|--------|-----------|---------| -| `main` (baseline) | ~280 sec (4.7 min) | — | -| `build/faster-windows-build` | ~175 sec (2.9 min) | **~38% faster** | - -## Changes Applied - -### 1. MSVC `/MP` on dependency targets (~80-85 sec saved) -Dependency libraries (fastmcpp_core, copilot_sdk_cpp, claude_sdk, libagents) were compiling -files sequentially because IDA SDK's `/MP` flag only applies to `ida_add_plugin`/`ida_add_idalib` -targets. Adding `/MP` enables multi-processor compilation within each dependency target. - -### 2. Unity build on compatible dependency targets (~20-30 sec saved) -Enabled `UNITY_BUILD` for copilot_sdk_cpp, claude_sdk, and libagents. This merges -translation units to reduce redundant header parsing. - -**Note**: fastmcpp_core is excluded because its server files (stdio_server.cpp, -sse_server.cpp, streamable_http_server.cpp) use duplicate anonymous-namespace symbols -that conflict under unity builds. - -### 3. Pinned dependency versions with `GIT_SHALLOW` (~5-30 sec saved on configure) -Changed `GIT_TAG main` to specific commit SHAs and added `GIT_SHALLOW TRUE` for both -libxsql and libagents. This avoids re-fetching on every configure and reduces clone size. - -### 4. Excluded unused targets -The `debug` tool from claude-agent-sdk-cpp is excluded from the default build via -`EXCLUDE_FROM_ALL`. - -## Not Yet Applied (potential further improvements) - -- **PCH for idasql targets**: Precompiled headers for CLI/plugin/examples could save - another ~50-60 sec by eliminating redundant IDA SDK + xsql header parsing. -- **libxsql partial de-header-only**: Moving non-template code to .cpp files would - reduce per-TU compile cost. Requires upstream libxsql changes. diff --git a/examples/agent_session_strings.md b/docs/agent_session_strings.md similarity index 100% rename from examples/agent_session_strings.md rename to docs/agent_session_strings.md diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 558b5d2..e1ea5f4 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,44 +1,35 @@ # examples/CMakeLists.txt - IDASQL Example Programs # -# Standalone build: -# cmake -B build -# cmake --build build --config Release +cmake_minimum_required(VERSION 3.20) + +# Build from repository root: +# cmake -S . -B build +# cmake --build build --config Release --target example_basic # # Run: # Windows: set PATH=%IDASDK%\src\bin;%PATH% # build\Release\example_basic.exe database.i64 # Linux: LD_LIBRARY_PATH=$IDASDK/src/bin ./build/example_basic database.i64 -# Standalone mode: set up project and dependencies if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) - cmake_minimum_required(VERSION 3.20) - project(idasql_examples VERSION 1.0 LANGUAGES C CXX) - set(CMAKE_CXX_STANDARD 20) - set(CMAKE_CXX_STANDARD_REQUIRED ON) - - # Include IDA SDK bootstrap - include($ENV{IDASDK}/src/cmake/bootstrap.cmake) - find_package(idasdk REQUIRED) + project(idasql_examples LANGUAGES NONE) + message(FATAL_ERROR + "idasql examples are built from the repository root.\n" + "Use:\n" + " cmake -S . -B build\n" + " cmake --build build --config Release --target example_basic" + ) endif() -# ============================================================================ -# xsql dependency (provides SQLite) -# ============================================================================ +if(NOT TARGET idasql) + message(FATAL_ERROR "idasql_examples requires parent target 'idasql'.") +endif() -if(TARGET xsql::xsql) - message(STATUS "idasql_examples: Using xsql::xsql from parent project") -elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../external/libxsql/CMakeLists.txt") - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../external/libxsql ${CMAKE_CURRENT_BINARY_DIR}/libxsql) - message(STATUS "idasql_examples: Added libxsql from external/libxsql") -else() - message(FATAL_ERROR "libxsql not found. Initialize submodule: git submodule update --init external/libxsql") +if(NOT TARGET xsql::xsql) + message(FATAL_ERROR "idasql_examples requires parent target 'xsql::xsql'.") endif() -# libidasql interface library (headers only) -add_library(idasql_lib INTERFACE) -target_include_directories(idasql_lib INTERFACE - ${CMAKE_CURRENT_SOURCE_DIR}/../src/lib/include -) +set(IDASQL_EXAMPLE_LIB idasql) # Helper function to create examples function(add_idasql_example name) @@ -46,7 +37,7 @@ function(add_idasql_example name) TYPE EXECUTABLE SOURCES ${name}.cpp ) - target_link_libraries(${name} PRIVATE idasql_lib xsql::xsql) + target_link_libraries(${name} PRIVATE ${IDASQL_EXAMPLE_LIB} xsql::xsql) target_compile_definitions(${name} PRIVATE USE_IDA_SDK USE_HEXRAYS) if(MSVC) target_compile_options(${name} PRIVATE /bigobj) diff --git a/examples/example_basic.cpp b/examples/example_basic.cpp index c573889..af7b089 100644 --- a/examples/example_basic.cpp +++ b/examples/example_basic.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_basic.cpp - Basic IDASQL usage with Session * diff --git a/examples/example_breakpoints.cpp b/examples/example_breakpoints.cpp index c0a2bcb..da591db 100644 --- a/examples/example_breakpoints.cpp +++ b/examples/example_breakpoints.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_breakpoints.cpp - Breakpoint management with IDASQL * diff --git a/examples/example_custom_vtable.cpp b/examples/example_custom_vtable.cpp index 4723ce7..c728a00 100644 --- a/examples/example_custom_vtable.cpp +++ b/examples/example_custom_vtable.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_custom_vtable.cpp - Creating custom virtual tables * @@ -17,7 +20,9 @@ #include #include -// IDA SDK +// IDA SDK (ida.hpp must come first for base types) +#include +#include #include #include diff --git a/examples/example_decompiler.cpp b/examples/example_decompiler.cpp index f184c9b..cc5f4d7 100644 --- a/examples/example_decompiler.cpp +++ b/examples/example_decompiler.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_decompiler.cpp - Hex-Rays decompiler analysis with IDASQL * diff --git a/examples/example_functions.cpp b/examples/example_functions.cpp index 139482f..16db99d 100644 --- a/examples/example_functions.cpp +++ b/examples/example_functions.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_functions.cpp - Function analysis with IDASQL * diff --git a/examples/example_grep_entities.cpp b/examples/example_grep_entities.cpp index 8eb2751..ada6fe3 100644 --- a/examples/example_grep_entities.cpp +++ b/examples/example_grep_entities.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_grep_entities.cpp - Grep table composability examples * diff --git a/examples/example_grep_search.cpp b/examples/example_grep_search.cpp index 4a47f8a..06e640d 100644 --- a/examples/example_grep_search.cpp +++ b/examples/example_grep_search.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_grep_search.cpp - Grep-style unified entity search * diff --git a/examples/example_instructions.cpp b/examples/example_instructions.cpp index 8ad071d..f5d631d 100644 --- a/examples/example_instructions.cpp +++ b/examples/example_instructions.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_instructions.cpp - Instruction analysis with IDASQL * diff --git a/examples/example_plugin_style.cpp b/examples/example_plugin_style.cpp index 31ab295..0885497 100644 --- a/examples/example_plugin_style.cpp +++ b/examples/example_plugin_style.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_plugin_style.cpp - IDASQL usage when IDA is already running * diff --git a/examples/example_strings.cpp b/examples/example_strings.cpp index 45bceb1..30c901b 100644 --- a/examples/example_strings.cpp +++ b/examples/example_strings.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * example_strings.cpp - String analysis with IDASQL * diff --git a/ida-plugin.json b/ida-plugin.json index fdd6bee..f611028 100644 --- a/ida-plugin.json +++ b/ida-plugin.json @@ -2,9 +2,9 @@ "IDAMetadataDescriptorVersion": 1, "plugin": { "name": "IDASQL", - "version": "0.0.8", + "version": "0.0.10", "entryPoint": "idasql", - "description": "SQL interface for IDA databases. Query functions, xrefs, strings, types, and more using SQL. Supports local CLI, HTTP REST server, and optional AI agent integration.", + "description": "SQL interface for IDA databases. Query functions, xrefs, strings, types, and more using SQL. Supports local CLI, HTTP REST server, and optional MCP integration.", "urls": { "repository": "https://github.com/allthingsida/idasql" }, diff --git a/prompts/idasql_agent.md b/prompts/idasql_agent.md index 663c0ae..0b7984d 100644 --- a/prompts/idasql_agent.md +++ b/prompts/idasql_agent.md @@ -26,6 +26,23 @@ A comprehensive reference for AI agents to effectively use IDASQL - an SQL inter ### Addresses (ea_t) Everything in a binary has an **address** - a memory location where code or data lives. IDA uses `ea_t` (effective address) as unsigned 64-bit integers. SQL shows these as integers; use `printf('0x%X', address)` for hex display. +Address-taking SQL functions accept: +- integer EA values (preferred for deterministic scripts) +- numeric strings (`'4198400'`, `'0x401000'`) +- symbol names resolved with `get_name_ea(BADADDR, name)` (global names) + +Examples: +```sql +SELECT decompile('DriverEntry'); +SELECT set_type('DriverEntry', 'NTSTATUS DriverEntry(PDRIVER_OBJECT, PUNICODE_STRING);'); +SELECT comment_at('0x401000'); +``` + +If a symbol cannot be resolved, SQL functions return an explicit error like: +`Could not resolve name to address: `. + +Local label lookup that depends on a specific `from` context is not consulted by default (`BADADDR` resolution). Use explicit numeric EAs when needed. + ### Functions IDA groups code into **functions** with: - `address` / `start_ea` - Where the function begins @@ -84,6 +101,42 @@ Core decompiler surfaces: --- +## Context Awareness (Plugin UI) + +Use `get_ui_context_json()` when the user asks context-aware questions such as: +- "what am I looking at?" +- "what is on the screen?" +- "what's selected?" +- references like "this", "here", "current", "selected", or "that function" +- "grab the UI context" + +Behavior contract: +- If there is a selection, capture selection begin/end and preview text lines. +- Capture current widget type/title and whether it is a custom view. +- Capture chooser/list selections when available (for example, Local Types selections). +- Capture code context (address/function/segment) when available. +- In non-address views, return structured context with `has_address: false` and a reason. + +Temporal reference policy: +- For `this` / `here` / `current` / `selected`, capture a fresh context snapshot for this user question. +- For `that` / `previous` / `earlier`, use the most recently captured snapshot for this working flow when available. + +Freshness rule: +- Capture context once per user question, then reuse it while answering that question. +- Refresh context only when the user explicitly asks to re-check or refresh the UI context. + +Availability: +- `get_ui_context_json()` is plugin-only (GUI runtime). +- It is not available in idalib/CLI mode. +- If unavailable, continue with non-UI SQL workflows and state that UI context is unavailable in this runtime. + +Database orientation: +- Use `SELECT * FROM welcome` for a quick database overview (processor, bitness, address range, entry point, counts). +- The `welcome` table contains only database metadata — no UI context. +- For UI context (focused widget, selection, code location), use `get_ui_context_json()`. + +--- + ## Command-Line Interface IDASQL provides SQL access to IDA databases via command line or as a server. @@ -200,6 +253,7 @@ PRAGMA idasql.query_timeout_ms = 60000; -- set timeout (0 disables) PRAGMA idasql.queue_admission_timeout_ms = 120000; PRAGMA idasql.max_queue = 64; -- 0 = unbounded PRAGMA idasql.hints_enabled = 1; -- 1/0, on/off +PRAGMA idasql.enable_idapython = 1; -- 1/0, enable SQL Python execution PRAGMA idasql.timeout_push = 15000; -- push old timeout, set new PRAGMA idasql.timeout_pop; -- restore previous timeout ``` @@ -621,7 +675,7 @@ FROM disasm_calls WHERE callee_name LIKE '%malloc%'; ### Database Modification -Most write examples are documented next to their tables (`breakpoints`, `segments`, `names`, `instructions`, `types*`, `bookmarks`, `comments`, `ctree_lvars`). +Most write examples are documented next to their tables (`breakpoints`, `segments`, `names`, `instructions`, `types*`, `bookmarks`, `comments`, `ctree_lvars`, `netnode_kv`). Quick capability matrix: | Table | INSERT | UPDATE columns | DELETE | @@ -639,6 +693,7 @@ Quick capability matrix: | `types_members` | Yes | Yes | Yes | | `types_enum_values` | Yes | Yes | Yes | | `ctree_lvars` | — | `name`, `type`, `comment` | — | +| `netnode_kv` | Yes | `value` | Yes | Write support is covered by integration/e2e tests in: - `tests/idasql/write_operations_phase3_test.cpp` @@ -647,6 +702,7 @@ Write support is covered by integration/e2e tests in: - `tests/idasql/comments_table_test.cpp` - `tests/idasql/names_table_test.cpp` - `tests/idasql/bytes_table_test.cpp` +- `tests/idasql/netnode_kv_table_test.cpp` - `tests/idasql/patch_functions_test.cpp` - `tests/idasql/patched_bytes_table_test.cpp` @@ -1049,6 +1105,31 @@ UPDATE bookmarks SET description = 'confirmed branch' WHERE index = 0; DELETE FROM bookmarks WHERE index = 0; ``` +#### netnode_kv +Persistent key-value store backed by IDA netnodes. Data is saved inside the IDB automatically. Supports full CRUD and O(1) key lookup via `WHERE key = '...'`. + +| Column | Type | Writable | Description | +|--------|------|----------|-------------| +| `key` | TEXT | — | Unique key (identity, read-only) | +| `value` | TEXT | Yes | Arbitrary-length value (blob storage) | + +```sql +-- Store a value +INSERT INTO netnode_kv(key, value) VALUES('author', 'alice'); + +-- Read by key (O(1) lookup) +SELECT value FROM netnode_kv WHERE key = 'author'; + +-- List all entries +SELECT * FROM netnode_kv; + +-- Update a value +UPDATE netnode_kv SET value = '2.0' WHERE key = 'version'; + +-- Delete an entry +DELETE FROM netnode_kv WHERE key = 'author'; +``` + #### heads All defined items (code/data heads) in the database. @@ -1324,6 +1405,8 @@ This is **much faster** than scanning all disassembly lines because: - `func_start()` is O(1) lookup in IDA's function index ### Names & Functions +Address argument note: `addr`/`ea`/`func_addr` parameters accept integer EAs, numeric strings, and symbol names. + | Function | Description | |----------|-------------| | `name_at(addr)` | Name at address | @@ -1359,13 +1442,47 @@ This is **much faster** than scanning all disassembly lines because: |----------|-------------| | `set_name(addr, name)` | Set name at address | | `type_at(addr)` | Read type declaration applied at address | -| `set_type(addr, decl)` | Apply C declaration/type at address (empty decl clears type) | +| `set_type(addr, decl)` | Apply C declaration/type at address (empty decl clears type; `addr` may be EA, numeric string, or symbol name) | | `parse_decls(text)` | Import C declarations (struct/union/enum/typedef) into local types | Preferred SQL write surface for function metadata: - `UPDATE funcs SET name = '...', prototype = '...' WHERE address = ...` - `prototype` maps to `type_at/set_type` behavior and invalidates decompiler cache. +### Python Execution +| Function | Description | +|----------|-------------| +| `idapython_snippet(code[, sandbox])` | Execute Python snippet and return captured output text | +| `idapython_file(path[, sandbox])` | Execute Python file and return captured output text | + +Runtime guard: + +```sql +PRAGMA idasql.enable_idapython = 1; +``` + +Examples: + +```sql +SELECT idapython_snippet('print("hello from idapython")'); +SELECT idapython_file('C:/temp/script.py'); +SELECT idapython_snippet('counter = globals().get("counter", 0) + 1; print(counter)', 'alpha'); +``` + +Notes: +- disabled by default until pragma is enabled +- Python exceptions propagate as SQL errors +- `sandbox` isolates/persists Python globals by sandbox key + +### Context Awareness (Plugin UI) +| Function | Description | +|----------|-------------| +| `get_ui_context_json()` | Return current UI/widget/context JSON for context-aware prompts (plugin-only; executes through the same queued main-thread path and timeout behavior as other SQL functions) | + +```sql +SELECT get_ui_context_json(); +``` + ### Item Analysis | Function | Description | |----------|-------------| @@ -1402,7 +1519,7 @@ SELECT decode_insn(0x401000); | Function | Description | |----------|-------------| -| `decompile(addr)` | **PREFERRED** — Full pseudocode with line prefixes (requires Hex-Rays) | +| `decompile(addr)` | **PREFERRED** — Full pseudocode with line prefixes (`addr` may be EA, numeric string, or symbol name; available when decompiler surfaces are enabled) | | `decompile(addr, 1)` | Same output but forces re-decompilation (use after writes/renames) | | `list_lvars(addr)` | List local variables as JSON | | `rename_lvar(func_addr, lvar_idx, new_name)` | Rename a local variable by index | @@ -1575,10 +1692,16 @@ SELECT set_union_selection_item(0x140001BD0, 42, ''); -- Optional bridge when you want hybrid lookup + explicit item workflow: SELECT call_arg_item(0x140001BD0, 0x140001C3E, 0); --- Non-call expression workflow (e.g., comparisons/ifs): +-- Enum constant rendering in comparisons (e.g., fdwReason == 1 → DLL_PROCESS_ATTACH): +-- PREFERRED: retype the variable to an enum type — the decompiler infers constants automatically +SELECT parse_decls('typedef enum { DLL_PROCESS_DETACH=0, DLL_PROCESS_ATTACH=1 } fdw_reason_t;'); +UPDATE ctree_lvars SET type = 'fdw_reason_t' WHERE func_addr = 0x180001050 AND idx = 1; +SELECT decompile(0x180001050, 1); -- verify enum names appear + +-- Non-call expression workflow — advanced per-operand numform control: -- 1) resolve expression item deterministically by ea + op_name + nth SELECT ctree_item_at(0x140001BD0, 0x140001CBB, 'cot_eq', 0); --- 2) apply/read via generic expression helpers +-- 2) apply/read via generic expression helpers (opnum = disassembly operand index) SELECT set_numform_ea_expr(0x140001BD0, 0x140001CBB, 0, 'enum:operations_e', 'cot_eq', 0); SELECT get_numform_ea_expr(0x140001BD0, 0x140001CBB, 0, 'cot_eq', 0); SELECT set_numform_ea_expr(0x140001BD0, 0x140001CBB, 0, 'clear', 'cot_eq', 0); @@ -2673,6 +2796,7 @@ WHERE calling_conv = 'fastcall' AND return_is_ptr = 1; | Instruction analysis | `instructions WHERE func_addr = X` | | View function disassembly | `disasm_func(addr)` or `disasm_range(start, end)` | | View decompiled code | `decompile(addr)` | +| UI/screen context questions | `get_ui_context_json()` (plugin UI only) | | Edit decompiler comments | `UPDATE pseudocode SET comment = '...' WHERE func_addr = X AND ea = Y` | | AST pattern matching | `ctree WHERE func_addr = X` | | Call patterns | `ctree_v_calls`, `disasm_calls` | @@ -2694,6 +2818,7 @@ WHERE calling_conv = 'fastcall' AND return_is_ptr = 1; | Add struct members | `types_members` (INSERT) | | Add enum values | `types_enum_values` (INSERT) | | Modify database | `funcs`, `names`, `comments`, `bookmarks` (INSERT/UPDATE/DELETE) | +| Store custom key-value data | `netnode_kv` (full CRUD, persists in IDB) | | Entity search (structured) | `grep WHERE pattern = '...'` | | Entity search (JSON) | `grep('pattern', limit, offset)` | diff --git a/scripts/embed_prompt.py b/scripts/embed_prompt.py deleted file mode 100644 index 53e2924..0000000 --- a/scripts/embed_prompt.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python3 -""" -Converts idasql-agent.md to a C++ header with embedded raw string literal. -Only regenerates if the hpp is older than the md or doesn't exist. - -Run: python scripts/embed_prompt.py prompts/idasql_agent.md src/common/idasql_agent_prompt.hpp -""" - -import sys -import os -from datetime import datetime - - -def needs_regeneration(input_path: str, output_path: str) -> bool: - """Check if output needs regeneration based on file timestamps.""" - if not os.path.exists(output_path): - return True - input_mtime = os.path.getmtime(input_path) - output_mtime = os.path.getmtime(output_path) - return input_mtime > output_mtime - - -def split_content(content: str, max_chunk: int = 15000) -> list: - """Split content into chunks that MSVC can handle. - - MSVC has a ~16KB limit per string literal segment. - We split at line boundaries to keep it readable. - """ - chunks = [] - lines = content.split('\n') - current_chunk = [] - current_size = 0 - - for line in lines: - line_size = len(line) + 1 # +1 for newline - if current_size + line_size > max_chunk and current_chunk: - chunks.append('\n'.join(current_chunk)) - current_chunk = [line] - current_size = line_size - else: - current_chunk.append(line) - current_size += line_size - - if current_chunk: - chunks.append('\n'.join(current_chunk)) - - return chunks - - -def embed_prompt(input_path: str, output_path: str, force: bool = False) -> bool: - if not force and not needs_regeneration(input_path, output_path): - print(f"Skipping {output_path} (up-to-date)") - return False - - with open(input_path, 'r', encoding='utf-8') as f: - content = f.read() - - # Use short delimiter (MSVC max is 16 chars) - delimiter = "PROMPT" - - # Split content for MSVC compatibility - chunks = split_content(content) - - # Build concatenated string literals - if len(chunks) == 1: - string_literal = f'R"{delimiter}({chunks[0]}){delimiter}"' - else: - parts = [] - for i, chunk in enumerate(chunks): - parts.append(f'R"{delimiter}({chunk}){delimiter}"') - string_literal = '\n '.join(parts) - - header = f'''// Auto-generated from {os.path.basename(input_path)} -// Generated: {datetime.now().isoformat()} -// DO NOT EDIT - regenerate with: python scripts/embed_prompt.py - -#pragma once - -namespace idasql {{ - -inline constexpr const char* SYSTEM_PROMPT = - {string_literal}; - -}} // namespace idasql -''' - - os.makedirs(os.path.dirname(output_path), exist_ok=True) - with open(output_path, 'w', encoding='utf-8') as f: - f.write(header) - - print(f"Generated {output_path} ({len(content)} bytes, {len(chunks)} chunks)") - return True - - -if __name__ == "__main__": - force = "--force" in sys.argv - args = [a for a in sys.argv[1:] if a != "--force"] - - if len(args) != 2: - print(f"Usage: {sys.argv[0]} [--force] ") - sys.exit(1) - - embed_prompt(args[0], args[1], force) diff --git a/src/cli/CMakeLists.txt b/src/cli/CMakeLists.txt index 3265739..7367b96 100644 --- a/src/cli/CMakeLists.txt +++ b/src/cli/CMakeLists.txt @@ -8,6 +8,7 @@ ida_add_idalib(idasql_cli main.cpp ) target_link_libraries(idasql_cli PRIVATE xsql::xsql) +target_link_libraries(idasql_cli PRIVATE idasql) target_include_directories(idasql_cli PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../lib/include) target_compile_definitions(idasql_cli PRIVATE USE_IDA_SDK) @@ -38,12 +39,12 @@ if(MSVC) elseif(APPLE) set_target_properties(idasql_cli PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE - INSTALL_RPATH "@executable_path;$ENV{IDASDK}/bin" + INSTALL_RPATH "@executable_path;$ENV{IDASDK}/src/bin" ) elseif(UNIX) set_target_properties(idasql_cli PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE - INSTALL_RPATH "$ORIGIN;$ENV{IDASDK}/bin" + INSTALL_RPATH "$ORIGIN;$ENV{IDASDK}/src/bin" ) endif() @@ -53,30 +54,24 @@ if(USE_HEXRAYS) target_compile_definitions(idasql_cli PRIVATE USE_HEXRAYS) endif() -# AI Agent support (from parent project) -if(IDASQL_WITH_AI_AGENT) +# MCP support (from parent project) +if(IDASQL_WITH_MCP) target_sources(idasql_cli PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../common/ai_agent.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../common/mcp_server.cpp ) target_include_directories(idasql_cli PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${CMAKE_CURRENT_SOURCE_DIR}/../../external/libagents/external/fastmcpp/include - ${IDASQL_GENERATED_INCLUDE_DIR} ) - target_link_libraries(idasql_cli PRIVATE libagents fastmcpp_core) - target_compile_definitions(idasql_cli PRIVATE IDASQL_HAS_AI_AGENT) + target_link_libraries(idasql_cli PRIVATE fastmcpp_core) + target_compile_definitions(idasql_cli PRIVATE IDASQL_HAS_MCP) - if(TARGET generate_prompt) - add_dependencies(idasql_cli generate_prompt) - endif() - - message(STATUS "idasql_cli: AI agent support enabled (with MCP server)") + message(STATUS "idasql_cli: MCP support enabled") endif() # HTTP server support target_sources(idasql_cli PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common/http_server.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../common/welcome_query.cpp ) target_compile_definitions(idasql_cli PRIVATE XSQL_HAS_THINCLIENT) diff --git a/src/cli/main.cpp b/src/cli/main.cpp index cad96f2..762623f 100644 --- a/src/cli/main.cpp +++ b/src/cli/main.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + #include #include @@ -26,32 +29,30 @@ #include #include "../common/http_server.hpp" +#include "../common/idasql_commands.hpp" +#include "../common/json_utils.hpp" +#include "../common/welcome_query.hpp" #include "../common/idasql_version.hpp" -// AI Agent integration (optional, enabled via IDASQL_WITH_AI_AGENT) -#ifdef IDASQL_HAS_AI_AGENT -#include "../common/ai_agent.hpp" -#include "../common/idasql_commands.hpp" +// MCP integration (optional, enabled via IDASQL_WITH_MCP) +#ifdef IDASQL_HAS_MCP #include "../common/mcp_server.hpp" +#endif // Global signal handler state namespace { std::atomic g_quit_requested{false}; - idasql::AIAgent* g_agent = nullptr; +#ifdef IDASQL_HAS_MCP std::unique_ptr g_mcp_server; - std::unique_ptr g_mcp_agent; +#endif std::unique_ptr g_repl_http_server; } extern "C" void signal_handler(int sig) { (void)sig; g_quit_requested.store(true); - if (g_agent) { - g_agent->request_quit(); - } } -#endif // ============================================================================ // Table Printing (shared between remote and local modes) @@ -118,17 +119,6 @@ struct TablePrinter { } }; -// ============================================================================ -// Validation Helpers -// ============================================================================ - -static bool is_safe_table_name(const std::string& name) { - if (name.empty() || name.size() > 128) return false; - return std::all_of(name.begin(), name.end(), [](unsigned char c) { - return std::isalnum(c) || c == '_'; - }); -} - // ============================================================================ // Local Mode - Uses IDA SDK (delay-loaded on Windows) // ============================================================================ @@ -147,6 +137,17 @@ static bool is_safe_table_name(const std::string& name) { #include #include #endif +#include +#include // save_database() + +struct idapython_runtime_guard_t { + bool acquired = false; + ~idapython_runtime_guard_t() { + if (acquired) { + idasql::idapython::runtime_release(); + } + } +}; static void add_query_result_rows(TablePrinter& printer, const idasql::QueryResult& result) { for (const auto& row : result.rows) { @@ -174,135 +175,20 @@ static void print_query_warnings(std::ostream& os, const idasql::QueryResult& re // REPL - Interactive Mode (Local) // ============================================================================ -static void show_help() { - std::cout << R"( -Commands: - .tables List all tables - .schema [table] Show table schema - .info Show database info - .clear Clear session (reset conversation) - .quit / .exit Exit interactive mode - .help Show this help - -SQL queries end with semicolon (;) -Multi-line queries are supported. -)" << std::endl; -} - -static void show_tables(idasql::Database& db) { - auto result = db.query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;"); - if (!result.success) { - std::cerr << "Error: " << db.error() << "\n"; - return; - } - - std::cout << "Tables:\n"; - for (const auto& row : result.rows) { - std::cout << " " << (row.size() > 0 ? row[0] : "") << "\n"; - } -} - -static void show_schema(idasql::Database& db, const std::string& table) { - if (!is_safe_table_name(table)) { - std::cerr << "Invalid table name\n"; - return; - } - - std::string sql = "SELECT sql FROM sqlite_master WHERE type='table' AND name='" + table + "';"; - auto result = db.query(sql); - if (!result.success) { - std::cerr << "Error: " << db.error() << "\n"; - return; - } - if (!result.rows.empty() && result.rows[0].size() > 0) { - std::cout << result.rows[0][0] << "\n"; - } else { - std::cout << "Not found\n"; - } -} - -// Helper to execute SQL and format results as string (for AI agent) -static std::string execute_sql_to_string(idasql::Database& db, const std::string& sql) { - auto result = db.query(sql); - if (!result.success) { - return "Error: " + std::string(db.error()); - } - - std::stringstream ss; - TablePrinter printer; - add_query_result_rows(printer, result); - - // Capture to string instead of stdout - std::streambuf* old_cout = std::cout.rdbuf(ss.rdbuf()); - printer.print(); - print_query_warnings(std::cout, result); - std::cout.rdbuf(old_cout); - return ss.str(); -} - // Forward declaration (defined in HTTP section below) static std::string query_result_to_json(idasql::Database& db, const std::string& sql); -#ifdef IDASQL_HAS_AI_AGENT -static void run_repl(idasql::Database& db, bool agent_mode, bool verbose, - const std::string& provider_override = "") { -#else static void run_repl(idasql::Database& db) { - [[maybe_unused]] bool agent_mode = false; -#endif std::string line; std::string query; - -#ifdef IDASQL_HAS_AI_AGENT - std::unique_ptr agent; - if (agent_mode) { - auto executor = [&db](const std::string& sql) -> std::string { - return execute_sql_to_string(db, sql); - }; - - // Load settings (includes BYOK, provider, timeout) - idasql::AgentSettings settings = idasql::LoadAgentSettings(); - - // Apply provider override from CLI if specified - if (!provider_override.empty()) { - try { - settings.default_provider = idasql::ParseProviderType(provider_override); - } catch (...) { - // Already validated in argument parsing - } - } - - agent = std::make_unique(executor, settings, verbose); - - // Register signal handler for clean Ctrl-C handling - g_agent = agent.get(); - std::signal(SIGINT, signal_handler); -#ifdef _WIN32 - // Windows also needs SIGBREAK for Ctrl-Break - std::signal(SIGBREAK, signal_handler); -#endif - - agent->start(); // Initialize agent - - std::cout << "IDASQL AI Agent Mode\n" - << "Ask questions in natural language or use SQL directly.\n" - << "Type .help for commands, .clear to reset, .quit to exit\n\n"; - } else { -#endif - std::cout << "IDASQL Interactive Mode\n" - << "Type .help for commands, .clear to reset, .quit to exit\n\n"; -#ifdef IDASQL_HAS_AI_AGENT - } -#endif + std::cout << "IDASQL Interactive Mode\n" + << "Type .help for commands, .quit to exit\n\n"; while (true) { -#ifdef IDASQL_HAS_AI_AGENT - // Check for quit request from signal handler if (g_quit_requested.load()) { std::cout << "\nInterrupted.\n"; break; } -#endif // Prompt std::cout << (query.empty() ? "idasql> " : " ...> "); @@ -313,8 +199,6 @@ static void run_repl(idasql::Database& db) { // Handle dot commands if (query.empty() && !line.empty() && line[0] == '.') { -#ifdef IDASQL_HAS_AI_AGENT - // Use unified command handler for agent mode idasql::CommandCallbacks callbacks; callbacks.get_tables = [&db]() -> std::string { std::stringstream ss; @@ -326,7 +210,7 @@ static void run_repl(idasql::Database& db) { }; callbacks.get_schema = [&db](const std::string& table) -> std::string { auto result = db.query("SELECT sql FROM sqlite_master WHERE name='" + table + "'"); - if (!result.empty() && result.rows[0].size() > 0) { + if (result.success && !result.rows.empty() && result.rows[0].size() > 0) { return std::string(result.rows[0][0]); } return "Table not found: " + table; @@ -334,26 +218,22 @@ static void run_repl(idasql::Database& db) { callbacks.get_info = [&db]() -> std::string { return db.info(); }; - callbacks.clear_session = [&agent]() -> std::string { - if (agent) { - agent->reset_session(); - return "Session cleared (conversation history reset)"; - } - return "Session cleared"; - }; +#ifdef IDASQL_HAS_MCP // MCP server callbacks callbacks.mcp_status = []() -> std::string { if (g_mcp_server && g_mcp_server->is_running()) { - return idasql::format_mcp_status(g_mcp_server->port(), true); + return idasql::format_mcp_status( + g_mcp_server->port(), true, g_mcp_server->bind_addr()); } else { return "MCP server not running\nUse '.mcp start' to start\n"; } }; - callbacks.mcp_start = [&db, &agent](int req_port, const std::string& bind_addr) -> std::string { + callbacks.mcp_start = [&db](int req_port, const std::string& bind_addr) -> std::string { if (g_mcp_server && g_mcp_server->is_running()) { - return idasql::format_mcp_status(g_mcp_server->port(), true); + return idasql::format_mcp_status( + g_mcp_server->port(), true, g_mcp_server->bind_addr()); } // Create MCP server if needed @@ -370,24 +250,14 @@ static void run_repl(idasql::Database& db) { return "Error: " + result.error; }; - // Create MCP agent for natural language queries - g_mcp_agent = std::make_unique(sql_cb); - g_mcp_agent->start(); - - idasql::AskCallback ask_cb = [](const std::string& question) -> std::string { - if (!g_mcp_agent) return "Error: AI agent not available"; - return g_mcp_agent->query(question); - }; - // Start with use_queue=true for CLI mode (main thread execution) - int port = g_mcp_server->start(req_port, sql_cb, ask_cb, bind_addr, true); + int port = g_mcp_server->start(req_port, sql_cb, bind_addr, true); if (port <= 0) { - g_mcp_agent.reset(); return "Error: Failed to start MCP server\n"; } // Print info - std::cout << idasql::format_mcp_info(port, true); + std::cout << idasql::format_mcp_info(port, g_mcp_server->bind_addr()); std::cout << "Press Ctrl+C to stop MCP server and return to REPL...\n\n"; std::cout.flush(); @@ -412,7 +282,6 @@ static void run_repl(idasql::Database& db) { #ifdef _WIN32 std::signal(SIGBREAK, old_break_handler); #endif - g_mcp_agent.reset(); g_quit_requested.store(false); // Reset for continued REPL use return "MCP server stopped. Returning to REPL.\n"; @@ -421,23 +290,25 @@ static void run_repl(idasql::Database& db) { callbacks.mcp_stop = []() -> std::string { if (g_mcp_server && g_mcp_server->is_running()) { g_mcp_server->stop(); - g_mcp_agent.reset(); return "MCP server stopped\n"; } return "MCP server not running\n"; }; +#endif // HTTP server callbacks callbacks.http_status = []() -> std::string { if (g_repl_http_server && g_repl_http_server->is_running()) { - return idasql::format_http_status(g_repl_http_server->port(), true); + return idasql::format_http_status( + g_repl_http_server->port(), true, g_repl_http_server->bind_addr()); } return "HTTP server not running\nUse '.http start' to start\n"; }; callbacks.http_start = [&db](int req_port, const std::string& bind_addr) -> std::string { if (g_repl_http_server && g_repl_http_server->is_running()) { - return idasql::format_http_status(g_repl_http_server->port(), true); + return idasql::format_http_status( + g_repl_http_server->port(), true, g_repl_http_server->bind_addr()); } // Create HTTP server if needed @@ -457,7 +328,8 @@ static void run_repl(idasql::Database& db) { } // Print info - std::cout << idasql::format_http_info(port); + std::cout << idasql::format_http_info( + port, g_repl_http_server->bind_addr(), "Press Ctrl+C to stop and return to REPL."); std::cout.flush(); // Install signal handler so Ctrl+C sets g_quit_requested @@ -510,48 +382,8 @@ static void run_repl(idasql::Database& db) { // Fall through to standard handling break; } -#else - // Non-agent mode: basic command handling - if (line == ".quit" || line == ".exit") break; - if (line == ".tables") { show_tables(db); continue; } - if (line == ".info") { std::cout << db.info(); continue; } - if (line == ".help") { show_help(); continue; } - if (line == ".clear") { - std::cout << "Session cleared\n"; - continue; - } - if (line.substr(0, 7) == ".schema") { - std::string table = line.length() > 8 ? line.substr(8) : ""; - while (!table.empty() && table[0] == ' ') table = table.substr(1); - if (table.empty()) { - std::cerr << "Usage: .schema \n"; - } else { - show_schema(db, table); - } - continue; - } - std::cerr << "Unknown command: " << line << "\n"; - continue; -#endif } -#ifdef IDASQL_HAS_AI_AGENT - // In agent mode, use query for main-thread safety - if (agent_mode && agent) { - std::string result = agent->query(line); - if (!result.empty()) { - std::cout << result << "\n"; - } - - // Check if we were interrupted - if (agent->quit_requested()) { - std::cout << "Interrupted.\n"; - break; - } - continue; - } -#endif - // Standard SQL mode: accumulate query query += line + " "; @@ -572,15 +404,8 @@ static void run_repl(idasql::Database& db) { } } -#ifdef IDASQL_HAS_AI_AGENT exit_repl: - if (agent) { - agent->stop(); - g_agent = nullptr; - } - // Restore default signal handler - std::signal(SIGINT, SIG_DFL); -#endif + return; } // ============================================================================ @@ -760,81 +585,37 @@ static std::string http_queue_and_wait(const std::string& sql) { static std::string query_result_to_json(idasql::Database& db, const std::string& sql) { auto result = db.query(sql); - xsql::json j = {{"success", result.success}}; - - if (result.success) { - j["columns"] = result.columns; - - xsql::json rows = xsql::json::array(); - for (const auto& row : result.rows) { - rows.push_back(row.values); // Row::values is std::vector - } - j["rows"] = rows; - j["row_count"] = result.rows.size(); - if (!result.warnings.empty()) { - j["warnings"] = result.warnings; - } - if (result.timed_out) { - j["timed_out"] = true; - } - if (result.partial) { - j["partial"] = true; - } - if (result.elapsed_ms > 0) { - j["elapsed_ms"] = result.elapsed_ms; - } - } else { - j["error"] = result.error; - } - - return j.dump(); + return idasql::query_result_to_json_safe(result); } -static const char* IDASQL_HELP_TEXT = R"(IDASQL HTTP REST API -==================== - -SQL interface for IDA Pro databases via HTTP. - -Endpoints: - GET / - Welcome message - GET /help - This documentation (for LLM discovery) - POST /query - Execute SQL (body = raw SQL, response = JSON) - GET /status - Server health - POST /shutdown - Stop server - -Tables: - funcs - Functions with address, size, flags - segments - Segment/section information - imports - Imported functions - exports - Exported functions - names - Named locations - strings - String references - comments - User comments - xrefs - Cross references - structs - Structure definitions - struct_members - Structure members - enums - Enumeration definitions - enum_members - Enumeration values - localvars - Local variables (requires Hex-Rays) - pseudocode - Decompiled pseudocode (requires Hex-Rays) - -Example Queries: - SELECT name, start_ea, size FROM funcs ORDER BY size DESC LIMIT 10; - SELECT * FROM imports WHERE name LIKE '%malloc%'; - SELECT s.name, COUNT(*) FROM structs s JOIN struct_members m ON s.id = m.struct_id GROUP BY s.id; - -Response Format: - Success: {"success": true, "columns": [...], "rows": [[...]], "row_count": N} - Error: {"success": false, "error": "message"} - -Authentication (if enabled): - Header: Authorization: Bearer - Or: X-XSQL-Token: - -Example: - curl http://localhost:8081/help - curl -X POST http://localhost:8081/query -d "SELECT name FROM funcs LIMIT 5" -)"; +static std::string build_cli_http_help_text() { + std::ostringstream out; + out << "IDASQL HTTP REST API\n" + << "====================\n\n" + << "SQL interface for IDA Pro databases via HTTP.\n\n" + << "Endpoints:\n" + << " GET / - Welcome message\n" + << " GET /help - This documentation (for LLM discovery)\n" + << " POST /query - Execute SQL (body = raw SQL, response = JSON)\n" + << " GET /status - Server health\n" + << " POST /shutdown - Stop server\n\n" + << "Discover Schema:\n" + << " SELECT name, type FROM sqlite_master WHERE type IN ('table','view') ORDER BY type, name;\n" + << " PRAGMA table_info(funcs);\n\n" + << "Starter Queries:\n" + << " SELECT * FROM welcome;\n" + << " SELECT name, start_ea, size FROM funcs ORDER BY size DESC LIMIT 10;\n\n" + << "Response Format:\n" + << " Success: {\"success\": true, \"columns\": [...], \"rows\": [[...]], \"row_count\": N}\n" + << " Error: {\"success\": false, \"error\": \"message\"}\n\n" + << "Authentication (if enabled):\n" + << " Header: Authorization: Bearer \n" + << " Or: X-XSQL-Token: \n\n" + << "Example:\n" + << " curl http://localhost:8081/help\n" + << " " << idasql::format_query_curl_example("http://localhost:8081") << "\n"; + return out.str(); +} static int run_http_mode(idasql::Database& db, int port, const std::string& bind_addr, const std::string& auth_token) { xsql::thinclient::server_config cfg; @@ -853,17 +634,18 @@ static int run_http_mode(idasql::Database& db, int port, const std::string& bind cfg.setup_routes = [&auth_token, port](httplib::Server& svr) { svr.Get("/", [port](const httplib::Request&, httplib::Response& res) { + const std::string base_url = "http://localhost:" + std::to_string(port); std::string welcome = "IDASQL HTTP Server\n\nEndpoints:\n" " GET /help - API documentation\n" " POST /query - Execute SQL query\n" " GET /status - Health check\n" " POST /shutdown - Stop server\n\n" - "Example: curl -X POST http://localhost:" + std::to_string(port) + "/query -d \"SELECT name FROM funcs LIMIT 5\"\n"; + "Example: " + idasql::format_query_curl_example(base_url) + "\n"; res.set_content(welcome, "text/plain"); }); svr.Get("/help", [](const httplib::Request&, httplib::Response& res) { - res.set_content(IDASQL_HELP_TEXT, "text/plain"); + res.set_content(build_cli_http_help_text(), "text/plain"); }); // POST /query - Queue command for main thread execution @@ -960,10 +742,8 @@ static int run_http_mode(idasql::Database& db, int port, const std::string& bind http_server.run_async(); int actual_port = http_server.port(); - std::cout << "IDASQL HTTP server listening on http://" << cfg.bind_address << ":" << actual_port << "\n"; + std::cout << "IDASQL HTTP server: http://" << cfg.bind_address << ":" << actual_port << "\n"; std::cout << "Database: " << db.info() << "\n"; - std::cout << "Endpoints: /help, /query, /status, /shutdown\n"; - std::cout << "Example: curl http://localhost:" << actual_port << "/help\n"; std::cout << "Press Ctrl+C to stop.\n\n"; std::cout.flush(); @@ -1065,19 +845,9 @@ static void print_usage() { << " --export-tables=X Tables to export: * (all, default) or table1,table2,...\n" << " --http [port] Start HTTP REST server (default: 8080, local mode only)\n" << " --bind Bind address for HTTP/MCP server (default: 127.0.0.1)\n" -#ifdef IDASQL_HAS_AI_AGENT +#ifdef IDASQL_HAS_MCP << " --mcp [port] Start MCP server (default: random port, use in -i mode)\n" << " Or use .mcp start in interactive mode\n" -#endif -#ifdef IDASQL_HAS_AI_AGENT - << " --prompt Natural language query (uses AI agent)\n" - << " --agent Enable AI agent mode in interactive REPL\n" - << " --provider Override AI provider (claude, copilot)\n" - << " --config [path] [val] View/set agent configuration\n" - << " -v, --verbose Show agent debug logs\n" - << "\n" - << "Agent settings stored in: ~/.idasql/agent_settings.json\n" - << "Configure via: .agent provider, .agent byok, .agent timeout\n" #endif << " -h, --help Show this help\n" << " --version Show version\n\n" @@ -1087,12 +857,11 @@ static void print_usage() { << " idasql -s test.i64 -i\n" << " idasql -s test.i64 --export dump.sql\n" << " idasql -s test.i64 --http 8080\n" -#ifdef IDASQL_HAS_AI_AGENT - << " idasql -s test.i64 --prompt \"Find the largest functions\"\n" - << " idasql -s test.i64 -i --agent\n" - << " idasql -s test.i64 --provider copilot --prompt \"How many functions?\"\n" -#endif +#ifdef IDASQL_HAS_MCP << " idasql -s test.i64 --mcp 9000\n"; +#else + ; +#endif } int main(int argc, char* argv[]) { @@ -1126,12 +895,6 @@ int main(int argc, char* argv[]) { int http_port = 8080; bool mcp_mode = false; int mcp_port = 0; // 0 = random port -#ifdef IDASQL_HAS_AI_AGENT - std::string nl_prompt; // --prompt for natural language - bool agent_mode = false; // --agent for interactive mode - bool verbose_mode = false; // -v for verbose agent output - std::string provider_override; // --provider overrides stored setting -#endif // Parse arguments for (int i = 1; i < argc; i++) { @@ -1151,40 +914,22 @@ int main(int argc, char* argv[]) { export_file = argv[++i]; } else if (strncmp(argv[i], "--export-tables=", 16) == 0) { export_tables = argv[i] + 16; -#ifdef IDASQL_HAS_AI_AGENT - } else if (strcmp(argv[i], "--prompt") == 0 && i + 1 < argc) { - nl_prompt = argv[++i]; - } else if (strcmp(argv[i], "--agent") == 0) { - agent_mode = true; - } else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { - verbose_mode = true; - } else if (strcmp(argv[i], "--provider") == 0 && i + 1 < argc) { - provider_override = argv[++i]; - // Validate provider name - if (provider_override != "copilot" && provider_override != "Copilot" && - provider_override != "claude" && provider_override != "Claude") { - std::cerr << "Unknown provider: " << provider_override << "\n"; - std::cerr << "Available providers: claude, copilot\n"; - return 1; - } - } else if (strcmp(argv[i], "--config") == 0) { - // Handle --config [path] [value] and exit immediately - std::string config_path = (i + 1 < argc && argv[i + 1][0] != '-') ? argv[++i] : ""; - std::string config_value = (i + 1 < argc && argv[i + 1][0] != '-') ? argv[++i] : ""; - auto [ok, output, code] = idasql::handle_config_command(config_path, config_value); - std::cout << output; - return code; -#endif } else if (strcmp(argv[i], "--http") == 0) { http_mode = true; if (i + 1 < argc && argv[i + 1][0] != '-') { http_port = std::stoi(argv[++i]); } +#ifdef IDASQL_HAS_MCP } else if (strcmp(argv[i], "--mcp") == 0) { mcp_mode = true; if (i + 1 < argc && argv[i + 1][0] != '-') { mcp_port = std::stoi(argv[++i]); } +#else + } else if (strcmp(argv[i], "--mcp") == 0) { + std::cerr << "Error: MCP mode not available. Rebuild with -DIDASQL_WITH_MCP=ON\n"; + return 1; +#endif } else if (strcmp(argv[i], "--bind") == 0 && i + 1 < argc) { bind_addr = argv[++i]; } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { @@ -1205,15 +950,12 @@ int main(int argc, char* argv[]) { } bool has_action = !query.empty() || !sql_file.empty() || interactive || !export_file.empty() || http_mode || mcp_mode; -#ifdef IDASQL_HAS_AI_AGENT - has_action = has_action || !nl_prompt.empty(); -#endif if (!has_action) { - std::cerr << "Error: Specify -q, -f, -i, --export, --http, --mcp" -#ifdef IDASQL_HAS_AI_AGENT - << ", or --prompt" + std::cerr << "Error: Specify -q, -f, -i, --export, --http"; +#ifdef IDASQL_HAS_MCP + std::cerr << ", or --mcp"; #endif - << "\n\n"; + std::cerr << "\n\n"; print_usage(); return 1; } @@ -1229,6 +971,14 @@ int main(int argc, char* argv[]) { } std::cerr << "Database opened successfully." << std::endl; + idapython_runtime_guard_t idapython_runtime; + std::string idapython_runtime_error; + idapython_runtime.acquired = idasql::idapython::runtime_acquire(&idapython_runtime_error); + if (!idapython_runtime.acquired) { + std::cerr << "Warning: IDAPython capture runtime init failed: " + << idapython_runtime_error << std::endl; + } + // HTTP server mode if (http_mode) { int http_result = run_http_mode(db, http_port, bind_addr, auth_token); @@ -1237,7 +987,7 @@ int main(int argc, char* argv[]) { } // MCP server mode (standalone, not interactive REPL) -#ifdef IDASQL_HAS_AI_AGENT +#ifdef IDASQL_HAS_MCP if (mcp_mode) { // SQL executor - will be called on main thread via wait() idasql::QueryCallback sql_cb = [&db](const std::string& sql) -> std::string { @@ -1248,18 +998,9 @@ int main(int argc, char* argv[]) { return "Error: " + result.error; }; - // Create MCP agent for natural language queries - auto mcp_agent = std::make_unique(sql_cb); - mcp_agent->start(); - - idasql::AskCallback ask_cb = [&mcp_agent](const std::string& question) -> std::string { - if (!mcp_agent) return "Error: AI agent not available"; - return mcp_agent->query(question); - }; - // Create and start MCP server with use_queue=true idasql::IDAMCPServer mcp_server; - int port = mcp_server.start(mcp_port, sql_cb, ask_cb, + int port = mcp_server.start(mcp_port, sql_cb, bind_addr.empty() ? "127.0.0.1" : bind_addr, true); if (port <= 0) { std::cerr << "Error: Failed to start MCP server\n"; @@ -1267,7 +1008,7 @@ int main(int argc, char* argv[]) { return 1; } - std::cout << idasql::format_mcp_info(port, true); + std::cout << idasql::format_mcp_info(port, mcp_server.bind_addr()); std::cout << "Press Ctrl+C to stop...\n\n"; std::cout.flush(); @@ -1287,14 +1028,13 @@ int main(int argc, char* argv[]) { mcp_server.run_until_stopped(); std::signal(SIGINT, SIG_DFL); - mcp_agent->stop(); std::cout << "\nMCP server stopped.\n"; db.close(); return 0; } #else if (mcp_mode) { - std::cerr << "Error: MCP mode not available. Rebuild with -DIDASQL_WITH_AI_AGENT=ON\n"; + std::cerr << "Error: MCP mode not available. Rebuild with -DIDASQL_WITH_MCP=ON\n"; db.close(); return 1; } @@ -1308,40 +1048,6 @@ int main(int argc, char* argv[]) { if (!export_to_sql(db, export_file.c_str(), export_tables)) { result = 1; } -#ifdef IDASQL_HAS_AI_AGENT - } else if (!nl_prompt.empty()) { - // Natural language query mode (one-shot) - auto executor = [&db](const std::string& sql) -> std::string { - return execute_sql_to_string(db, sql); - }; - - // Load settings (includes BYOK, provider, timeout) - idasql::AgentSettings settings = idasql::LoadAgentSettings(); - - // Apply provider override from CLI if specified - if (!provider_override.empty()) { - try { - settings.default_provider = idasql::ParseProviderType(provider_override); - } catch (...) { - // Already validated in argument parsing - } - } - - idasql::AIAgent agent(executor, settings, verbose_mode); - - // Register signal handler - g_agent = &agent; - std::signal(SIGINT, signal_handler); - - agent.start(); - std::string response = agent.query(nl_prompt); - agent.stop(); - - g_agent = nullptr; - std::signal(SIGINT, SIG_DFL); - - std::cout << response << "\n"; -#endif } else if (!query.empty()) { // Single query mode auto query_result = db.query(query); @@ -1361,11 +1067,7 @@ int main(int argc, char* argv[]) { } } else if (interactive) { // Interactive REPL -#ifdef IDASQL_HAS_AI_AGENT - run_repl(db, agent_mode, verbose_mode, provider_override); -#else run_repl(db); -#endif } // Save database if -w/--write was specified diff --git a/src/common/agent_settings.hpp b/src/common/agent_settings.hpp deleted file mode 100644 index 0c96725..0000000 --- a/src/common/agent_settings.hpp +++ /dev/null @@ -1,228 +0,0 @@ -/** - * agent_settings.hpp - AI agent configuration persistence - * - * Stores provider selection, timeout, custom prompt, and BYOK settings - * under the user's idasql settings directory. - */ - -#pragma once - -#ifdef IDASQL_HAS_AI_AGENT - -#include -#include -#include - -#include -#include -#include -#include - -#ifdef _WIN32 -#include -#include -#else -#include -#include -#include -#endif - -namespace idasql { - -// BYOK (Bring Your Own Key) settings for a single provider -struct BYOKSettings { - bool enabled = false; - std::string api_key; - std::string base_url; - std::string model; - std::string provider_type; // "openai", "anthropic", "azure" - int timeout_ms = 0; - - // Convert to libagents BYOKConfig - libagents::BYOKConfig to_config() const { - libagents::BYOKConfig config; - config.api_key = api_key; - config.base_url = base_url; - config.model = model; - config.provider_type = provider_type; - config.timeout_ms = timeout_ms; - return config; - } - - // Check if BYOK is usable (enabled and has API key) - bool is_usable() const { return enabled && !api_key.empty(); } -}; - -// Agent settings stored in ~/.idasql/agent_settings.json -struct AgentSettings { - // Default provider (claude, copilot) - libagents::ProviderType default_provider = libagents::ProviderType::Claude; - - // User's custom prompt (additive to system prompt) - std::string custom_prompt; - - // Response timeout in milliseconds (0 = use default) - int response_timeout_ms = 120000; // 2 minutes default - - // BYOK configuration per provider - // Key: provider name ("copilot", "claude") - std::unordered_map byok; - - // Get BYOK settings for the current provider - const BYOKSettings* get_byok() const { - std::string provider_name = libagents::provider_type_name(default_provider); - auto it = byok.find(provider_name); - if (it != byok.end()) - return &it->second; - return nullptr; - } - - // Get or create BYOK settings for the current provider - BYOKSettings& get_or_create_byok() { - std::string provider_name = libagents::provider_type_name(default_provider); - return byok[provider_name]; - } -}; - -// Get the settings directory path (~/.idasql or %APPDATA%\idasql) -inline std::string GetSettingsDir() { -#ifdef _WIN32 - char path[MAX_PATH]; - if (SUCCEEDED(SHGetFolderPathA(nullptr, CSIDL_APPDATA, nullptr, 0, path))) { - return std::string(path) + "\\idasql"; - } - // Fallback to USERPROFILE - const char* userprofile = std::getenv("USERPROFILE"); - if (userprofile) { - return std::string(userprofile) + "\\.idasql"; - } - return ".idasql"; -#else - const char* home = std::getenv("HOME"); - if (!home) { - struct passwd* pw = getpwuid(getuid()); - if (pw) home = pw->pw_dir; - } - if (home) { - return std::string(home) + "/.idasql"; - } - return ".idasql"; -#endif -} - -// Get the settings file path -inline std::string GetSettingsPath() { - return GetSettingsDir() + -#ifdef _WIN32 - "\\agent_settings.json"; -#else - "/agent_settings.json"; -#endif -} - -// Ensure directory exists -inline bool EnsureDir(const std::string& path) { -#ifdef _WIN32 - return CreateDirectoryA(path.c_str(), nullptr) || GetLastError() == ERROR_ALREADY_EXISTS; -#else - return mkdir(path.c_str(), 0755) == 0 || errno == EEXIST; -#endif -} - -// JSON serialization -inline void to_json(nlohmann::json& j, const BYOKSettings& s) { - j = nlohmann::json{ - {"enabled", s.enabled}, - {"api_key", s.api_key}, - {"base_url", s.base_url}, - {"model", s.model}, - {"provider_type", s.provider_type}, - {"timeout_ms", s.timeout_ms} - }; -} - -inline void from_json(const nlohmann::json& j, BYOKSettings& s) { - s.enabled = j.value("enabled", false); - s.api_key = j.value("api_key", ""); - s.base_url = j.value("base_url", ""); - s.model = j.value("model", ""); - s.provider_type = j.value("provider_type", ""); - s.timeout_ms = j.value("timeout_ms", 0); -} - -inline void to_json(nlohmann::json& j, const AgentSettings& s) { - j = nlohmann::json{ - {"default_provider", libagents::provider_type_name(s.default_provider)}, - {"custom_prompt", s.custom_prompt}, - {"response_timeout_ms", s.response_timeout_ms}, - {"byok", s.byok} - }; -} - -inline void from_json(const nlohmann::json& j, AgentSettings& s) { - std::string provider_str = j.value("default_provider", "claude"); - if (provider_str == "copilot") { - s.default_provider = libagents::ProviderType::Copilot; - } else { - s.default_provider = libagents::ProviderType::Claude; - } - s.custom_prompt = j.value("custom_prompt", ""); - s.response_timeout_ms = j.value("response_timeout_ms", 120000); - if (j.contains("byok") && j["byok"].is_object()) { - s.byok = j["byok"].get>(); - } -} - -// Load settings from disk (creates default if not exists) -inline AgentSettings LoadAgentSettings() { - AgentSettings settings; - std::string path = GetSettingsPath(); - - std::ifstream f(path); - if (f.is_open()) { - try { - nlohmann::json j = nlohmann::json::parse(f); - settings = j.get(); - } catch (...) { - // Ignore parse errors, use defaults - } - } - return settings; -} - -// Save settings to disk -inline bool SaveAgentSettings(const AgentSettings& settings) { - std::string dir = GetSettingsDir(); - if (!EnsureDir(dir)) { - return false; - } - - std::string path = GetSettingsPath(); - std::ofstream f(path); - if (!f.is_open()) { - return false; - } - - try { - nlohmann::json j = settings; - f << j.dump(2); - return true; - } catch (...) { - return false; - } -} - -// Parse provider type from string -inline libagents::ProviderType ParseProviderType(const std::string& name) { - if (name == "copilot" || name == "Copilot" || name == "COPILOT") { - return libagents::ProviderType::Copilot; - } - if (name == "claude" || name == "Claude" || name == "CLAUDE") { - return libagents::ProviderType::Claude; - } - throw std::runtime_error("Unknown provider: " + name + " (use 'claude' or 'copilot')"); -} - -} // namespace idasql - -#endif // IDASQL_HAS_AI_AGENT diff --git a/src/common/ai_agent.cpp b/src/common/ai_agent.cpp deleted file mode 100644 index 2de128f..0000000 --- a/src/common/ai_agent.cpp +++ /dev/null @@ -1,347 +0,0 @@ -#include "ai_agent.hpp" - -#ifdef IDASQL_HAS_AI_AGENT - -// Embedded documentation from prompts/idasql_agent.md -#include "idasql_agent_prompt.hpp" - -#include -#include -#include -#include -#include - -namespace idasql { - -// ============================================================================ -// Construction / Destruction -// ============================================================================ - -AIAgent::AIAgent(SqlExecutor executor, const AgentSettings& settings, bool verbose) - : executor_(std::move(executor)), verbose_(verbose), - provider_type_(settings.default_provider), - response_timeout_ms_(settings.response_timeout_ms) -{ - // Create agent with provider from settings - agent_ = libagents::create_agent(provider_type_); - - if (verbose_) { - std::cerr << "[AGENT] Created " << libagents::provider_type_name(provider_type_) - << " provider" << std::endl; - } - - // Apply BYOK from settings if configured - const BYOKSettings* byok = settings.get_byok(); - if (byok && byok->is_usable()) { - set_byok(byok->to_config()); - if (verbose_) { - std::cerr << "[AGENT] Loaded BYOK from settings" << std::endl; - } - } else { - // Fall back to environment variables - load_byok_from_env(); - } -} - -AIAgent::AIAgent(SqlExecutor executor, bool verbose) - : AIAgent(std::move(executor), LoadAgentSettings(), verbose) -{ - // Delegates to settings-based constructor -} - -void AIAgent::set_byok(const libagents::BYOKConfig& config) { - byok_config_ = config; - byok_configured_ = config.is_configured(); - if (verbose_ && byok_configured_) { - std::cerr << "[AGENT] BYOK configured"; - if (!config.model.empty()) std::cerr << " (model: " << config.model << ")"; - if (!config.base_url.empty()) std::cerr << " (endpoint: " << config.base_url << ")"; - std::cerr << std::endl; - } -} - -bool AIAgent::load_byok_from_env() { - libagents::BYOKConfig config; - - // Load from COPILOT_SDK_BYOK_* environment variables - if (const char* key = std::getenv("COPILOT_SDK_BYOK_API_KEY")) - config.api_key = key; - if (const char* url = std::getenv("COPILOT_SDK_BYOK_BASE_URL")) - config.base_url = url; - if (const char* model = std::getenv("COPILOT_SDK_BYOK_MODEL")) - config.model = model; - if (const char* type = std::getenv("COPILOT_SDK_BYOK_PROVIDER_TYPE")) - config.provider_type = type; - - if (config.is_configured()) { - if (verbose_) { - std::cerr << "[AGENT] Loaded BYOK from environment:" << std::endl; - std::cerr << "[AGENT] Model: " << (config.model.empty() ? "(default)" : config.model) << std::endl; - std::cerr << "[AGENT] Endpoint: " << (config.base_url.empty() ? "(default)" : config.base_url) << std::endl; - std::cerr << "[AGENT] Type: " << (config.provider_type.empty() ? "(default)" : config.provider_type) << std::endl; - } - set_byok(config); - return true; - } - return false; -} - -AIAgent::~AIAgent() { - stop(); -} - -// ============================================================================ -// Lifecycle -// ============================================================================ - -void AIAgent::start() { - if (!agent_) return; - - setup_tools(); - - // Apply BYOK configuration before initialize() - required for Copilot provider - if (byok_configured_) { - agent_->set_byok(byok_config_); - if (verbose_) { - std::cerr << "[AGENT] Applied BYOK configuration" << std::endl; - } - } - - // Apply response timeout if configured - if (response_timeout_ms_ > 0) { - agent_->set_response_timeout(std::chrono::milliseconds(response_timeout_ms_)); - if (verbose_) { - std::cerr << "[AGENT] Response timeout: " << response_timeout_ms_ << " ms" << std::endl; - } - } - - // Note: We don't use set_system_prompt() because it can break MCP tool - // visibility with some providers. Instead, we embed the prompt in the - // first message (priming). - - if (!agent_->initialize()) { - if (verbose_) { - std::cerr << "[AGENT] Failed to initialize agent" << std::endl; - } - return; - } - - if (verbose_) { - std::cerr << "[AGENT] Agent initialized (" << agent_->provider_name() << ")" << std::endl; - } -} - -void AIAgent::stop() { - if (agent_ && agent_->is_initialized()) { - agent_->shutdown(); - if (verbose_) { - std::cerr << "[AGENT] Agent shutdown" << std::endl; - } - } -} - -void AIAgent::reset_session() { - if (verbose_) { - std::cerr << "[AGENT] Resetting session..." << std::endl; - } - - docs_primed_ = false; - quit_requested_.store(false); - - if (agent_) { - agent_->clear_session(); - } - - if (verbose_) { - std::cerr << "[AGENT] Session reset complete" << std::endl; - } -} - -void AIAgent::request_quit() { - quit_requested_.store(true); - if (agent_) { - agent_->abort(); - } -} - -// ============================================================================ -// Tool Registration -// ============================================================================ - -void AIAgent::setup_tools() { - libagents::Tool idasql_tool; - idasql_tool.name = "idasql"; - idasql_tool.description = - "Execute a SQL query against an IDA Pro database. " - "Available tables: funcs, strings, imports, segments, names, xrefs, instructions, " - "blocks, comments, types, entries, heads, fchunks, bookmarks, pseudocode, ctree, " - "ctree_lvars, ctree_call_args. " - "Example: SELECT name, size FROM funcs WHERE name LIKE 'sub_%' ORDER BY size DESC LIMIT 10"; - - idasql_tool.parameters_schema = R"({ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "SQL query to execute against the IDA database" - } - }, - "required": ["query"] - })"; - - // Tool handler - executes on caller thread via query_hosted() - idasql_tool.handler = [this](const std::string& args) -> std::string { - try { - auto j = libagents::json::parse(args); - std::string sql = j.value("query", ""); - - if (verbose_) { - std::cerr << "[TOOL] Executing SQL: " << sql.substr(0, 80) - << (sql.size() > 80 ? "..." : "") << std::endl; - } - - // This runs on the main thread (query_hosted guarantees this) - std::string result = executor_(sql); - - if (verbose_) { - std::cerr << "[TOOL] Result: " << result.size() << " bytes" << std::endl; - } - - return result; - - } catch (const std::exception& e) { - return std::string("Error: ") + e.what(); - } - }; - - agent_->register_tool(idasql_tool); - - if (verbose_) { - std::cerr << "[AGENT] Registered idasql tool" << std::endl; - } -} - -// ============================================================================ -// Query Interface -// ============================================================================ - -std::string AIAgent::query(const std::string& prompt) { - // SQL passthrough - execute directly - if (looks_like_sql(prompt)) { - return executor_(prompt); - } - - if (!agent_ || !agent_->is_initialized()) { - return "Error: Agent not initialized"; - } - - // Build message (prime with docs if first message) - std::string message = docs_primed_ ? prompt : build_primed_message(prompt); - docs_primed_ = true; - - // Use query_hosted for main-thread tool dispatch - libagents::HostContext host; - host.should_abort = [this]() { return quit_requested_.load(); }; - - try { - return agent_->query_hosted(message, host); - } catch (const std::exception& e) { - return std::string("Error: ") + e.what(); - } -} - -std::string AIAgent::query_streaming(const std::string& prompt, ContentCallback on_content) { - // SQL passthrough - if (looks_like_sql(prompt)) { - std::string result = executor_(prompt); - if (on_content) on_content(result); - return result; - } - - if (!agent_ || !agent_->is_initialized()) { - std::string err = "Error: Agent not initialized"; - if (on_content) on_content(err); - return err; - } - - // Build message - std::string message = docs_primed_ ? prompt : build_primed_message(prompt); - docs_primed_ = true; - - // Use query_hosted with streaming callback - libagents::HostContext host; - host.should_abort = [this]() { return quit_requested_.load(); }; - host.on_event = [on_content](const libagents::Event& event) { - if (on_content && event.type == libagents::EventType::ContentDelta) { - on_content(event.content); - } - }; - - try { - return agent_->query_hosted(message, host); - } catch (const std::exception& e) { - return std::string("Error: ") + e.what(); - } -} - -// ============================================================================ -// Helpers -// ============================================================================ - -std::string AIAgent::build_primed_message(const std::string& user_message) { - return std::string(SYSTEM_PROMPT) + - "\n\n---\n\n" - "# User Request\n\n" - "Use the `idasql` tool to execute SQL queries. " - "Do not use Bash, Grep, or other tools - only use the idasql tool.\n\n" + - user_message; -} - -bool AIAgent::looks_like_sql(const std::string& input) { - if (input.empty()) return false; - - // Find first non-whitespace character - size_t start = 0; - while (start < input.size() && std::isspace(static_cast(input[start]))) { - ++start; - } - if (start >= input.size()) return false; - - // Convert first ~20 chars to uppercase for comparison - std::string prefix; - for (size_t i = start; i < input.size() && i < start + 20; ++i) { - prefix += static_cast(std::toupper(static_cast(input[i]))); - } - - // Check for SQL keywords - return prefix.rfind("SELECT ", 0) == 0 || - prefix.rfind("INSERT ", 0) == 0 || - prefix.rfind("UPDATE ", 0) == 0 || - prefix.rfind("DELETE ", 0) == 0 || - prefix.rfind("CREATE ", 0) == 0 || - prefix.rfind("DROP ", 0) == 0 || - prefix.rfind("PRAGMA ", 0) == 0 || - prefix.rfind("WITH ", 0) == 0 || - prefix.rfind("EXPLAIN ", 0) == 0 || - prefix.rfind(".TABLES", 0) == 0 || - prefix.rfind(".SCHEMA", 0) == 0 || - prefix.rfind(".HELP", 0) == 0 || - prefix.rfind(".QUIT", 0) == 0 || - prefix.rfind(".EXIT", 0) == 0; -} - -bool AIAgent::is_available() { - try { - // Just check if we can create an agent - don't initialize - // (initialization is expensive and would be redundant if we're about to - // create another agent anyway) - auto agent = libagents::create_agent(libagents::ProviderType::Claude); - return agent != nullptr; - } catch (...) { - return false; - } -} - -} // namespace idasql - -#endif // IDASQL_HAS_AI_AGENT diff --git a/src/common/ai_agent.hpp b/src/common/ai_agent.hpp deleted file mode 100644 index c64549a..0000000 --- a/src/common/ai_agent.hpp +++ /dev/null @@ -1,163 +0,0 @@ -/** - * ai_agent.hpp - IDASQL AI agent wrapper - * - * Wraps libagents provider interactions and query execution integration. - */ - -#pragma once - -#ifdef IDASQL_HAS_AI_AGENT - -#include -#include -#include "agent_settings.hpp" -#include -#include -#include -#include - -namespace idasql { - -/** - * AIAgent - Natural language interface for IDASQL using libagents - * - * This is a simplified wrapper around libagents that provides: - * - Tool registration for SQL execution - * - Main-thread tool dispatch via query_hosted() (required for IDA safety) - * - SQL passthrough detection - * - Signal handling for Ctrl-C - * - BYOK (Bring Your Own Key) support for Copilot provider - * - * Architecture: - * - libagents handles all threading internally - * - query_hosted() ensures tool handlers run on the caller thread - * - No custom queues or thread management needed - */ -class AIAgent { -public: - /// Callback to execute SQL and return formatted results - using SqlExecutor = std::function; - - /// Callback for streaming content - using ContentCallback = std::function; - - /** - * Construct agent with SQL executor and settings - * @param executor Function that executes SQL and returns formatted results - * @param settings Agent settings (provider, BYOK, timeout, etc.) - * @param verbose If true, show debug output - */ - explicit AIAgent(SqlExecutor executor, const AgentSettings& settings, bool verbose = false); - - /** - * Construct agent with SQL executor (uses stored settings) - * @param executor Function that executes SQL and returns formatted results - * @param verbose If true, show debug output - */ - explicit AIAgent(SqlExecutor executor, bool verbose = false); - - /** - * Configure BYOK (Bring Your Own Key) - call before start() - * Required for Copilot provider, optional for Claude - * @param config BYOK configuration (api_key, base_url, model, provider_type) - */ - void set_byok(const libagents::BYOKConfig& config); - - /** - * Load BYOK config from environment variables (fallback) - * Looks for COPILOT_SDK_BYOK_API_KEY, COPILOT_SDK_BYOK_BASE_URL, etc. - * @return true if BYOK was configured from environment - */ - bool load_byok_from_env(); - - /** - * Get the current provider type - */ - libagents::ProviderType provider_type() const { return provider_type_; } - - ~AIAgent(); - - // Non-copyable, non-movable - AIAgent(const AIAgent&) = delete; - AIAgent& operator=(const AIAgent&) = delete; - AIAgent(AIAgent&&) = delete; - AIAgent& operator=(AIAgent&&) = delete; - - /** - * Start the agent and connect to provider - */ - void start(); - - /** - * Stop the agent and disconnect - */ - void stop(); - - /** - * Reset the session - clears conversation history - */ - void reset_session(); - - /** - * Request to quit (e.g., from Ctrl-C handler) - * Thread-safe, can be called from signal handler - */ - void request_quit(); - - /** - * Check if quit was requested - */ - bool quit_requested() const { return quit_requested_.load(); } - - /** - * Send a query and get response (blocking) - * SQL is passed through directly, natural language goes to AI. - * Tool handlers execute on the caller thread (main thread safe). - * - * @param prompt User input (natural language or SQL) - * @return Response text - */ - std::string query(const std::string& prompt); - - /** - * Send a query with streaming output - * @param prompt User input - * @param on_content Callback for content deltas - * @return Final response text - */ - std::string query_streaming(const std::string& prompt, ContentCallback on_content); - - /** - * Check if input looks like SQL (for passthrough) - * @param input User input string - * @return true if input appears to be SQL - */ - static bool looks_like_sql(const std::string& input); - - /** - * Check if AI agent is available - * @return true if the default provider is available - */ - static bool is_available(); - -private: - SqlExecutor executor_; - bool verbose_ = false; - bool docs_primed_ = false; - std::atomic quit_requested_{false}; - std::unique_ptr agent_; - libagents::ProviderType provider_type_ = libagents::ProviderType::Claude; - libagents::BYOKConfig byok_config_; - bool byok_configured_ = false; - int response_timeout_ms_ = 0; - - /// Register the idasql tool with libagents - void setup_tools(); - - /// Build primed message with documentation prepended - std::string build_primed_message(const std::string& user_message); -}; - -} // namespace idasql - -#endif // IDASQL_HAS_AI_AGENT diff --git a/src/common/http_server.cpp b/src/common/http_server.cpp index 5b62ee6..e80a8e8 100644 --- a/src/common/http_server.cpp +++ b/src/common/http_server.cpp @@ -1,31 +1,38 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + #include "http_server.hpp" #include +#include "welcome_query.hpp" #include namespace idasql { -// Help text served at /help endpoint -static const char* HTTP_HELP_TEXT = R"(IDASQL HTTP REST API -==================== - -SQL interface for IDA Pro databases via HTTP. - -Endpoints: - GET / - Welcome message - GET /help - This documentation - POST /query - Execute SQL (body = raw SQL, response = JSON) - GET /status - Server health check - POST /shutdown - Stop server - -Response Format: - Success: {"success": true, "columns": [...], "rows": [[...]], "row_count": N} - Error: {"success": false, "error": "message"} - -Example: - curl http://localhost:/help - curl -X POST http://localhost:/query -d "SELECT name FROM funcs LIMIT 5" -)"; +static std::string build_http_help_text() { + std::ostringstream out; + out << "IDASQL HTTP REST API\n" + << "====================\n\n" + << "SQL interface for IDA Pro databases via HTTP.\n\n" + << "Endpoints:\n" + << " GET / - Welcome message\n" + << " GET /help - This documentation\n" + << " POST /query - Execute SQL (body = raw SQL, response = JSON)\n" + << " GET /status - Server health check\n" + << " POST /shutdown - Stop server\n\n" + << "Discover Schema:\n" + << " SELECT name, type FROM sqlite_master WHERE type IN ('table','view') ORDER BY type, name;\n" + << " PRAGMA table_info(funcs);\n\n" + << "Starter Query:\n" + << " SELECT * FROM welcome;\n\n" + << "Response Format:\n" + << " Success: {\"success\": true, \"columns\": [...], \"rows\": [[...]], \"row_count\": N}\n" + << " Error: {\"success\": false, \"error\": \"message\"}\n\n" + << "Example:\n" + << " curl http://localhost:/help\n" + << " " << format_query_curl_example("http://localhost:") << "\n"; + return out.str(); +} int IDAHTTPServer::start(int port, HTTPQueryCallback query_cb, const std::string& bind_addr, bool use_queue) { @@ -33,11 +40,13 @@ int IDAHTTPServer::start(int port, HTTPQueryCallback query_cb, return impl_->port(); } + bind_addr_ = bind_addr.empty() ? "127.0.0.1" : bind_addr; + xsql::thinclient::http_query_server_config config; config.tool_name = "idasql"; - config.help_text = HTTP_HELP_TEXT; + config.help_text = build_http_help_text(); config.port = port; - config.bind_address = bind_addr; + config.bind_address = bind_addr_; config.query_fn = std::move(query_cb); config.use_queue = use_queue; config.queue_admission_timeout_ms_fn = []() { @@ -89,11 +98,24 @@ void IDAHTTPServer::set_interrupt_check(std::function check) { } std::string format_http_info(int port, const std::string& stop_hint) { - return xsql::thinclient::format_http_info("idasql", port, stop_hint); + return format_http_info(port, "127.0.0.1", stop_hint); +} + +std::string format_http_info(int port, const std::string& bind_addr, const std::string& stop_hint) { + const std::string rendered_host = xsql::thinclient::format_url_host(bind_addr); + const std::string base_url = "http://" + rendered_host + ":" + std::to_string(port); + std::ostringstream ss; + ss << "IDASQL HTTP server: " << base_url << "\n"; + ss << stop_hint << "\n"; + return ss.str(); } std::string format_http_status(int port, bool running) { - return xsql::thinclient::format_http_status(port, running); + return format_http_status(port, running, "127.0.0.1"); +} + +std::string format_http_status(int port, bool running, const std::string& bind_addr) { + return xsql::thinclient::format_http_status(port, running, bind_addr); } } // namespace idasql diff --git a/src/common/http_server.hpp b/src/common/http_server.hpp index ee88222..6b925c6 100644 --- a/src/common/http_server.hpp +++ b/src/common/http_server.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + #pragma once /** @@ -65,21 +68,27 @@ class IDAHTTPServer { /** Get the server URL */ std::string url() const; + /** Get bind address configured at startup */ + const std::string& bind_addr() const { return bind_addr_; } + /** Set interrupt check function (called during wait loop) */ void set_interrupt_check(std::function check); private: std::unique_ptr impl_; + std::string bind_addr_{"127.0.0.1"}; }; /** * Format HTTP server info for display */ std::string format_http_info(int port, const std::string& stop_hint = "Press Ctrl+C to stop and return to REPL."); +std::string format_http_info(int port, const std::string& bind_addr, const std::string& stop_hint); /** * Format HTTP server status */ std::string format_http_status(int port, bool running); +std::string format_http_status(int port, bool running, const std::string& bind_addr); } // namespace idasql diff --git a/src/common/idasql_cli.hpp b/src/common/idasql_cli.hpp index 16e1014..272ff3f 100644 --- a/src/common/idasql_cli.hpp +++ b/src/common/idasql_cli.hpp @@ -1,45 +1,34 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + #pragma once /** * idasql_cli.hpp - IDA plugin CLI integration wrapper * - * IdasqlCLI - IDA plugin command line interface + * IdasqlCLI - IDA plugin command line interface. * * Wraps SessionHandler and provides cli_t integration for IDA. - * The actual query processing logic is in SessionHandler (testable without IDA). - * - * Usage: - * IdasqlCLI cli(executor); - * cli.install(); // Register with IDA - * // ... plugin lifetime ... - * cli.uninstall(); // Unregister on shutdown */ -#include #include #include +#include + #include #include "session_handler.hpp" namespace idasql { -class IdasqlCLI -{ +class IdasqlCLI { public: using SqlExecutor = std::function; - explicit IdasqlCLI(SqlExecutor executor, bool enable_agent = true) - : session_(std::move(executor), enable_agent) - { - // Override clear_session callback to add IDA-specific behavior - session_.callbacks().clear_session = [this]() { - return clear_session(); - }; - } + explicit IdasqlCLI(SqlExecutor executor) + : session_(std::move(executor)) {} - ~IdasqlCLI() - { + ~IdasqlCLI() { uninstall(); } @@ -47,41 +36,32 @@ class IdasqlCLI IdasqlCLI(const IdasqlCLI&) = delete; IdasqlCLI& operator=(const IdasqlCLI&) = delete; - /** - * Install the CLI with IDA - */ - bool install() - { - if (installed_) return true; + bool install() { + if (installed_) { + return true; + } - // Store pointer for static callback s_instance_ = this; - // Setup cli_t structure cli_.size = sizeof(cli_t); cli_.flags = 0; cli_.sname = "idasql"; - cli_.lname = session_.is_agent_enabled() - ? "idasql - SQL queries with AI agent support" - : "idasql - SQL interface to IDA database"; - cli_.hint = "Enter SQL query, .command, or natural language"; + cli_.lname = "idasql - SQL interface to IDA database"; + cli_.hint = "Enter SQL query or .command"; cli_.execute_line = &IdasqlCLI::execute_line_cb; cli_.keydown = nullptr; cli_.find_completions = nullptr; install_command_interpreter(&cli_); installed_ = true; - msg("IDASQL CLI: Installed (AI agent: %s)\n", - session_.is_agent_enabled() ? "enabled" : "disabled"); + msg("IDASQL CLI: Installed\n"); return true; } - /** - * Uninstall the CLI from IDA - */ - void uninstall() - { - if (!installed_) return; + void uninstall() { + if (!installed_) { + return; + } session_.end_session(); remove_command_interpreter(&cli_); @@ -92,36 +72,11 @@ class IdasqlCLI } bool is_installed() const { return installed_; } - bool is_agent_enabled() const { return session_.is_agent_enabled(); } - /** - * Clear session - clears IDA message window and resets AI agent - */ - std::string clear_session() - { - // Clear IDA's message window - msg_clear(); - - // Clear the AI agent session - std::string result = session_.clear_session(); - - // Print status to fresh message window - msg("IDASQL: %s\n", result.c_str()); - - return result; - } - - /** - * Process a line of input (delegates to SessionHandler) - */ - std::string process_line(const std::string& line) - { + std::string process_line(const std::string& line) { return session_.process_line(line); } - /** - * Get the underlying session handler (for testing) - */ SessionHandler& session() { return session_; } const SessionHandler& session() const { return session_; } @@ -130,24 +85,22 @@ class IdasqlCLI cli_t cli_{}; bool installed_ = false; - // Static instance for callback static IdasqlCLI* s_instance_; - // Static callback for cli_t - static bool idaapi execute_line_cb(const char* line) - { - if (!s_instance_ || !line) return true; + static bool idaapi execute_line_cb(const char* line) { + if (!s_instance_ || !line) { + return true; + } std::string result = s_instance_->process_line(line); if (!result.empty()) { msg("%s\n", result.c_str()); } - return true; // Line was executed + return true; } }; -// Static member definition inline IdasqlCLI* IdasqlCLI::s_instance_ = nullptr; } // namespace idasql diff --git a/src/common/idasql_commands.hpp b/src/common/idasql_commands.hpp index 5c31628..1f5dcd3 100644 --- a/src/common/idasql_commands.hpp +++ b/src/common/idasql_commands.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * idasql_commands.hpp - Dot-command parser for interactive sessions * @@ -8,63 +11,73 @@ #include #include -#include #include - -#ifdef IDASQL_HAS_AI_AGENT -#include "agent_settings.hpp" -#endif +#include "welcome_query.hpp" namespace idasql { -/** - * Command handler result - */ enum class CommandResult { NOT_HANDLED, // Not a command, process as query HANDLED, // Command executed successfully QUIT // User requested quit }; -/** - * Command handler callbacks - * - * These callbacks allow different environments (CLI, plugin) to extend - * command behavior. For example, .clear might: - * - Core: Reset the AI agent session - * - Plugin: Also call msg_clear() to clear IDA's message window - */ struct CommandCallbacks { - std::function get_tables; // Return table list - std::function get_schema; // Return schema for table - std::function get_info; // Return database info - std::function clear_session; // Clear/reset session (agent, UI, etc.) + std::function get_tables; + std::function get_schema; + std::function get_info; - // MCP server callbacks (optional - plugin only) - std::function mcp_status; // Get MCP status - std::function mcp_start; // Start MCP server (port, bind_addr) - std::function mcp_stop; // Stop MCP server + // MCP server callbacks (optional) + std::function mcp_status; + std::function mcp_start; + std::function mcp_stop; // HTTP server callbacks (optional) - std::function http_status; // Get HTTP server status - std::function http_start; // Start HTTP server (port, bind_addr) - std::function http_stop; // Stop HTTP server + std::function http_status; + std::function http_start; + std::function http_stop; }; -/** - * Handle dot commands (.tables, .schema, .help, .quit, etc.) - * - * @param input User input line - * @param callbacks Callbacks to execute commands - * @param output Output string (filled if command produces output) - * @return CommandResult indicating how to proceed - */ +inline void parse_bind_and_port(const std::string& raw, std::string& bind_addr, int& port) { + bind_addr = "127.0.0.1"; + port = 0; + + std::string rest = raw; + size_t rs = rest.find_first_not_of(" \t"); + if (rs == std::string::npos) { + return; + } + rest = rest.substr(rs); + + std::string tok1; + std::string tok2; + size_t sp = rest.find_first_of(" \t"); + if (sp != std::string::npos) { + tok1 = rest.substr(0, sp); + size_t t2s = rest.find_first_not_of(" \t", sp); + if (t2s != std::string::npos) { + tok2 = rest.substr(t2s); + } + } else { + tok1 = rest; + } + + const bool tok1_numeric = !tok1.empty() && tok1.find_first_not_of("0123456789") == std::string::npos; + if (tok1_numeric) { + port = std::stoi(tok1); + } else { + bind_addr = tok1; + if (!tok2.empty()) { + port = std::stoi(tok2); + } + } +} + inline CommandResult handle_command( const std::string& input, const CommandCallbacks& callbacks, - std::string& output) -{ + std::string& output) { if (input.empty() || input[0] != '.') { return CommandResult::NOT_HANDLED; } @@ -87,30 +100,20 @@ inline CommandResult handle_command( return CommandResult::HANDLED; } - if (input == ".clear") { - if (callbacks.clear_session) { - output = callbacks.clear_session(); - } else { - output = "Session cleared"; - } - return CommandResult::HANDLED; - } - if (input == ".help") { output = "IDASQL Commands:\n" " .tables List all tables\n" " .schema Show table schema\n" " .info Show database info\n" - " .clear Clear/reset session\n" " .quit / .exit Exit\n" " .help Show this help\n" -#ifdef IDASQL_HAS_AI_AGENT +#ifdef IDASQL_HAS_MCP "\n" "MCP Server:\n" - " .mcp Show status or start if not running\n" + " .mcp Show status or start if not running\n" " .mcp start [bind] [port] Start MCP server\n" - " .mcp stop Stop MCP server\n" - " .mcp help Show MCP help\n" + " .mcp stop Stop MCP server\n" + " .mcp help Show MCP help\n" #endif "\n" "HTTP Server:\n" @@ -121,177 +124,103 @@ inline CommandResult handle_command( "\n" "SQL:\n" " SELECT * FROM funcs LIMIT 10;\n" - " SELECT name, size FROM funcs ORDER BY size DESC;\n" -#ifdef IDASQL_HAS_AI_AGENT - "\n" - "AI Agent:\n" - " .agent help Show agent commands\n" - " .agent provider Show/set AI provider\n" - " .agent clear Clear conversation\n" - "\n" - "Natural Language:\n" - " Find the largest functions\n" - " Show functions that call malloc\n" - " What imports does this binary use?\n" -#endif - ; + " SELECT name, size FROM funcs ORDER BY size DESC;\n"; return CommandResult::HANDLED; } - // .mcp commands (MCP server control - plugin only) if (input.rfind(".mcp", 0) == 0) { -#ifdef IDASQL_HAS_AI_AGENT +#ifdef IDASQL_HAS_MCP std::string subargs = input.length() > 4 ? input.substr(4) : ""; - // Trim leading whitespace size_t start = subargs.find_first_not_of(" \t"); - if (start != std::string::npos) + if (start != std::string::npos) { subargs = subargs.substr(start); + } if (subargs.empty()) { - // .mcp - show status, start if not running if (callbacks.mcp_status) { output = callbacks.mcp_status(); } else { - output = "MCP server not available (plugin mode only)"; + output = "MCP server not available"; } - } - else if (subargs.rfind("start", 0) == 0) { + } else if (subargs.rfind("start", 0) == 0) { int port = 0; std::string bind_addr = "127.0.0.1"; - // Parse optional: "start [bind] [port]" std::string rest = subargs.length() > 5 ? subargs.substr(5) : ""; - size_t rs = rest.find_first_not_of(" \t"); - if (rs != std::string::npos) { - rest = rest.substr(rs); - // Split into tokens - std::string tok1, tok2; - size_t sp = rest.find_first_of(" \t"); - if (sp != std::string::npos) { - tok1 = rest.substr(0, sp); - size_t t2s = rest.find_first_not_of(" \t", sp); - if (t2s != std::string::npos) tok2 = rest.substr(t2s); - } else { - tok1 = rest; - } - // Heuristic: if tok1 is all digits, treat as port; otherwise bind_addr - bool tok1_numeric = !tok1.empty() && tok1.find_first_not_of("0123456789") == std::string::npos; - if (tok1_numeric) { - port = std::stoi(tok1); - } else { - bind_addr = tok1; - if (!tok2.empty()) port = std::stoi(tok2); - } - } + parse_bind_and_port(rest, bind_addr, port); + if (callbacks.mcp_start) { output = callbacks.mcp_start(port, bind_addr); - std::string host; - int actual_port = 0; - if (xsql::thinclient::extract_mcp_start_endpoint(output, host, actual_port)) { - const std::string clipboard_text = - xsql::thinclient::build_mcp_clipboard_payload("idasql", host, actual_port); - (void)xsql::thinclient::try_copy_text_to_clipboard_windows(clipboard_text); - } + auto nl = output.find('\n'); + const std::string clipboard_text = (nl != std::string::npos) ? output.substr(0, nl) : output; + (void)xsql::thinclient::try_copy_text_to_clipboard_windows(clipboard_text); } else { - output = "MCP server not available (plugin mode only)"; + output = "MCP server not available"; } - } - else if (subargs == "stop") { + } else if (subargs == "stop") { if (callbacks.mcp_stop) { output = callbacks.mcp_stop(); } else { - output = "MCP server not available (plugin mode only)"; + output = "MCP server not available"; } - } - else if (subargs == "help") { + } else if (subargs == "help") { output = "MCP Server Commands:\n" " .mcp Show status, start if not running\n" - " .mcp start [bind] [port] Start MCP server (default: 127.0.0.1, random port)\n" + " .mcp start [bind] [port] Start MCP server (default: 127.0.0.1, random port)\n" " .mcp stop Stop MCP server\n" " .mcp help Show this help\n" "\n" - "The MCP server exposes two tools:\n" + "The MCP server exposes one tool:\n" " idasql_query - Execute SQL query directly\n" - " idasql_agent - Ask natural language question (AI-powered)\n" "\n" "Connect with Claude Desktop by adding to config:\n" " {\"mcpServers\": {\"idasql\": {\"url\": \"http://127.0.0.1:/sse\"}}}\n"; - } - else { + } else { output = "Unknown MCP command: " + subargs + "\nUse '.mcp help' for available commands."; } #else - output = "MCP server requires AI agent support. Rebuild with -DIDASQL_WITH_AI_AGENT=ON"; + output = "MCP server support not compiled in. Rebuild with -DIDASQL_WITH_MCP=ON"; #endif return CommandResult::HANDLED; } - // .http commands (HTTP server control) if (input.rfind(".http", 0) == 0) { std::string subargs = input.length() > 5 ? input.substr(5) : ""; - // Trim leading whitespace size_t start = subargs.find_first_not_of(" \t"); - if (start != std::string::npos) + if (start != std::string::npos) { subargs = subargs.substr(start); + } if (subargs.empty()) { - // .http - show status, start if not running if (callbacks.http_status) { output = callbacks.http_status(); } else { output = "HTTP server not available"; } - } - else if (subargs.rfind("start", 0) == 0) { + } else if (subargs.rfind("start", 0) == 0) { int port = 0; std::string bind_addr = "127.0.0.1"; - // Parse optional: "start [bind] [port]" std::string rest = subargs.length() > 5 ? subargs.substr(5) : ""; - size_t rs = rest.find_first_not_of(" \t"); - if (rs != std::string::npos) { - rest = rest.substr(rs); - // Split into tokens - std::string tok1, tok2; - size_t sp = rest.find_first_of(" \t"); - if (sp != std::string::npos) { - tok1 = rest.substr(0, sp); - size_t t2s = rest.find_first_not_of(" \t", sp); - if (t2s != std::string::npos) tok2 = rest.substr(t2s); - } else { - tok1 = rest; - } - // Heuristic: if tok1 is all digits, treat as port; otherwise bind_addr - bool tok1_numeric = !tok1.empty() && tok1.find_first_not_of("0123456789") == std::string::npos; - if (tok1_numeric) { - port = std::stoi(tok1); - } else { - bind_addr = tok1; - if (!tok2.empty()) port = std::stoi(tok2); - } - } + parse_bind_and_port(rest, bind_addr, port); + if (callbacks.http_start) { output = callbacks.http_start(port, bind_addr); - std::string host; - int actual_port = 0; - if (xsql::thinclient::extract_http_start_endpoint(output, host, actual_port)) { - const std::string clipboard_text = - xsql::thinclient::build_http_clipboard_payload("idasql", host, actual_port); - (void)xsql::thinclient::try_copy_text_to_clipboard_windows(clipboard_text); - } + auto nl = output.find('\n'); + const std::string clipboard_text = (nl != std::string::npos) ? output.substr(0, nl) : output; + (void)xsql::thinclient::try_copy_text_to_clipboard_windows(clipboard_text); } else { output = "HTTP server not available"; } - } - else if (subargs == "stop") { + } else if (subargs == "stop") { if (callbacks.http_stop) { output = callbacks.http_stop(); } else { output = "HTTP server not available"; } - } - else if (subargs == "help") { + } else if (subargs == "help") { + const std::string example = idasql::format_query_curl_example("http://127.0.0.1:"); output = "HTTP Server Commands:\n" " .http Show status, start if not running\n" - " .http start [bind] [port] Start HTTP server (default: 127.0.0.1, random port)\n" + " .http start [bind] [port] Start HTTP server (default: 127.0.0.1, random port)\n" " .http stop Stop HTTP server\n" " .http help Show this help\n" "\n" @@ -301,207 +230,23 @@ inline CommandResult handle_command( " GET /status Health check\n" " POST /shutdown Stop server\n" "\n" + "Schema discovery:\n" + " SELECT name, type FROM sqlite_master WHERE type IN ('table','view') ORDER BY type, name;\n" + " PRAGMA table_info(funcs);\n" + "\n" "Example:\n" - " curl -X POST http://127.0.0.1:/query -d \"SELECT name FROM funcs LIMIT 5\"\n"; - } - else { - output = "Unknown HTTP command: " + subargs + "\nUse '.http help' for available commands."; - } - return CommandResult::HANDLED; - } - - // .agent commands - if (input.rfind(".agent", 0) == 0) { -#ifdef IDASQL_HAS_AI_AGENT - std::string subargs = input.length() > 6 ? input.substr(6) : ""; - // Trim leading whitespace - size_t start = subargs.find_first_not_of(" \t"); - if (start != std::string::npos) - subargs = subargs.substr(start); - - // Parse subcmd and value - std::string subcmd, value; - size_t space = subargs.find(' '); - if (space != std::string::npos) { - subcmd = subargs.substr(0, space); - value = subargs.substr(space + 1); - size_t val_start = value.find_first_not_of(" \t"); - if (val_start != std::string::npos) - value = value.substr(val_start); + " " + example + "\n"; } else { - subcmd = subargs; - } - - auto settings = LoadAgentSettings(); - std::string provider_name = libagents::provider_type_name(settings.default_provider); - - if (subcmd.empty() || subcmd == "help") { - output = "Agent Commands:\n" - " .agent help Show this help\n" - " .agent provider Show current provider\n" - " .agent provider NAME Switch provider (claude, copilot)\n" - " .agent clear Clear conversation\n" - " .agent timeout Show response timeout\n" - " .agent timeout MS Set response timeout in milliseconds\n" - " .agent byok Show BYOK status\n" - " .agent byok enable Enable BYOK\n" - " .agent byok disable Disable BYOK\n" - " .agent byok key VALUE Set API key\n" - " .agent byok endpoint URL Set API endpoint\n" - " .agent byok model NAME Set model name\n" - " .agent byok type TYPE Set provider type (openai, anthropic, azure)\n" - "\nCurrent provider: " + provider_name + "\n"; - } - else if (subcmd == "provider") { - if (value.empty()) { - output = "Current provider: " + provider_name + "\n" - "\nAvailable providers:\n" - " claude - Claude Code (Anthropic)\n" - " copilot - GitHub Copilot\n"; - } else { - try { - auto type = ParseProviderType(value); - settings.default_provider = type; - SaveAgentSettings(settings); - output = "Provider set to: " + std::string(libagents::provider_type_name(type)) + - " (saved to settings)\n" - "Note: Restart agent session for changes to take effect.\n"; - } catch (const std::exception& e) { - output = std::string("Error: ") + e.what() + "\n" - "Available providers: claude, copilot\n"; - } - } - } - else if (subcmd == "clear") { - if (callbacks.clear_session) { - output = callbacks.clear_session(); - } else { - output = "Session cleared"; - } - } - else if (subcmd == "timeout") { - if (value.empty()) { - output = "Response timeout: " + std::to_string(settings.response_timeout_ms) + " ms (" + - std::to_string(settings.response_timeout_ms / 1000) + " seconds)\n"; - } else { - try { - int ms = std::stoi(value); - if (ms < 1000) { - output = "Error: Timeout must be at least 1000 ms (1 second).\n"; - } else { - settings.response_timeout_ms = ms; - SaveAgentSettings(settings); - output = "Timeout set to " + std::to_string(ms) + " ms (" + - std::to_string(ms / 1000) + " seconds).\n"; - } - } catch (...) { - output = "Error: Invalid timeout value. Use milliseconds.\n"; - } - } - } - else if (subcmd == "byok") { - // Parse BYOK subcommand - std::string byok_subcmd, byok_value; - size_t byok_space = value.find(' '); - if (byok_space != std::string::npos) { - byok_subcmd = value.substr(0, byok_space); - byok_value = value.substr(byok_space + 1); - size_t bv_start = byok_value.find_first_not_of(" \t"); - if (bv_start != std::string::npos) - byok_value = byok_value.substr(bv_start); - } else { - byok_subcmd = value; - } - - const BYOKSettings* byok = settings.get_byok(); - - if (byok_subcmd.empty()) { - std::stringstream ss; - ss << "BYOK status for provider '" << provider_name << "':\n"; - if (byok) { - ss << " Enabled: " << (byok->enabled ? "yes" : "no") << "\n" - << " API Key: " << (byok->api_key.empty() ? "(not set)" : "********") << "\n" - << " Endpoint: " << (byok->base_url.empty() ? "(default)" : byok->base_url) << "\n" - << " Model: " << (byok->model.empty() ? "(default)" : byok->model) << "\n" - << " Type: " << (byok->provider_type.empty() ? "(default)" : byok->provider_type) << "\n" - << " Usable: " << (byok->is_usable() ? "yes" : "no") << "\n"; - } else { - ss << " (not configured)\n"; - } - output = ss.str(); - } - else if (byok_subcmd == "enable") { - auto& b = settings.get_or_create_byok(); - b.enabled = true; - SaveAgentSettings(settings); - output = "BYOK enabled for provider '" + provider_name + "'.\n"; - if (b.api_key.empty()) { - output += "Warning: API key not set. Use '.agent byok key ' to set it.\n"; - } - } - else if (byok_subcmd == "disable") { - auto& b = settings.get_or_create_byok(); - b.enabled = false; - SaveAgentSettings(settings); - output = "BYOK disabled for provider '" + provider_name + "'.\n"; - } - else if (byok_subcmd == "key") { - if (byok_value.empty()) { - output = "Error: API key value required.\n" - "Usage: .agent byok key \n"; - } else { - auto& b = settings.get_or_create_byok(); - b.api_key = byok_value; - SaveAgentSettings(settings); - output = "BYOK API key set for provider '" + provider_name + "'.\n"; - } - } - else if (byok_subcmd == "endpoint") { - auto& b = settings.get_or_create_byok(); - b.base_url = byok_value; - SaveAgentSettings(settings); - output = byok_value.empty() ? - "BYOK endpoint cleared (using default).\n" : - "BYOK endpoint set to: " + byok_value + "\n"; - } - else if (byok_subcmd == "model") { - auto& b = settings.get_or_create_byok(); - b.model = byok_value; - SaveAgentSettings(settings); - output = byok_value.empty() ? - "BYOK model cleared (using default).\n" : - "BYOK model set to: " + byok_value + "\n"; - } - else if (byok_subcmd == "type") { - auto& b = settings.get_or_create_byok(); - b.provider_type = byok_value; - SaveAgentSettings(settings); - output = byok_value.empty() ? - "BYOK type cleared (using default).\n" : - "BYOK type set to: " + byok_value + "\n"; - } - else { - output = "Unknown byok subcommand: " + byok_subcmd + "\n" - "Use '.agent byok' to see available commands.\n"; - } - } - else { - output = "Unknown agent subcommand: " + subcmd + "\n" - "Use '.agent help' for available commands.\n"; + output = "Unknown HTTP command: " + subargs + "\nUse '.http help' for available commands."; } -#else - output = "AI agent support not compiled in. Rebuild with -DIDASQL_WITH_AI_AGENT=ON\n"; -#endif return CommandResult::HANDLED; } if (input.rfind(".schema", 0) == 0) { std::string table = input.length() > 8 ? input.substr(8) : ""; - // Trim leading whitespace size_t start = table.find_first_not_of(" \t"); if (start != std::string::npos) { table = table.substr(start); - // Trim trailing whitespace size_t end = table.find_last_not_of(" \t"); if (end != std::string::npos) { table = table.substr(0, end + 1); @@ -522,233 +267,4 @@ inline CommandResult handle_command( return CommandResult::HANDLED; } -/** - * Handle --config CLI commands - * - * @param path Config path like "agent.provider" or "agent.byok.key" - * @param value Value to set (empty = get current value) - * @return tuple - */ -inline std::tuple handle_config_command( - const std::string& path, - const std::string& value) -{ -#ifdef IDASQL_HAS_AI_AGENT - auto settings = LoadAgentSettings(); - std::string provider_name = libagents::provider_type_name(settings.default_provider); - std::stringstream ss; - - // Show all config - if (path.empty()) { - ss << "Settings: " << GetSettingsPath() << "\n\n"; - ss << "agent.provider: " << provider_name << "\n"; - ss << "agent.timeout: " << settings.response_timeout_ms << " ms\n"; - ss << "agent.prompt: " << (settings.custom_prompt.empty() ? "(not set)" : "\"" + settings.custom_prompt + "\"") << "\n"; - ss << "\n"; - - const BYOKSettings* byok = settings.get_byok(); - ss << "agent.byok (" << provider_name << "):\n"; - if (byok) { - ss << " enabled: " << (byok->enabled ? "true" : "false") << "\n"; - ss << " key: " << (byok->api_key.empty() ? "(not set)" : "********") << "\n"; - ss << " endpoint: " << (byok->base_url.empty() ? "(default)" : byok->base_url) << "\n"; - ss << " model: " << (byok->model.empty() ? "(default)" : byok->model) << "\n"; - ss << " type: " << (byok->provider_type.empty() ? "(default)" : byok->provider_type) << "\n"; - } else { - ss << " (not configured)\n"; - } - return {true, ss.str(), 0}; - } - - // Parse path - std::vector parts; - std::string part; - std::istringstream iss(path); - while (std::getline(iss, part, '.')) { - if (!part.empty()) parts.push_back(part); - } - - if (parts.empty() || parts[0] != "agent") { - return {false, "Error: Unknown config path: " + path + "\nUse --config to see available options.\n", 1}; - } - - // agent.* - if (parts.size() == 1) { - // Just "agent" - show agent settings - ss << "agent.provider: " << provider_name << "\n"; - ss << "agent.timeout: " << settings.response_timeout_ms << " ms\n"; - ss << "agent.prompt: " << (settings.custom_prompt.empty() ? "(not set)" : "\"" + settings.custom_prompt + "\"") << "\n"; - return {true, ss.str(), 0}; - } - - std::string key = parts[1]; - - // agent.provider - if (key == "provider") { - if (value.empty()) { - ss << "agent.provider = " << provider_name << "\n"; - } else { - try { - auto type = ParseProviderType(value); - settings.default_provider = type; - SaveAgentSettings(settings); - ss << "agent.provider = " << libagents::provider_type_name(type) << " (saved)\n"; - } catch (const std::exception& e) { - return {false, std::string("Error: ") + e.what() + "\n", 1}; - } - } - return {true, ss.str(), 0}; - } - - // agent.timeout - if (key == "timeout") { - if (value.empty()) { - ss << "agent.timeout = " << settings.response_timeout_ms << " ms\n"; - } else { - try { - int ms = std::stoi(value); - if (ms < 1000) { - return {false, "Error: Timeout must be at least 1000 ms.\n", 1}; - } - settings.response_timeout_ms = ms; - SaveAgentSettings(settings); - ss << "agent.timeout = " << ms << " ms (saved)\n"; - } catch (...) { - return {false, "Error: Invalid timeout value.\n", 1}; - } - } - return {true, ss.str(), 0}; - } - - // agent.prompt - if (key == "prompt") { - if (value.empty()) { - ss << "agent.prompt = " << (settings.custom_prompt.empty() ? "(not set)" : "\"" + settings.custom_prompt + "\"") << "\n"; - } else { - settings.custom_prompt = value; - SaveAgentSettings(settings); - ss << "agent.prompt = \"" << value << "\" (saved)\n"; - } - return {true, ss.str(), 0}; - } - - // agent.byok.* - if (key == "byok") { - // Helper lambda to show BYOK status for a provider - auto show_byok = [&ss](const std::string& pname, const BYOKSettings* byok) { - ss << "agent.byok." << pname << ":\n"; - if (byok) { - ss << " enabled: " << (byok->enabled ? "true" : "false") << "\n"; - ss << " key: " << (byok->api_key.empty() ? "(not set)" : "********") << "\n"; - ss << " endpoint: " << (byok->base_url.empty() ? "(default)" : byok->base_url) << "\n"; - ss << " model: " << (byok->model.empty() ? "(default)" : byok->model) << "\n"; - ss << " type: " << (byok->provider_type.empty() ? "(default)" : byok->provider_type) << "\n"; - } else { - ss << " (not configured)\n"; - } - }; - - // Helper lambda to get/set a BYOK field - auto handle_byok_field = [&](BYOKSettings& byok, const std::string& field, - const std::string& prefix) -> std::tuple { - std::stringstream out; - if (field == "enabled") { - if (value.empty()) { - out << prefix << ".enabled = " << (byok.enabled ? "true" : "false") << "\n"; - } else { - byok.enabled = (value == "true" || value == "1" || value == "yes"); - SaveAgentSettings(settings); - out << prefix << ".enabled = " << (byok.enabled ? "true" : "false") << " (saved)\n"; - } - return {true, out.str(), 0}; - } - if (field == "key") { - if (value.empty()) { - out << prefix << ".key = " << (byok.api_key.empty() ? "(not set)" : "********") << "\n"; - } else { - byok.api_key = value; - SaveAgentSettings(settings); - out << prefix << ".key = ******** (saved)\n"; - } - return {true, out.str(), 0}; - } - if (field == "endpoint") { - if (value.empty()) { - out << prefix << ".endpoint = " << (byok.base_url.empty() ? "(default)" : byok.base_url) << "\n"; - } else { - byok.base_url = value; - SaveAgentSettings(settings); - out << prefix << ".endpoint = " << value << " (saved)\n"; - } - return {true, out.str(), 0}; - } - if (field == "model") { - if (value.empty()) { - out << prefix << ".model = " << (byok.model.empty() ? "(default)" : byok.model) << "\n"; - } else { - byok.model = value; - SaveAgentSettings(settings); - out << prefix << ".model = " << value << " (saved)\n"; - } - return {true, out.str(), 0}; - } - if (field == "type") { - if (value.empty()) { - out << prefix << ".type = " << (byok.provider_type.empty() ? "(default)" : byok.provider_type) << "\n"; - } else { - byok.provider_type = value; - SaveAgentSettings(settings); - out << prefix << ".type = " << value << " (saved)\n"; - } - return {true, out.str(), 0}; - } - return {false, "Error: Unknown BYOK field: " + field + "\n", 1}; - }; - - if (parts.size() == 2) { - // "agent.byok" - show all providers' BYOK status - auto it_claude = settings.byok.find("claude"); - auto it_copilot = settings.byok.find("copilot"); - show_byok("claude", it_claude != settings.byok.end() ? &it_claude->second : nullptr); - ss << "\n"; - show_byok("copilot", it_copilot != settings.byok.end() ? &it_copilot->second : nullptr); - return {true, ss.str(), 0}; - } - - std::string part2 = parts[2]; - - // Check if part2 is a provider name (claude/copilot) or a field name - if (part2 == "claude" || part2 == "copilot") { - // agent.byok. or agent.byok.. - std::string target_provider = part2; - - if (parts.size() == 3) { - // "agent.byok.copilot" - show this provider's BYOK - auto it = settings.byok.find(target_provider); - show_byok(target_provider, it != settings.byok.end() ? &it->second : nullptr); - return {true, ss.str(), 0}; - } - - if (parts.size() == 4) { - // "agent.byok.copilot." - get/set field - std::string field = parts[3]; - auto& byok = settings.byok[target_provider]; - return handle_byok_field(byok, field, "agent.byok." + target_provider); - } - } else { - // agent.byok. - uses current provider - auto& byok = settings.get_or_create_byok(); - return handle_byok_field(byok, part2, "agent.byok"); - } - - return {false, "Error: Unknown config path: " + path + "\n", 1}; - } - - return {false, "Error: Unknown config path: " + path + "\nUse --config to see available options.\n", 1}; - -#else - return {false, "Error: AI agent not compiled in. Rebuild with -DIDASQL_WITH_AI_AGENT=ON\n", 1}; -#endif -} - } // namespace idasql diff --git a/src/common/idasql_version.hpp b/src/common/idasql_version.hpp index e48a5f0..1cca5e9 100644 --- a/src/common/idasql_version.hpp +++ b/src/common/idasql_version.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * idasql_version.hpp - Version constants for IDASQL */ @@ -6,5 +9,5 @@ #define IDASQL_VERSION_MAJOR 0 #define IDASQL_VERSION_MINOR 0 -#define IDASQL_VERSION_PATCH 9 -#define IDASQL_VERSION_STRING "0.0.9" +#define IDASQL_VERSION_PATCH 10 +#define IDASQL_VERSION_STRING "0.0.10" diff --git a/src/common/json_utils.hpp b/src/common/json_utils.hpp index 02da404..1071f9f 100644 --- a/src/common/json_utils.hpp +++ b/src/common/json_utils.hpp @@ -1,40 +1,238 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** - * json_utils.hpp - Minimal JSON escaping helpers + * json_utils.hpp - JSON escaping and query result serialization helpers */ #pragma once -#include -#include +#include + #include +#include namespace idasql { -inline std::string escape_json(const std::string& input) { - std::ostringstream oss; - for (unsigned char c : input) { +inline void append_json_hex_escape(std::string& out, unsigned char byte) { + static const char kHex[] = "0123456789ABCDEF"; + out += "\\u00"; + out.push_back(kHex[(byte >> 4) & 0x0F]); + out.push_back(kHex[byte & 0x0F]); +} + +inline bool is_utf8_continuation(unsigned char ch) { + return (ch & 0xC0U) == 0x80U; +} + +inline bool is_valid_utf8_sequence(std::string_view input, size_t pos, size_t& seq_len) { + seq_len = 0; + if (pos >= input.size()) { + return false; + } + + const unsigned char c0 = static_cast(input[pos]); + if (c0 < 0x80U) { + seq_len = 1; + return true; + } + + if (c0 >= 0xC2U && c0 <= 0xDFU) { + if (pos + 1 >= input.size()) return false; + const unsigned char c1 = static_cast(input[pos + 1]); + if (!is_utf8_continuation(c1)) return false; + seq_len = 2; + return true; + } + + if (c0 == 0xE0U) { + if (pos + 2 >= input.size()) return false; + const unsigned char c1 = static_cast(input[pos + 1]); + const unsigned char c2 = static_cast(input[pos + 2]); + if (c1 < 0xA0U || c1 > 0xBFU || !is_utf8_continuation(c2)) return false; + seq_len = 3; + return true; + } + if ((c0 >= 0xE1U && c0 <= 0xECU) || (c0 >= 0xEEU && c0 <= 0xEFU)) { + if (pos + 2 >= input.size()) return false; + const unsigned char c1 = static_cast(input[pos + 1]); + const unsigned char c2 = static_cast(input[pos + 2]); + if (!is_utf8_continuation(c1) || !is_utf8_continuation(c2)) return false; + seq_len = 3; + return true; + } + if (c0 == 0xEDU) { + if (pos + 2 >= input.size()) return false; + const unsigned char c1 = static_cast(input[pos + 1]); + const unsigned char c2 = static_cast(input[pos + 2]); + if (c1 < 0x80U || c1 > 0x9FU || !is_utf8_continuation(c2)) return false; + seq_len = 3; + return true; + } + + if (c0 == 0xF0U) { + if (pos + 3 >= input.size()) return false; + const unsigned char c1 = static_cast(input[pos + 1]); + const unsigned char c2 = static_cast(input[pos + 2]); + const unsigned char c3 = static_cast(input[pos + 3]); + if (c1 < 0x90U || c1 > 0xBFU + || !is_utf8_continuation(c2) + || !is_utf8_continuation(c3)) { + return false; + } + seq_len = 4; + return true; + } + if (c0 >= 0xF1U && c0 <= 0xF3U) { + if (pos + 3 >= input.size()) return false; + const unsigned char c1 = static_cast(input[pos + 1]); + const unsigned char c2 = static_cast(input[pos + 2]); + const unsigned char c3 = static_cast(input[pos + 3]); + if (!is_utf8_continuation(c1) + || !is_utf8_continuation(c2) + || !is_utf8_continuation(c3)) { + return false; + } + seq_len = 4; + return true; + } + if (c0 == 0xF4U) { + if (pos + 3 >= input.size()) return false; + const unsigned char c1 = static_cast(input[pos + 1]); + const unsigned char c2 = static_cast(input[pos + 2]); + const unsigned char c3 = static_cast(input[pos + 3]); + if (c1 < 0x80U || c1 > 0x8FU + || !is_utf8_continuation(c2) + || !is_utf8_continuation(c3)) { + return false; + } + seq_len = 4; + return true; + } + + return false; +} + +inline void append_json_string(std::string& out, std::string_view input) { + out.push_back('"'); + for (size_t i = 0; i < input.size();) { + const unsigned char c = static_cast(input[i]); switch (c) { - case '\"': oss << "\\\""; break; - case '\\': oss << "\\\\"; break; - case '\b': oss << "\\b"; break; - case '\f': oss << "\\f"; break; - case '\n': oss << "\\n"; break; - case '\r': oss << "\\r"; break; - case '\t': oss << "\\t"; break; + case '"': + out += "\\\""; + ++i; + break; + case '\\': + out += "\\\\"; + ++i; + break; + case '\b': + out += "\\b"; + ++i; + break; + case '\f': + out += "\\f"; + ++i; + break; + case '\n': + out += "\\n"; + ++i; + break; + case '\r': + out += "\\r"; + ++i; + break; + case '\t': + out += "\\t"; + ++i; + break; default: if (c < 0x20) { - oss << "\\u" - << std::hex << std::uppercase << std::setfill('0') - << std::setw(4) << static_cast(c); - // reset flags for subsequent writes - oss << std::dec; + append_json_hex_escape(out, c); + ++i; + break; + } + + if (c < 0x80U) { + out.push_back(static_cast(c)); + ++i; } else { - oss << static_cast(c); + size_t seq_len = 0; + if (is_valid_utf8_sequence(input, i, seq_len)) { + out.append(input.data() + i, seq_len); + i += seq_len; + } else { + // Preserve non-UTF8 byte values as JSON escapes. + append_json_hex_escape(out, c); + ++i; + } } break; } } - return oss.str(); + out.push_back('"'); +} + +inline std::string escape_json(const std::string& input) { + std::string out; + out.reserve(input.size() + 8); + append_json_string(out, input); + if (out.size() < 2) { + return {}; + } + return out.substr(1, out.size() - 2); +} + +inline void append_json_string_array(std::string& out, const std::vector& values) { + out.push_back('['); + for (size_t i = 0; i < values.size(); ++i) { + if (i != 0) out.push_back(','); + append_json_string(out, values[i]); + } + out.push_back(']'); +} + +inline std::string query_result_to_json_safe(const QueryResult& result) { + std::string out; + out.reserve(256); + out += "{\"success\":"; + out += result.success ? "true" : "false"; + + if (result.success) { + out += ",\"columns\":"; + append_json_string_array(out, result.columns); + + out += ",\"rows\":["; + for (size_t i = 0; i < result.rows.size(); ++i) { + if (i != 0) out.push_back(','); + append_json_string_array(out, result.rows[i].values); + } + out.push_back(']'); + + out += ",\"row_count\":"; + out += std::to_string(result.rows.size()); + + if (!result.warnings.empty()) { + out += ",\"warnings\":"; + append_json_string_array(out, result.warnings); + } + if (result.timed_out) { + out += ",\"timed_out\":true"; + } + if (result.partial) { + out += ",\"partial\":true"; + } + if (result.elapsed_ms > 0) { + out += ",\"elapsed_ms\":"; + out += std::to_string(result.elapsed_ms); + } + } else { + out += ",\"error\":"; + append_json_string(out, result.error); + } + + out.push_back('}'); + return out; } } // namespace idasql diff --git a/src/common/mcp_server.cpp b/src/common/mcp_server.cpp index 686cb16..051e9cd 100644 --- a/src/common/mcp_server.cpp +++ b/src/common/mcp_server.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + #include "mcp_server.hpp" #include @@ -6,11 +9,13 @@ #include #include #include +#include #include #include #include #include +#include namespace idasql { @@ -59,13 +64,17 @@ MCPQueueResult IDAMCPServer::queue_and_wait(MCPPendingCommand::Type type, const const auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms); while (!cmd->completed && running_.load()) { if (cmd->started) { - cmd->done_cv.wait_for(lock, std::chrono::milliseconds(100), - [&]() { return cmd->completed || !running_.load(); }); + cmd->done_cv.wait_for( + lock, + std::chrono::milliseconds(100), + [&]() { return cmd->completed || !running_.load(); }); continue; } - if (cmd->done_cv.wait_until(lock, deadline, - [&]() { return cmd->completed || cmd->started || !running_.load(); })) { + if (cmd->done_cv.wait_until( + lock, + deadline, + [&]() { return cmd->completed || cmd->started || !running_.load(); })) { continue; } @@ -93,21 +102,21 @@ MCPQueueResult IDAMCPServer::queue_and_wait(MCPPendingCommand::Type type, const return {false, "Error: MCP request timed out in queue (raise PRAGMA idasql.queue_admission_timeout_ms)"}; } - return {true, cmd->result}; + // Convention: query callbacks return "Error: ..." on failure + bool ok = cmd->result.substr(0, 7) != "Error: "; + return {ok, cmd->result}; } -int IDAMCPServer::start(int port, QueryCallback query_cb, AskCallback ask_cb, +int IDAMCPServer::start(int port, QueryCallback query_cb, const std::string& bind_addr, bool use_queue) { if (running_.load()) { return port_; } query_cb_ = query_cb; - ask_cb_ = ask_cb; bind_addr_ = bind_addr; use_queue_.store(use_queue); - // If port is 0, pick a random port in the 9000-9999 range if (port == 0) { std::random_device rd; std::mt19937 gen(rd()); @@ -117,7 +126,6 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, AskCallback ask_cb, impl_ = std::make_unique(); - // Register idasql_query tool - direct SQL execution Json query_input_schema = { {"type", "object"}, {"properties", { @@ -129,18 +137,13 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, AskCallback ask_cb, {"required", Json::array({"query"})} }; - Json query_output_schema = { - {"type", "object"}, - {"properties", { - {"result", {{"type", "string"}}}, - {"success", {{"type", "boolean"}}} - }} - }; - + // No outputSchema — the tool returns text content, not structuredContent. + // Declaring an outputSchema without returning structuredContent violates + // the MCP spec and causes the official Python SDK to reject responses. fastmcpp::tools::Tool sql_query_tool{ "idasql_query", query_input_schema, - query_output_schema, + Json(), [this](const Json& args) -> Json { std::string query = args.value("query", ""); if (query.empty()) { @@ -156,12 +159,10 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, AskCallback ask_cb, bool success = true; if (use_queue_.load()) { - // Queue mode (CLI): queue command for main thread execution auto qr = queue_and_wait(MCPPendingCommand::Type::Query, query); result = qr.payload; success = qr.success; } else { - // Direct mode (plugin): callback uses execute_sync internally if (!query_cb_) { return Json{ {"content", Json::array({ @@ -171,9 +172,12 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, AskCallback ask_cb, }; } result = query_cb_(query); + // Convention: query callbacks return "Error: ..." on failure + if (result.substr(0, 7) == "Error: ") { + success = false; + } } - // MCP tools/call expects content array format return Json{ {"content", Json::array({ Json{{"type", "text"}, {"text", result}} @@ -185,74 +189,9 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, AskCallback ask_cb, sql_query_tool.set_description("Execute a SQL query against the IDA database and return results"); impl_->tool_manager.register_tool(sql_query_tool); - // Register idasql_agent tool - natural language query (if ask_cb provided) - if (ask_cb_) { - Json ask_input_schema = { - {"type", "object"}, - {"properties", { - {"question", { - {"type", "string"}, - {"description", "Natural language question about the binary (e.g., 'What functions call malloc?')"} - }} - }}, - {"required", Json::array({"question"})} - }; - - Json ask_output_schema = { - {"type", "object"}, - {"properties", { - {"response", {{"type", "string"}}}, - {"success", {{"type", "boolean"}}} - }} - }; - - fastmcpp::tools::Tool agent_ask_tool{ - "idasql_agent", - ask_input_schema, - ask_output_schema, - [this](const Json& args) -> Json { - std::string question = args.value("question", ""); - if (question.empty()) { - return Json{ - {"content", Json::array({ - Json{{"type", "text"}, {"text", "Error: missing question"}} - })}, - {"isError", true} - }; - } - - std::string result; - bool success = true; - - if (use_queue_.load()) { - // Queue mode (CLI): queue command for main thread execution - auto qr = queue_and_wait(MCPPendingCommand::Type::Ask, question); - result = qr.payload; - success = qr.success; - } else { - // Direct mode (plugin): callback handles thread safety - result = ask_cb_(question); - } - - return Json{ - {"content", Json::array({ - Json{{"type", "text"}, {"text", result}} - })}, - {"isError", !success} - }; - } - }; - agent_ask_tool.set_description("Ask a natural language question about the binary - AI translates to SQL and returns results"); - impl_->tool_manager.register_tool(agent_ask_tool); - } - - // Create MCP handler std::unordered_map descriptions = { {"idasql_query", "Execute a SQL query against the IDA database and return results"} }; - if (ask_cb_) { - descriptions["idasql_agent"] = "Ask a natural language question about the binary - AI translates to SQL and returns results"; - } auto handler = fastmcpp::mcp::make_mcp_handler( "idasql", @@ -261,7 +200,6 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, AskCallback ask_cb, descriptions ); - // Create and start SSE server impl_->server = std::make_unique( handler, bind_addr_, @@ -277,7 +215,6 @@ int IDAMCPServer::start(int port, QueryCallback query_cb, AskCallback ask_cb, port_ = impl_->server->port(); running_.store(true); - return port_; } @@ -293,11 +230,12 @@ void IDAMCPServer::run_until_stopped() { } std::shared_ptr cmd; - { std::unique_lock lock(queue_mutex_); - if (queue_cv_.wait_for(lock, std::chrono::milliseconds(100), - [this]() { return !pending_commands_.empty() || !running_.load(); })) { + if (queue_cv_.wait_for( + lock, + std::chrono::milliseconds(100), + [this]() { return !pending_commands_.empty() || !running_.load(); })) { if (!pending_commands_.empty()) { cmd = pending_commands_.front(); pending_commands_.pop_front(); @@ -305,45 +243,45 @@ void IDAMCPServer::run_until_stopped() { } } - if (cmd) { - bool should_execute = false; - { - std::lock_guard lock(cmd->done_mutex); - if (!cmd->completed && !cmd->canceled) { - cmd->started = true; - should_execute = true; - } else if (!cmd->completed && cmd->canceled) { - cmd->completed = true; - } - } + if (!cmd) { + continue; + } - if (!should_execute) { - cmd->done_cv.notify_one(); - continue; + bool should_execute = false; + { + std::lock_guard lock(cmd->done_mutex); + if (!cmd->completed && !cmd->canceled) { + cmd->started = true; + should_execute = true; + } else if (!cmd->completed && cmd->canceled) { + cmd->completed = true; } + } - std::string result; - try { - if (cmd->type == MCPPendingCommand::Type::Query && query_cb_) { - result = query_cb_(cmd->input); - } else if (cmd->type == MCPPendingCommand::Type::Ask && ask_cb_) { - result = ask_cb_(cmd->input); - } else { - result = "Error: No handler for command type"; - } - } catch (const std::exception& e) { - result = std::string("Error: ") + e.what(); + if (!should_execute) { + cmd->done_cv.notify_one(); + continue; + } + + std::string result; + try { + if (cmd->type == MCPPendingCommand::Type::Query && query_cb_) { + result = query_cb_(cmd->input); + } else { + result = "Error: No handler for command type"; } + } catch (const std::exception& e) { + result = std::string("Error: ") + e.what(); + } - { - std::lock_guard lock(cmd->done_mutex); - if (!cmd->completed) { - cmd->result = std::move(result); - cmd->completed = true; - } + { + std::lock_guard lock(cmd->done_mutex); + if (!cmd->completed) { + cmd->result = std::move(result); + cmd->completed = true; } - cmd->done_cv.notify_one(); } + cmd->done_cv.notify_one(); } } @@ -386,39 +324,29 @@ void IDAMCPServer::complete_pending_commands(const std::string& result) { std::string IDAMCPServer::url() const { std::ostringstream ss; - ss << "http://" << bind_addr_ << ":" << port_; + ss << "http://" << xsql::thinclient::format_url_host(bind_addr_) << ":" << port_; return ss.str(); } -std::string format_mcp_info(int port, bool has_agent) { - std::ostringstream ss; - ss << "MCP server started on port " << port << "\n"; - ss << "SSE endpoint: http://127.0.0.1:" << port << "/sse\n\n"; - - ss << "Available tools:\n"; - ss << " idasql_query - Execute SQL query directly\n"; - if (has_agent) { - ss << " idasql_agent - Ask natural language question (AI-powered)\n"; - } - ss << "\n"; - - ss << "Add to Claude Desktop config:\n"; - ss << "{\n"; - ss << " \"mcpServers\": {\n"; - ss << " \"idasql\": {\n"; - ss << " \"url\": \"http://127.0.0.1:" << port << "/sse\"\n"; - ss << " }\n"; - ss << " }\n"; - ss << "}\n"; +std::string format_mcp_info(int port) { + return format_mcp_info(port, "127.0.0.1"); +} - return ss.str(); +std::string format_mcp_info(int port, const std::string& bind_addr) { + const std::string rendered_host = xsql::thinclient::format_url_host(bind_addr); + return "IDASQL MCP server: http://" + rendered_host + ":" + std::to_string(port) + "/sse\n"; } std::string format_mcp_status(int port, bool running) { + return format_mcp_status(port, running, "127.0.0.1"); +} + +std::string format_mcp_status(int port, bool running, const std::string& bind_addr) { std::ostringstream ss; + const std::string rendered_host = xsql::thinclient::format_url_host(bind_addr); if (running) { ss << "MCP server running on port " << port << "\n"; - ss << "SSE endpoint: http://127.0.0.1:" << port << "/sse\n"; + ss << "SSE endpoint: http://" << rendered_host << ":" << port << "/sse\n"; } else { ss << "MCP server not running\n"; ss << "Use '.mcp start' to start\n"; diff --git a/src/common/mcp_server.hpp b/src/common/mcp_server.hpp index 97b255a..c54de5a 100644 --- a/src/common/mcp_server.hpp +++ b/src/common/mcp_server.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + #pragma once /** @@ -9,34 +12,28 @@ * Tool handlers queue commands for execution on the main thread. * * Usage modes: - * 1. CLI (idalib): Call wait() to process commands on main thread - * 2. Plugin: Use execute_sync() wrapper in callbacks (no wait() needed) - * - * For CLI, start() returns immediately. Call wait() to block and process - * commands. For plugin, the callback itself uses execute_sync() to marshal - * to IDA's main thread, so no wait() is needed. + * 1. CLI (idalib): Call run_until_stopped() to process commands on main thread + * 2. Plugin: Use execute_sync() wrapper in callbacks (no run_until_stopped() needed) */ -#include -#include #include -#include #include #include +#include #include +#include +#include namespace idasql { -// Callbacks for handling requests -// QueryCallback: Direct SQL execution -// AskCallback: Natural language query (requires AI agent) +// SQL callback for handling requests. using QueryCallback = std::function; -using AskCallback = std::function; -// Internal command structure for cross-thread execution +// Internal command structure for cross-thread execution. struct MCPPendingCommand { - enum class Type { Query, Ask }; - Type type; + enum class Type { Query }; + + Type type = Type::Query; std::string input; std::string result; bool started = false; @@ -61,54 +58,57 @@ class IDAMCPServer { IDAMCPServer& operator=(const IDAMCPServer&) = delete; /** - * Start MCP server on given port with callbacks + * Start MCP server on given port with callback. * * @param port Port to listen on (0 = random port 9000-9999) * @param query_cb SQL query callback - * @param ask_cb Natural language callback (optional) * @param bind_addr Address to bind to (default: localhost only) * @param use_queue If true, callbacks are queued for main thread (CLI mode) * If false, callbacks called directly (plugin mode with execute_sync) * @return Actual port used, or -1 on failure */ - int start(int port, QueryCallback query_cb, AskCallback ask_cb = nullptr, + int start(int port, QueryCallback query_cb, const std::string& bind_addr = "127.0.0.1", bool use_queue = false); /** - * Block until server stops, processing commands on the calling thread - * Only needed when use_queue=true (CLI mode) - * This is where query_cb and ask_cb get called + * Block until server stops, processing commands on the calling thread. + * Only needed when use_queue=true (CLI mode). */ void run_until_stopped(); /** - * Stop the server + * Stop the server. */ void stop(); /** - * Check if server is running + * Check if server is running. */ bool is_running() const { return running_.load(); } /** - * Get the port the server is listening on + * Get the port the server is listening on. */ int port() const { return port_; } /** - * Get the SSE endpoint URL + * Get the SSE endpoint URL. */ std::string url() const; /** - * Set interrupt check function (called during wait loop) + * Get bind address configured at startup. + */ + const std::string& bind_addr() const { return bind_addr_; } + + /** + * Set interrupt check function (called during wait loop). */ void set_interrupt_check(std::function check); /** - * Queue a command for execution on the main thread - * Called by MCP tool handlers when use_queue=true + * Queue a command for execution on the main thread. + * Called by MCP tool handlers when use_queue=true. */ MCPQueueResult queue_and_wait(MCPPendingCommand::Type type, const std::string& input); @@ -124,11 +124,10 @@ class IDAMCPServer { std::condition_variable queue_cv_; std::deque> pending_commands_; - // Callbacks stored for execution + // Callback stored for execution. QueryCallback query_cb_; - AskCallback ask_cb_; - // Forward declaration - impl hides fastmcpp + // Forward declaration - impl hides fastmcpp. class Impl; std::unique_ptr impl_; @@ -136,13 +135,15 @@ class IDAMCPServer { }; /** - * Format MCP server info for display + * Format MCP server info for display. */ -std::string format_mcp_info(int port, bool has_agent); +std::string format_mcp_info(int port); +std::string format_mcp_info(int port, const std::string& bind_addr); /** - * Format MCP server status + * Format MCP server status. */ std::string format_mcp_status(int port, bool running); +std::string format_mcp_status(int port, bool running, const std::string& bind_addr); } // namespace idasql diff --git a/src/common/plugin_control.hpp b/src/common/plugin_control.hpp index 229e65f..d553962 100644 --- a/src/common/plugin_control.hpp +++ b/src/common/plugin_control.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * plugin_control.hpp - Plugin control codes used by IDASQL */ diff --git a/src/common/session_handler.hpp b/src/common/session_handler.hpp index 9894266..46544e6 100644 --- a/src/common/session_handler.hpp +++ b/src/common/session_handler.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + #pragma once /** @@ -8,53 +11,35 @@ * This class handles: * - SQL query execution * - Meta commands (.tables, .schema, .help) - * - Natural language queries via AI agent (when enabled) - * - Multi-turn conversation state * * NO IDA DEPENDENCIES - can be tested standalone. - * - * Used by: - * - CLI main.cpp (directly) - * - IdasqlCLI (wraps this for cli_t) */ -#include -#include -#include -#include #include +#include +#include +#include #include "idasql_commands.hpp" -#ifdef IDASQL_HAS_AI_AGENT -#include "ai_agent.hpp" -#endif - namespace idasql { -class SessionHandler -{ +class SessionHandler { public: using SqlExecutor = std::function; // Simple allowlist for table identifiers (alnum + underscore) - static bool is_safe_table_name(const std::string& name) - { - if (name.empty() || name.size() > 128) return false; + static bool is_safe_table_name(const std::string& name) { + if (name.empty() || name.size() > 128) { + return false; + } return std::all_of(name.begin(), name.end(), [](unsigned char c) { return std::isalnum(c) || c == '_'; }); } - /** - * Create a session handler - * @param executor Function to execute SQL and return formatted results - * @param enable_agent Whether to enable AI agent (if available) - */ - explicit SessionHandler(SqlExecutor executor, bool enable_agent = false) - : executor_(std::move(executor)) - { - // Setup command callbacks + explicit SessionHandler(SqlExecutor executor) + : executor_(std::move(executor)) { callbacks_.get_tables = [this]() { return executor_("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"); }; @@ -68,23 +53,9 @@ class SessionHandler callbacks_.get_info = [this]() { return executor_("PRAGMA database_list"); }; - callbacks_.clear_session = [this]() { - return clear_session(); - }; - -#ifdef IDASQL_HAS_AI_AGENT - if (enable_agent && AIAgent::is_available()) { - agent_ = std::make_unique(executor_); - agent_->start(); - agent_enabled_ = true; - } -#else - (void)enable_agent; // Suppress unused warning -#endif } - ~SessionHandler() - { + ~SessionHandler() { end_session(); } @@ -94,18 +65,11 @@ class SessionHandler SessionHandler(SessionHandler&&) = default; SessionHandler& operator=(SessionHandler&&) = default; - /** - * Process a line of input - * @param line User input (SQL, meta command, or natural language) - * @return Result string, or empty if no output - */ - std::string process_line(const std::string& line) - { + std::string process_line(const std::string& line) { if (line.empty()) { return ""; } - // Check for meta commands first std::string output; auto cmd_result = handle_command(line, callbacks_, output); @@ -118,97 +82,29 @@ class SessionHandler return output; case CommandResult::NOT_HANDLED: - // Continue to process as query break; } -#ifdef IDASQL_HAS_AI_AGENT - // If AI agent is enabled and input doesn't look like SQL, use agent - if (agent_enabled_ && agent_ && !AIAgent::looks_like_sql(line)) { - return agent_->query(line); - } -#endif - - // Execute as raw SQL return executor_(line); } - /** - * One-shot query (no session, no conversation history) - */ - std::string query(const std::string& prompt) - { -#ifdef IDASQL_HAS_AI_AGENT - if (agent_enabled_ && agent_) { - return agent_->query(prompt); - } -#endif - // Fallback: treat as SQL + std::string query(const std::string& prompt) { return executor_(prompt); } - /** - * End the session (cleanup agent) - */ - void end_session() - { -#ifdef IDASQL_HAS_AI_AGENT - if (agent_) { - agent_->stop(); - agent_.reset(); - } -#endif - agent_enabled_ = false; + void end_session() { + // No stateful session resources to clean up in slim mode. } - bool is_agent_enabled() const { return agent_enabled_; } bool is_quit_requested() const { return quit_requested_; } - /** - * Clear/reset the session - * Resets AI agent conversation history if enabled. - * Override the callback to add UI-specific behavior (e.g., msg_clear). - * - * @return Status message - */ - virtual std::string clear_session() - { -#ifdef IDASQL_HAS_AI_AGENT - if (agent_) { - agent_->reset_session(); - return "Session cleared (conversation history reset)"; - } -#endif - return "Session cleared"; - } - - /** - * Get command callbacks (for overriding in derived classes) - */ CommandCallbacks& callbacks() { return callbacks_; } const CommandCallbacks& callbacks() const { return callbacks_; } - /** - * Check if AI agent is available on this system - */ - static bool is_agent_available() - { -#ifdef IDASQL_HAS_AI_AGENT - return AIAgent::is_available(); -#else - return false; -#endif - } - private: SqlExecutor executor_; CommandCallbacks callbacks_; - bool agent_enabled_ = false; bool quit_requested_ = false; - -#ifdef IDASQL_HAS_AI_AGENT - std::unique_ptr agent_; -#endif }; } // namespace idasql diff --git a/src/common/welcome_query.cpp b/src/common/welcome_query.cpp new file mode 100644 index 0000000..918099c --- /dev/null +++ b/src/common/welcome_query.cpp @@ -0,0 +1,20 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "welcome_query.hpp" + +namespace idasql { + +const char* default_welcome_query() { + return "SELECT * FROM welcome"; +} + +std::string format_query_curl_example(const std::string& base_url) { + return "curl -X POST " + base_url + "/query -d \"" + std::string(default_welcome_query()) + "\""; +} + +std::string format_http_clipboard_payload(const std::string& base_url) { + return "IDASQL HTTP server: " + base_url; +} + +} // namespace idasql diff --git a/src/common/welcome_query.hpp b/src/common/welcome_query.hpp new file mode 100644 index 0000000..89c779a --- /dev/null +++ b/src/common/welcome_query.hpp @@ -0,0 +1,14 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +namespace idasql { + +const char* default_welcome_query(); +std::string format_query_curl_example(const std::string& base_url); +std::string format_http_clipboard_payload(const std::string& base_url); + +} // namespace idasql diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 89a3267..871ba24 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -1,17 +1,50 @@ # libidasql - SQL interface to IDA databases -# -# This is a header-only library that extends xsql with IDA-specific tables. -# Users link against this target to get the include paths. -add_library(idasql INTERFACE) +add_library(idasql STATIC + src/database.cpp + src/entities.cpp + src/entities_ext.cpp + src/entities_dbg.cpp + src/entities_search.cpp + src/entities_types.cpp + src/functions.cpp + src/decompiler.cpp + src/disassembly.cpp + src/search_bytes.cpp + src/idapython_exec.cpp + src/metadata.cpp + src/metadata_welcome.cpp + src/ui_context_provider.cpp +) + +target_include_directories(idasql + PUBLIC + $ + $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src +) -target_include_directories(idasql INTERFACE - $ - $ +target_compile_definitions(idasql PUBLIC + USE_DANGEROUS_FUNCTIONS + USE_STANDARD_FILE_FUNCTIONS + USE_IDA_SDK ) -# Link against xsql::xsql for SQLite virtual table framework -target_link_libraries(idasql INTERFACE xsql::xsql) +if(WIN32) + target_compile_options(idasql PRIVATE /bigobj) + target_compile_definitions(idasql PRIVATE + WIN32_LEAN_AND_MEAN + NOMINMAX + ) +endif() + +# Avoid name collision with idasql_plugin's import lib (idasql.lib) +set_target_properties(idasql PROPERTIES OUTPUT_NAME "libidasql") + +target_precompile_headers(idasql PRIVATE src/ida_headers.hpp) + +target_link_libraries(idasql PUBLIC xsql::xsql idasdk::idalib) # Export the library target # install(TARGETS idasql EXPORT idasqlTargets) diff --git a/src/lib/include/idasql/database.hpp b/src/lib/include/idasql/database.hpp index 656816d..89200d4 100644 --- a/src/lib/include/idasql/database.hpp +++ b/src/lib/include/idasql/database.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * database.hpp - IDASQL API * @@ -28,36 +31,13 @@ #include #include #include +#include #include #include #include -#include -#include #include -// IDA SDK -#include -#include -#include -#include -#include - -// IDASQL components -#include -#include -#include -#include -#include -#include -#include -#include - -// Optional: Decompiler (may not be available) -#ifdef USE_HEXRAYS -#include -#endif - namespace idasql { // ============================================================================ @@ -163,15 +143,12 @@ struct QueryResult { */ class QueryEngine { public: - QueryEngine() { - init(); - } - - ~QueryEngine() = default; + QueryEngine(); + ~QueryEngine(); // Moveable but not copyable - QueryEngine(QueryEngine&&) noexcept = default; - QueryEngine& operator=(QueryEngine&&) noexcept = default; + QueryEngine(QueryEngine&&) noexcept; + QueryEngine& operator=(QueryEngine&&) noexcept; QueryEngine(const QueryEngine&) = delete; QueryEngine& operator=(const QueryEngine&) = delete; @@ -179,120 +156,38 @@ class QueryEngine { /** * Execute SQL and return results */ - QueryResult query(const std::string& sql) { - return query(sql.c_str()); - } - - QueryResult query(const char* sql) { - QueryResult result; - - if (!db_.is_open()) { - result.error = "QueryEngine not initialized"; - return result; - } - - if (handle_runtime_pragma(sql, result)) { - error_ = result.success ? "" : result.error; - return result; - } - - xsql::QueryOptions options; - options.timeout_ms = runtime_settings().query_timeout_ms(); - xsql::Result raw = db_.query(sql, options); - result.columns = std::move(raw.columns); - result.rows.reserve(raw.rows.size()); - for (auto& raw_row : raw.rows) { - Row row; - row.values = std::move(raw_row.values); - result.rows.push_back(std::move(row)); - } - result.error = std::move(raw.error); - result.warnings = std::move(raw.warnings); - result.timed_out = raw.timed_out; - result.partial = raw.partial; - result.elapsed_ms = raw.elapsed_ms; - append_query_hints(sql ? std::string(sql) : std::string(), result); - result.success = result.error.empty(); - error_ = result.success ? "" : result.error; - - return result; - } + QueryResult query(const std::string& sql) { return query(sql.c_str()); } + QueryResult query(const char* sql); /** * Execute SQL, ignoring rows */ - xsql::Status exec(const char* sql) { - if (!db_.is_open()) { - error_ = "QueryEngine not initialized"; - return xsql::Status::error; - } - - QueryResult pragma_result; - if (handle_runtime_pragma(sql, pragma_result)) { - error_ = pragma_result.success ? "" : pragma_result.error; - return pragma_result.success ? xsql::Status::ok : xsql::Status::error; - } - - xsql::Status rc = db_.exec(sql); - error_ = db_.last_error(); - return rc; - } + xsql::Status exec(const char* sql); /** * Execute SQL, ignore results (for INSERT/UPDATE/DELETE) */ - bool execute(const char* sql) { - return xsql::is_ok(exec(sql)); - } + bool execute(const char* sql); /** * Execute multi-statement SQL script and collect statement results. */ bool execute_script(const std::string& script, std::vector& results, - std::string& error) { - if (!db_.is_open()) { - error_ = "QueryEngine not initialized"; - error = error_; - return false; - } - - bool ok = db_.execute_script(script, results, error); - error_ = ok ? "" : error; - return ok; - } + std::string& error); /** * Export tables to a SQL file. */ bool export_tables(const std::vector& tables, const std::string& output_path, - std::string& error) { - if (!db_.is_open()) { - error_ = "QueryEngine not initialized"; - error = error_; - return false; - } - - bool ok = db_.export_tables(tables, output_path, error); - error_ = ok ? "" : error; - return ok; - } + std::string& error); /** * Get single value (first column of first row) */ - std::string scalar(const std::string& sql) { - return scalar(sql.c_str()); - } - - std::string scalar(const char* sql) { - auto result = query(sql); - if (result.success && !result.empty()) { - return result.rows[0].values[0]; - } - return ""; - } + std::string scalar(const std::string& sql) { return scalar(sql.c_str()); } + std::string scalar(const char* sql); /** * Get last error message @@ -312,249 +207,15 @@ class QueryEngine { const xsql::Database& database() const { return db_; } private: - static std::string trim_copy(const std::string& s) { - size_t begin = 0; - while (begin < s.size() && std::isspace(static_cast(s[begin]))) { - ++begin; - } - size_t end = s.size(); - while (end > begin && std::isspace(static_cast(s[end - 1]))) { - --end; - } - return s.substr(begin, end - begin); - } - - static std::string to_lower_copy(std::string value) { - for (char& c : value) { - c = static_cast(std::tolower(static_cast(c))); - } - return value; - } - - static std::string strip_optional_quotes(const std::string& s) { - if (s.size() >= 2) { - char a = s.front(); - char b = s.back(); - if ((a == '\'' && b == '\'') || (a == '"' && b == '"')) { - return s.substr(1, s.size() - 2); - } - } - return s; - } - - static bool parse_int_value(const std::string& text, int& value) { - try { - size_t consumed = 0; - long long parsed = std::stoll(text, &consumed, 10); - if (consumed != text.size()) { - return false; - } - if (parsed < (std::numeric_limits::min)() || - parsed > (std::numeric_limits::max)()) { - return false; - } - value = static_cast(parsed); - return true; - } catch (...) { - return false; - } - } - - static bool parse_bool_value(const std::string& text, bool& value) { - const std::string lower = to_lower_copy(trim_copy(text)); - if (lower == "1" || lower == "on" || lower == "true" || lower == "yes") { - value = true; - return true; - } - if (lower == "0" || lower == "off" || lower == "false" || lower == "no") { - value = false; - return true; - } - return false; - } - - static QueryResult make_pragma_result(const std::string& key, const std::string& value) { - QueryResult result; - result.columns = {"name", "value"}; - Row row; - row.values = {key, value}; - result.rows.push_back(std::move(row)); - result.success = true; - return result; - } - - static QueryResult make_pragma_error(const std::string& error) { - QueryResult result; - result.success = false; - result.error = error; - return result; - } - - bool handle_runtime_pragma(const char* sql, QueryResult& out) { - if (sql == nullptr) { - return false; - } - - std::string text = trim_copy(sql); - if (text.empty()) { - return false; - } - if (!text.empty() && text.back() == ';') { - text.pop_back(); - text = trim_copy(text); - } - - std::string lower = to_lower_copy(text); - const std::string pragma_prefix = "pragma"; - if (lower.rfind(pragma_prefix, 0) != 0) { - return false; - } - - std::string body = trim_copy(text.substr(pragma_prefix.size())); - std::string body_lower = to_lower_copy(body); - const std::string idasql_prefix = "idasql."; - if (body_lower.rfind(idasql_prefix, 0) != 0) { - return false; - } - - std::string key_expr = trim_copy(body.substr(idasql_prefix.size())); - std::string value_expr; - size_t eq_pos = key_expr.find('='); - if (eq_pos != std::string::npos) { - value_expr = trim_copy(key_expr.substr(eq_pos + 1)); - key_expr = trim_copy(key_expr.substr(0, eq_pos)); - value_expr = strip_optional_quotes(value_expr); - } - - const std::string key = to_lower_copy(key_expr); - auto& settings = runtime_settings(); - - if (key == "query_timeout_ms") { - if (value_expr.empty()) { - out = make_pragma_result("query_timeout_ms", std::to_string(settings.query_timeout_ms())); - return true; - } - int timeout_ms = 0; - if (!parse_int_value(value_expr, timeout_ms) || !settings.set_query_timeout_ms(timeout_ms)) { - out = make_pragma_error("Invalid idasql.query_timeout_ms value"); - return true; - } - out = make_pragma_result("query_timeout_ms", std::to_string(settings.query_timeout_ms())); - return true; - } - - if (key == "queue_admission_timeout_ms") { - if (value_expr.empty()) { - out = make_pragma_result("queue_admission_timeout_ms", - std::to_string(settings.queue_admission_timeout_ms())); - return true; - } - int timeout_ms = 0; - if (!parse_int_value(value_expr, timeout_ms) || - !settings.set_queue_admission_timeout_ms(timeout_ms)) { - out = make_pragma_error("Invalid idasql.queue_admission_timeout_ms value"); - return true; - } - out = make_pragma_result("queue_admission_timeout_ms", - std::to_string(settings.queue_admission_timeout_ms())); - return true; - } - - if (key == "max_queue") { - if (value_expr.empty()) { - out = make_pragma_result("max_queue", std::to_string(settings.max_queue())); - return true; - } - int queue_limit = 0; - if (!parse_int_value(value_expr, queue_limit) || queue_limit < 0 || - !settings.set_max_queue(static_cast(queue_limit))) { - out = make_pragma_error("Invalid idasql.max_queue value"); - return true; - } - out = make_pragma_result("max_queue", std::to_string(settings.max_queue())); - return true; - } - - if (key == "hints_enabled") { - if (value_expr.empty()) { - out = make_pragma_result("hints_enabled", settings.hints_enabled() ? "1" : "0"); - return true; - } - bool enabled = false; - if (!parse_bool_value(value_expr, enabled)) { - out = make_pragma_error("Invalid idasql.hints_enabled value"); - return true; - } - settings.set_hints_enabled(enabled); - out = make_pragma_result("hints_enabled", settings.hints_enabled() ? "1" : "0"); - return true; - } - - if (key == "timeout_push") { - if (value_expr.empty()) { - out = make_pragma_error("idasql.timeout_push requires a timeout value"); - return true; - } - int timeout_ms = 0; - if (!parse_int_value(value_expr, timeout_ms)) { - out = make_pragma_error("Invalid idasql.timeout_push value"); - return true; - } - int effective_timeout = 0; - if (!settings.timeout_push(timeout_ms, &effective_timeout)) { - out = make_pragma_error("Invalid idasql.timeout_push value"); - return true; - } - out = make_pragma_result("query_timeout_ms", std::to_string(effective_timeout)); - return true; - } - - if (key == "timeout_pop") { - int effective_timeout = 0; - if (!settings.timeout_pop(&effective_timeout)) { - out = make_pragma_error("idasql.timeout_pop stack is empty"); - return true; - } - out = make_pragma_result("query_timeout_ms", std::to_string(effective_timeout)); - return true; - } - - out = make_pragma_error("Unknown idasql pragma key"); - return true; - } - - void append_query_hints(const std::string& sql, QueryResult& result) const { - if (!runtime_settings().hints_enabled()) { - return; - } - - const std::string lower = to_lower_copy(sql); - const bool touches_decompiler_table = - lower.find("ctree_lvars") != std::string::npos || - lower.find("ctree_call_args") != std::string::npos || - lower.find("ctree ") != std::string::npos || - lower.find("ctree\n") != std::string::npos || - lower.find("pseudocode") != std::string::npos; - const bool has_func_filter = lower.find("func_addr") != std::string::npos; - - auto add_warning_once = [&result](const std::string& warning) { - for (const auto& existing : result.warnings) { - if (existing == warning) { - return; - } - } - result.warnings.push_back(warning); - }; - - if (touches_decompiler_table && !has_func_filter) { - add_warning_once( - "Decompiler tables are expensive without func_addr filtering; add WHERE func_addr = and LIMIT."); - } - if (result.timed_out && touches_decompiler_table) { - add_warning_once( - "Decompiler query timed out; resolve candidate functions first, then query ctree_* per function."); - } - } + static std::string trim_copy(const std::string& s); + static std::string to_lower_copy(std::string value); + static std::string strip_optional_quotes(const std::string& s); + static bool parse_int_value(const std::string& text, int& value); + static bool parse_bool_value(const std::string& text, bool& value); + static QueryResult make_pragma_result(const std::string& key, const std::string& value); + static QueryResult make_pragma_error(const std::string& error); + bool handle_runtime_pragma(const char* sql, QueryResult& out); + void append_query_hints(const std::string& sql, QueryResult& result) const; xsql::Database db_; std::string error_; @@ -566,38 +227,9 @@ class QueryEngine { std::unique_ptr disassembly_; std::unique_ptr types_; std::unique_ptr debugger_; - std::unique_ptr decompiler_; // Runtime detection - - void init() { - // db_ auto-opens :memory: via xsql::Database constructor - - // Register all virtual tables - entities_ = std::make_unique(); - entities_->register_all(db_); + std::unique_ptr decompiler_; - metadata_ = std::make_unique(); - metadata_->register_all(db_); - - extended_ = std::make_unique(); - extended_->register_all(db_); - - disassembly_ = std::make_unique(); - disassembly_->register_all(db_); - - types_ = std::make_unique(); - types_->register_all(db_); - - debugger_ = std::make_unique(); - debugger_->register_all(db_); - - // Decompiler registry - register_all() handles runtime Hex-Rays detection - // Must be registered before SQL functions so hexrays_available() is set - decompiler_ = std::make_unique(); - decompiler_->register_all(db_); - - functions::register_sql_functions(db_); - search::register_search_bytes(db_); - } + void init(); }; // ============================================================================ @@ -622,7 +254,7 @@ class QueryEngine { class Session { public: Session() = default; - ~Session() { close(); } + ~Session(); // Non-copyable, non-moveable (singleton semantics) Session(const Session&) = delete; @@ -635,72 +267,12 @@ class Session { * @param idb_path Path to .idb/.i64 file * @return true on success */ - bool open(const char* idb_path) { - if (engine_) close(); - - // Initialize IDA library - int rc = init_library(); - if (rc != 0) { - error_ = "Failed to initialize IDA library: " + std::to_string(rc); - return false; - } - - // Open the database - rc = open_database(idb_path, true, nullptr); - if (rc != 0) { - error_ = "Failed to open database: " + std::string(idb_path); - return false; - } - ida_opened_ = true; - - // Wait for auto-analysis - auto_wait(); - - // For new analysis (exe/dll/etc), build strings after auto-analysis completes - // For existing databases (i64/idb), strings are already saved - std::string path_lower = idb_path; - std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower); - auto ends_with = [](const std::string& s, const std::string& suffix) { - return s.size() >= suffix.size() && - s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; - }; - bool is_new_analysis = !( - ends_with(path_lower, ".i64") || - ends_with(path_lower, ".idb") - ); - if (is_new_analysis) { - // Configure and build string list with sensible defaults - strwinsetup_t* opts = const_cast(get_strlist_options()); - opts->strtypes.clear(); - opts->strtypes.push_back(STRTYPE_C); // ASCII - opts->strtypes.push_back(STRTYPE_C_16); // UTF-16 - opts->minlen = 5; - opts->only_7bit = 0; - clear_strlist(); // Clear before building (like rebuild_strings) - build_strlist(); - } - - // Create query engine - engine_ = std::make_unique(); - if (!engine_->is_valid()) { - error_ = engine_->error(); - close(); - return false; - } - - return true; - } + bool open(const char* idb_path); /** * Close the session */ - void close() { - engine_.reset(); - if (ida_opened_) { - close_database(false); - ida_opened_ = false; - } - } + void close(); /** * Check if session is open @@ -716,48 +288,23 @@ class Session { // Delegate query methods to engine (with string overloads) QueryResult query(const std::string& sql) { return query(sql.c_str()); } - QueryResult query(const char* sql) { - if (!engine_) { - QueryResult r; - r.error = "Session not open"; - return r; - } - return engine_->query(sql); - } + QueryResult query(const char* sql); - xsql::Status exec(const char* sql) { - return engine_ ? engine_->exec(sql) : xsql::Status::error; - } + xsql::Status exec(const char* sql); bool execute(const std::string& sql) { return execute(sql.c_str()); } - bool execute(const char* sql) { - return engine_ ? engine_->execute(sql) : false; - } + bool execute(const char* sql); bool execute_script(const std::string& script, std::vector& results, - std::string& error) { - if (!engine_) { - error = "Session not open"; - return false; - } - return engine_->execute_script(script, results, error); - } + std::string& error); bool export_tables(const std::vector& tables, const std::string& output_path, - std::string& error) { - if (!engine_) { - error = "Session not open"; - return false; - } - return engine_->export_tables(tables, output_path, error); - } + std::string& error); std::string scalar(const std::string& sql) { return scalar(sql.c_str()); } - std::string scalar(const char* sql) { - return engine_ ? engine_->scalar(sql) : ""; - } + std::string scalar(const char* sql); /** * Get query engine (for advanced use) @@ -767,16 +314,7 @@ class Session { /** * Get database info */ - std::string info() const { - if (!ida_opened_) return "Not opened"; - - std::string s; - s += "Processor: " + std::string(inf_get_procname().c_str()) + "\n"; - s += "Functions: " + std::to_string(get_func_qty()) + "\n"; - s += "Segments: " + std::to_string(get_segm_qty()) + "\n"; - s += "Names: " + std::to_string(get_nlist_size()) + "\n"; - return s; - } + std::string info() const; private: std::unique_ptr engine_; @@ -789,45 +327,28 @@ class Session { // ============================================================================ namespace detail { - inline QueryEngine& global_engine() { - static QueryEngine engine; - return engine; - } + QueryEngine& global_engine(); } /** * Quick query - uses global engine - * - * Example: - * auto funcs = idasql::query("SELECT name FROM funcs LIMIT 5"); - * for (const auto& row : funcs) { - * msg("%s\n", row[0].c_str()); - * } */ -inline QueryResult query(const char* sql) { - return detail::global_engine().query(sql); -} +QueryResult query(const char* sql); /** * Quick exec (no result rows) */ -inline xsql::Status exec(const char* sql) { - return detail::global_engine().exec(sql); -} +xsql::Status exec(const char* sql); /** * Quick execute (no results) */ -inline bool execute(const char* sql) { - return detail::global_engine().execute(sql); -} +bool execute(const char* sql); /** * Quick scalar query */ -inline std::string scalar(const char* sql) { - return detail::global_engine().scalar(sql); -} +std::string scalar(const char* sql); // ============================================================================ // Backwards Compatibility Alias diff --git a/src/lib/include/idasql/entities_search.hpp b/src/lib/include/idasql/entities_search.hpp deleted file mode 100644 index 1bff5e1..0000000 --- a/src/lib/include/idasql/entities_search.hpp +++ /dev/null @@ -1,490 +0,0 @@ -/** - * entities_search.hpp - Grep-style entity search table - * - * Usage: - * SELECT name, kind FROM grep WHERE pattern = 'main' LIMIT 20; - * SELECT * FROM grep WHERE pattern = 'sub%' AND kind = 'function'; - * - * Pattern behavior: - * - Plain text: case-insensitive contains match (auto '%text%') - * - '%' and '_' : SQL LIKE wildcards - * - '*' is accepted and normalized to '%' - */ - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -// IDA SDK -#include -#include -#include -#include -#include - -namespace idasql { -namespace search { - -struct EntityRow { - std::string name; - std::string kind; - ea_t address = BADADDR; - uint32 ordinal = 0; - std::string parent_name; - std::string full_name; - bool has_address = false; - bool has_ordinal = false; -}; - -class NamePattern { - std::string pattern_; - bool valid_ = false; - -public: - explicit NamePattern(const std::string& raw) { - std::string lowered = to_lower(raw); - std::replace(lowered.begin(), lowered.end(), '*', '%'); - if (lowered.empty()) { - return; - } - - if (!has_wildcards(lowered)) { - // Grep-style default: plain text means "contains". - lowered = "%" + lowered + "%"; - } - - pattern_ = std::move(lowered); - valid_ = true; - } - - bool valid() const { return valid_; } - - bool matches(const std::string& value) const { - if (!valid_) return false; - return like_match(to_lower(value), pattern_); - } - -private: - static std::string to_lower(const std::string& s) { - std::string out; - out.reserve(s.size()); - for (char c : s) { - out.push_back(static_cast(std::tolower(static_cast(c)))); - } - return out; - } - - static bool has_wildcards(const std::string& s) { - return s.find('%') != std::string::npos || s.find('_') != std::string::npos; - } - - // SQL LIKE matcher supporting '%' and '_' - static bool like_match(const std::string& text, const std::string& pattern) { - size_t ti = 0; - size_t pi = 0; - size_t star = std::string::npos; - size_t retry = 0; - - while (ti < text.size()) { - if (pi < pattern.size() && (pattern[pi] == '_' || pattern[pi] == text[ti])) { - ++ti; - ++pi; - continue; - } - if (pi < pattern.size() && pattern[pi] == '%') { - star = pi++; - retry = ti; - continue; - } - if (star != std::string::npos) { - pi = star + 1; - ti = ++retry; - continue; - } - return false; - } - - while (pi < pattern.size() && pattern[pi] == '%') { - ++pi; - } - return pi == pattern.size(); - } -}; - -enum class EntitySource { - Functions = 0, - Labels, - Segments, - Structs, - Unions, - Enums, - Members, - EnumMembers, - Done -}; - -class EntityGenerator { - NamePattern pattern_; - - EntitySource current_source_ = EntitySource::Functions; - size_t current_index_ = 0; - EntityRow current_row_; - - // For type iteration - uint32 type_ordinal_ = 0; - size_t member_index_ = 0; - tinfo_t current_type_; - -public: - explicit EntityGenerator(const std::string& pattern) : pattern_(pattern) {} - - bool next() { - if (!pattern_.valid()) return false; - - while (current_source_ != EntitySource::Done) { - if (advance_current_source()) { - return true; - } - current_source_ = static_cast(static_cast(current_source_) + 1); - current_index_ = 0; - type_ordinal_ = 0; - member_index_ = 0; - } - return false; - } - - const EntityRow& current() const { return current_row_; } - -private: - bool matches(const std::string& name) const { - return pattern_.matches(name); - } - - bool advance_current_source() { - switch (current_source_) { - case EntitySource::Functions: return advance_functions(); - case EntitySource::Labels: return advance_labels(); - case EntitySource::Segments: return advance_segments(); - case EntitySource::Structs: return advance_structs(); - case EntitySource::Unions: return advance_unions(); - case EntitySource::Enums: return advance_enums(); - case EntitySource::Members: return advance_members(); - case EntitySource::EnumMembers: return advance_enum_members(); - case EntitySource::Done: return false; - } - return false; - } - - bool advance_functions() { - size_t count = get_func_qty(); - while (current_index_ < count) { - func_t* fn = getn_func(current_index_++); - if (!fn) continue; - - qstring name; - if (get_func_name(&name, fn->start_ea) <= 0) continue; - - std::string name_str(name.c_str()); - if (matches(name_str)) { - current_row_.name = name_str; - current_row_.kind = "function"; - current_row_.address = fn->start_ea; - current_row_.has_address = true; - current_row_.has_ordinal = false; - current_row_.parent_name.clear(); - current_row_.full_name = name_str; - return true; - } - } - return false; - } - - bool advance_labels() { - size_t count = get_nlist_size(); - while (current_index_ < count) { - ea_t ea = get_nlist_ea(current_index_); - const char* name = get_nlist_name(current_index_); - current_index_++; - - if (!name || !*name) continue; - - func_t* fn = get_func(ea); - if (fn && fn->start_ea == ea) continue; - - std::string name_str(name); - if (matches(name_str)) { - current_row_.name = name_str; - current_row_.kind = "label"; - current_row_.address = ea; - current_row_.has_address = true; - current_row_.has_ordinal = false; - current_row_.parent_name.clear(); - current_row_.full_name = name_str; - return true; - } - } - return false; - } - - bool advance_segments() { - int count = get_segm_qty(); - while (static_cast(current_index_) < count) { - segment_t* seg = getnseg(static_cast(current_index_++)); - if (!seg) continue; - - qstring name; - if (get_segm_name(&name, seg) <= 0) continue; - - std::string name_str(name.c_str()); - if (matches(name_str)) { - current_row_.name = name_str; - current_row_.kind = "segment"; - current_row_.address = seg->start_ea; - current_row_.has_address = true; - current_row_.has_ordinal = false; - current_row_.parent_name.clear(); - current_row_.full_name = name_str; - return true; - } - } - return false; - } - - bool advance_types_of_kind(const char* kind, bool want_struct, bool want_union, bool want_enum) { - uint32 count = get_ordinal_count(nullptr); - while (type_ordinal_ < count) { - uint32 ord = type_ordinal_++; - tinfo_t tif; - if (!tif.get_numbered_type(nullptr, ord)) continue; - - bool is_struct = tif.is_struct(); - bool is_union = tif.is_union(); - bool is_enum = tif.is_enum(); - - if (want_struct && !is_struct) continue; - if (want_union && !is_union) continue; - if (want_enum && !is_enum) continue; - - qstring name; - if (!tif.get_type_name(&name)) continue; - - std::string name_str(name.c_str()); - if (matches(name_str)) { - current_row_.name = name_str; - current_row_.kind = kind; - current_row_.has_address = false; - current_row_.ordinal = ord; - current_row_.has_ordinal = true; - current_row_.parent_name.clear(); - current_row_.full_name = name_str; - return true; - } - } - return false; - } - - bool advance_structs() { return advance_types_of_kind("struct", true, false, false); } - bool advance_unions() { return advance_types_of_kind("union", false, true, false); } - bool advance_enums() { return advance_types_of_kind("enum", false, false, true); } - - bool advance_members() { - uint32 count = get_ordinal_count(nullptr); - - while (type_ordinal_ < count) { - if (!current_type_.get_numbered_type(nullptr, type_ordinal_)) { - type_ordinal_++; - member_index_ = 0; - continue; - } - - if (!current_type_.is_struct() && !current_type_.is_union()) { - type_ordinal_++; - member_index_ = 0; - continue; - } - - udt_type_data_t udt; - if (!current_type_.get_udt_details(&udt)) { - type_ordinal_++; - member_index_ = 0; - continue; - } - - while (member_index_ < udt.size()) { - const udm_t& member = udt[member_index_++]; - std::string member_name(member.name.c_str()); - - if (matches(member_name)) { - qstring type_name; - current_type_.get_type_name(&type_name); - - current_row_.name = member_name; - current_row_.kind = "member"; - current_row_.has_address = false; - current_row_.ordinal = type_ordinal_; - current_row_.has_ordinal = true; - current_row_.parent_name = type_name.c_str(); - current_row_.full_name = std::string(type_name.c_str()) + "." + member_name; - return true; - } - } - - type_ordinal_++; - member_index_ = 0; - } - return false; - } - - bool advance_enum_members() { - uint32 count = get_ordinal_count(nullptr); - - while (type_ordinal_ < count) { - if (!current_type_.get_numbered_type(nullptr, type_ordinal_)) { - type_ordinal_++; - member_index_ = 0; - continue; - } - - if (!current_type_.is_enum()) { - type_ordinal_++; - member_index_ = 0; - continue; - } - - enum_type_data_t etd; - if (!current_type_.get_enum_details(&etd)) { - type_ordinal_++; - member_index_ = 0; - continue; - } - - while (member_index_ < etd.size()) { - const edm_t& em = etd[member_index_++]; - std::string value_name(em.name.c_str()); - - if (matches(value_name)) { - qstring type_name; - current_type_.get_type_name(&type_name); - - current_row_.name = value_name; - current_row_.kind = "enum_member"; - current_row_.has_address = false; - current_row_.ordinal = type_ordinal_; - current_row_.has_ordinal = true; - current_row_.parent_name = type_name.c_str(); - current_row_.full_name = std::string(type_name.c_str()) + "." + value_name; - return true; - } - } - - type_ordinal_++; - member_index_ = 0; - } - return false; - } -}; - -class GrepIterator : public xsql::RowIterator { - EntityGenerator generator_; - bool started_ = false; - bool valid_ = false; - int64_t rowid_ = -1; - -public: - explicit GrepIterator(const std::string& pattern) - : generator_(pattern) {} - - bool next() override { - started_ = true; - valid_ = generator_.next(); - if (valid_) { - ++rowid_; - } - return valid_; - } - - bool eof() const override { - return started_ && !valid_; - } - - void column(xsql::FunctionContext& ctx, int col) override { - if (!valid_) { - ctx.result_null(); - return; - } - - const EntityRow& row = generator_.current(); - switch (col) { - case 0: // pattern (input column) - ctx.result_null(); - break; - case 1: - ctx.result_text(row.name); - break; - case 2: - ctx.result_text(row.kind); - break; - case 3: - if (row.has_address) ctx.result_int64(static_cast(row.address)); - else ctx.result_null(); - break; - case 4: - if (row.has_ordinal) ctx.result_int64(row.ordinal); - else ctx.result_null(); - break; - case 5: - if (row.parent_name.empty()) ctx.result_null(); - else ctx.result_text(row.parent_name); - break; - case 6: - ctx.result_text(row.full_name); - break; - default: - ctx.result_null(); - break; - } - } - - int64_t rowid() const override { - return rowid_; - } -}; - -inline VTableDef define_grep() { - return table("grep") - .count([]() -> size_t { - // Full scans without a pattern are disabled. - return 0; - }) - // Required filter input. - .column_text("pattern", [](size_t) -> std::string { return ""; }) - // Output columns. - .column_text("name", [](size_t) -> std::string { return ""; }) - .column_text("kind", [](size_t) -> std::string { return ""; }) - .column_int64("address", [](size_t) -> int64_t { return 0; }) - .column_int64("ordinal", [](size_t) -> int64_t { return 0; }) - .column_text("parent_name", [](size_t) -> std::string { return ""; }) - .column_text("full_name", [](size_t) -> std::string { return ""; }) - .filter_eq_text("pattern", [](const char* pattern) -> std::unique_ptr { - return std::make_unique(pattern ? pattern : ""); - }, 25.0, 100.0) - .build(); -} - -inline bool register_grep_entities(xsql::Database& db) { - static VTableDef grep = define_grep(); - return db.register_table("ida_grep", &grep) && db.create_table("grep", "ida_grep"); -} - -} // namespace search -} // namespace idasql diff --git a/src/lib/include/idasql/fwd.hpp b/src/lib/include/idasql/fwd.hpp new file mode 100644 index 0000000..6f4f6e7 --- /dev/null +++ b/src/lib/include/idasql/fwd.hpp @@ -0,0 +1,54 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * fwd.hpp - Forward declarations for IDASQL registry types + * + * Allows database.hpp to hold unique_ptr without including full definitions. + */ + +#pragma once + +#include + +namespace idasql { + +namespace entities { + struct TableRegistry; +} + +namespace metadata { + struct MetadataItem; + struct WelcomeRow; + struct MetadataRegistry; +} + +namespace extended { + struct ExtendedRegistry; +} + +namespace disassembly { + struct DisassemblyRegistry; +} + +namespace types { + struct TypesRegistry; +} + +namespace debugger { + struct DebuggerRegistry; +} + +namespace decompiler { + struct DecompilerRegistry; +} + +namespace functions { + void register_sql_functions(xsql::Database& db); +} + +namespace search { + bool register_search_bytes(xsql::Database& db); +} + +} // namespace idasql diff --git a/src/lib/include/idasql/idapython.hpp b/src/lib/include/idasql/idapython.hpp new file mode 100644 index 0000000..93c10a5 --- /dev/null +++ b/src/lib/include/idasql/idapython.hpp @@ -0,0 +1,33 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * idapython.hpp - Public IDAPython runtime API + * + * Use runtime_acquire/runtime_release to manage the Python runtime + * in CLI tools and plugins. The full internal API is private. + */ + +#pragma once + +#include + +namespace idasql { +namespace idapython { + +/** + * Acquire the IDAPython runtime (refcounted). + * Call before executing Python or enabling Python-backed SQL functions. + * @param error Optional error message on failure + * @return true on success + */ +bool runtime_acquire(std::string* error = nullptr); + +/** + * Release the IDAPython runtime (refcounted). + * Balances a prior runtime_acquire() call. + */ +void runtime_release(); + +} // namespace idapython +} // namespace idasql diff --git a/src/lib/include/idasql/idasql.hpp b/src/lib/include/idasql/idasql.hpp index 9678af8..73d2bd4 100644 --- a/src/lib/include/idasql/idasql.hpp +++ b/src/lib/include/idasql/idasql.hpp @@ -1,18 +1,16 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * idasql.hpp - Main include header for IDASQL library * - * This is the convenience header that includes all IDASQL components. - * * Usage: * #include * - * idasql::Database db; - * db.open("database.i64"); - * auto result = db.query("SELECT * FROM funcs LIMIT 10"); - * if (!result.success) { - * std::cerr << result.error << "\n"; - * } - * db.close(); + * idasql::Session session; + * session.open("database.i64"); + * auto result = session.query("SELECT * FROM funcs LIMIT 10"); + * session.close(); */ #pragma once @@ -20,19 +18,5 @@ // Core virtual table framework #include -// Entity tables -#include -#include -#include - -// Decompiler tables (requires Hex-Rays) -#include - -// Metadata tables -#include - -// SQL functions -#include - -// Database wrapper class +// Database wrapper class (includes fwd.hpp for registry types) #include diff --git a/src/lib/include/idasql/platform.hpp b/src/lib/include/idasql/platform.hpp index 298087f..c0ea94f 100644 --- a/src/lib/include/idasql/platform.hpp +++ b/src/lib/include/idasql/platform.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * platform.hpp - Platform fixups for IDA SDK compatibility * diff --git a/src/lib/include/idasql/platform_undef.hpp b/src/lib/include/idasql/platform_undef.hpp index e63f152..ea987f3 100644 --- a/src/lib/include/idasql/platform_undef.hpp +++ b/src/lib/include/idasql/platform_undef.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * platform_undef.hpp - Undo platform.hpp redirects before IDA headers * diff --git a/src/lib/include/idasql/runtime_settings.hpp b/src/lib/include/idasql/runtime_settings.hpp index dea478a..e3d5735 100644 --- a/src/lib/include/idasql/runtime_settings.hpp +++ b/src/lib/include/idasql/runtime_settings.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + #pragma once #include @@ -11,6 +14,7 @@ struct RuntimeSettingsSnapshot { int queue_admission_timeout_ms = 120000; size_t max_queue = 64; bool hints_enabled = true; + bool enable_idapython = false; size_t timeout_stack_depth = 0; }; @@ -28,6 +32,7 @@ class RuntimeSettings { snap.queue_admission_timeout_ms = queue_admission_timeout_ms_; snap.max_queue = max_queue_; snap.hints_enabled = hints_enabled_; + snap.enable_idapython = enable_idapython_; snap.timeout_stack_depth = timeout_stack_.size(); return snap; } @@ -52,6 +57,11 @@ class RuntimeSettings { return hints_enabled_; } + bool enable_idapython() const { + std::lock_guard lock(mutex_); + return enable_idapython_; + } + bool set_query_timeout_ms(int value) { if (!is_valid_timeout(value)) { return false; @@ -85,6 +95,11 @@ class RuntimeSettings { hints_enabled_ = enabled; } + void set_enable_idapython(bool enabled) { + std::lock_guard lock(mutex_); + enable_idapython_ = enabled; + } + bool timeout_push(int timeout_ms, int* effective_timeout_ms = nullptr) { if (!is_valid_timeout(timeout_ms)) { return false; @@ -126,6 +141,7 @@ class RuntimeSettings { int queue_admission_timeout_ms_ = 120000; size_t max_queue_ = 64; bool hints_enabled_ = true; + bool enable_idapython_ = false; std::vector timeout_stack_; }; @@ -134,4 +150,3 @@ inline RuntimeSettings& runtime_settings() { } } // namespace idasql - diff --git a/src/lib/include/idasql/ui_context_provider.hpp b/src/lib/include/idasql/ui_context_provider.hpp new file mode 100644 index 0000000..87f5dcf --- /dev/null +++ b/src/lib/include/idasql/ui_context_provider.hpp @@ -0,0 +1,20 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +#include + +namespace idasql { +namespace ui_context { + +bool initialize_capture_helper(std::string* error = nullptr); +void shutdown_capture_helper(); + +xsql::json get_ui_context_json(); + +} // namespace ui_context +} // namespace idasql diff --git a/src/lib/include/idasql/vtable.hpp b/src/lib/include/idasql/vtable.hpp index 9d5214a..b5c2e18 100644 --- a/src/lib/include/idasql/vtable.hpp +++ b/src/lib/include/idasql/vtable.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * vtable.hpp - SQLite Virtual Table framework for IDA * diff --git a/src/lib/include/idasql/vtable_policy.hpp b/src/lib/include/idasql/vtable_policy.hpp index 488bfd1..14e0edf 100644 --- a/src/lib/include/idasql/vtable_policy.hpp +++ b/src/lib/include/idasql/vtable_policy.hpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * vtable_policy.hpp - Policy and configuration system for IDASQL * diff --git a/src/lib/src/database.cpp b/src/lib/src/database.cpp new file mode 100644 index 0000000..7d06a23 --- /dev/null +++ b/src/lib/src/database.cpp @@ -0,0 +1,569 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include + +#include + +#include +#include +#include + +#include "ida_headers.hpp" + +// Private headers for registry implementations +#include "entities.hpp" +#include "entities_ext.hpp" +#include "entities_dbg.hpp" +#include "entities_search.hpp" +#include "entities_types.hpp" +#include "functions.hpp" +#include "decompiler.hpp" +#include "disassembly.hpp" +#include "search_bytes.hpp" +#include "metadata.hpp" + +namespace idasql { + +// ============================================================================ +// QueryEngine +// ============================================================================ + +QueryEngine::QueryEngine() { + init(); +} + +QueryEngine::~QueryEngine() = default; + +QueryResult QueryEngine::query(const char* sql) { + QueryResult result; + + if (!db_.is_open()) { + result.error = "QueryEngine not initialized"; + return result; + } + + if (handle_runtime_pragma(sql, result)) { + error_ = result.success ? "" : result.error; + return result; + } + + xsql::QueryOptions options; + options.timeout_ms = runtime_settings().query_timeout_ms(); + xsql::Result raw = db_.query(sql, options); + result.columns = std::move(raw.columns); + result.rows.reserve(raw.rows.size()); + for (auto& raw_row : raw.rows) { + Row row; + row.values = std::move(raw_row.values); + result.rows.push_back(std::move(row)); + } + result.error = std::move(raw.error); + result.warnings = std::move(raw.warnings); + result.timed_out = raw.timed_out; + result.partial = raw.partial; + result.elapsed_ms = raw.elapsed_ms; + append_query_hints(sql ? std::string(sql) : std::string(), result); + result.success = result.error.empty(); + error_ = result.success ? "" : result.error; + + return result; +} + +xsql::Status QueryEngine::exec(const char* sql) { + if (!db_.is_open()) { + error_ = "QueryEngine not initialized"; + return xsql::Status::error; + } + + QueryResult pragma_result; + if (handle_runtime_pragma(sql, pragma_result)) { + error_ = pragma_result.success ? "" : pragma_result.error; + return pragma_result.success ? xsql::Status::ok : xsql::Status::error; + } + + xsql::Status rc = db_.exec(sql); + error_ = db_.last_error(); + return rc; +} + +bool QueryEngine::execute(const char* sql) { + return xsql::is_ok(exec(sql)); +} + +bool QueryEngine::execute_script(const std::string& script, + std::vector& results, + std::string& error) { + if (!db_.is_open()) { + error_ = "QueryEngine not initialized"; + error = error_; + return false; + } + + bool ok = db_.execute_script(script, results, error); + error_ = ok ? "" : error; + return ok; +} + +bool QueryEngine::export_tables(const std::vector& tables, + const std::string& output_path, + std::string& error) { + if (!db_.is_open()) { + error_ = "QueryEngine not initialized"; + error = error_; + return false; + } + + bool ok = db_.export_tables(tables, output_path, error); + error_ = ok ? "" : error; + return ok; +} + +std::string QueryEngine::scalar(const char* sql) { + auto result = query(sql); + if (result.success && !result.empty()) { + return result.rows[0].values[0]; + } + return ""; +} + +std::string QueryEngine::trim_copy(const std::string& s) { + size_t begin = 0; + while (begin < s.size() && std::isspace(static_cast(s[begin]))) { + ++begin; + } + size_t end = s.size(); + while (end > begin && std::isspace(static_cast(s[end - 1]))) { + --end; + } + return s.substr(begin, end - begin); +} + +std::string QueryEngine::to_lower_copy(std::string value) { + for (char& c : value) { + c = static_cast(std::tolower(static_cast(c))); + } + return value; +} + +std::string QueryEngine::strip_optional_quotes(const std::string& s) { + if (s.size() >= 2) { + char a = s.front(); + char b = s.back(); + if ((a == '\'' && b == '\'') || (a == '"' && b == '"')) { + return s.substr(1, s.size() - 2); + } + } + return s; +} + +bool QueryEngine::parse_int_value(const std::string& text, int& value) { + try { + size_t consumed = 0; + long long parsed = std::stoll(text, &consumed, 10); + if (consumed != text.size()) { + return false; + } + if (parsed < (std::numeric_limits::min)() || + parsed > (std::numeric_limits::max)()) { + return false; + } + value = static_cast(parsed); + return true; + } catch (...) { + return false; + } +} + +bool QueryEngine::parse_bool_value(const std::string& text, bool& value) { + const std::string lower = to_lower_copy(trim_copy(text)); + if (lower == "1" || lower == "on" || lower == "true" || lower == "yes") { + value = true; + return true; + } + if (lower == "0" || lower == "off" || lower == "false" || lower == "no") { + value = false; + return true; + } + return false; +} + +QueryResult QueryEngine::make_pragma_result(const std::string& key, const std::string& value) { + QueryResult result; + result.columns = {"name", "value"}; + Row row; + row.values = {key, value}; + result.rows.push_back(std::move(row)); + result.success = true; + return result; +} + +QueryResult QueryEngine::make_pragma_error(const std::string& error) { + QueryResult result; + result.success = false; + result.error = error; + return result; +} + +bool QueryEngine::handle_runtime_pragma(const char* sql, QueryResult& out) { + if (sql == nullptr) { + return false; + } + + std::string text = trim_copy(sql); + if (text.empty()) { + return false; + } + if (!text.empty() && text.back() == ';') { + text.pop_back(); + text = trim_copy(text); + } + + std::string lower = to_lower_copy(text); + const std::string pragma_prefix = "pragma"; + if (lower.rfind(pragma_prefix, 0) != 0) { + return false; + } + + std::string body = trim_copy(text.substr(pragma_prefix.size())); + std::string body_lower = to_lower_copy(body); + const std::string idasql_prefix = "idasql."; + if (body_lower.rfind(idasql_prefix, 0) != 0) { + return false; + } + + std::string key_expr = trim_copy(body.substr(idasql_prefix.size())); + std::string value_expr; + size_t eq_pos = key_expr.find('='); + if (eq_pos != std::string::npos) { + value_expr = trim_copy(key_expr.substr(eq_pos + 1)); + key_expr = trim_copy(key_expr.substr(0, eq_pos)); + value_expr = strip_optional_quotes(value_expr); + } + + const std::string key = to_lower_copy(key_expr); + auto& settings = runtime_settings(); + + if (key == "query_timeout_ms") { + if (value_expr.empty()) { + out = make_pragma_result("query_timeout_ms", std::to_string(settings.query_timeout_ms())); + return true; + } + int timeout_ms = 0; + if (!parse_int_value(value_expr, timeout_ms) || !settings.set_query_timeout_ms(timeout_ms)) { + out = make_pragma_error("Invalid idasql.query_timeout_ms value"); + return true; + } + out = make_pragma_result("query_timeout_ms", std::to_string(settings.query_timeout_ms())); + return true; + } + + if (key == "queue_admission_timeout_ms") { + if (value_expr.empty()) { + out = make_pragma_result("queue_admission_timeout_ms", + std::to_string(settings.queue_admission_timeout_ms())); + return true; + } + int timeout_ms = 0; + if (!parse_int_value(value_expr, timeout_ms) || + !settings.set_queue_admission_timeout_ms(timeout_ms)) { + out = make_pragma_error("Invalid idasql.queue_admission_timeout_ms value"); + return true; + } + out = make_pragma_result("queue_admission_timeout_ms", + std::to_string(settings.queue_admission_timeout_ms())); + return true; + } + + if (key == "max_queue") { + if (value_expr.empty()) { + out = make_pragma_result("max_queue", std::to_string(settings.max_queue())); + return true; + } + int queue_limit = 0; + if (!parse_int_value(value_expr, queue_limit) || queue_limit < 0 || + !settings.set_max_queue(static_cast(queue_limit))) { + out = make_pragma_error("Invalid idasql.max_queue value"); + return true; + } + out = make_pragma_result("max_queue", std::to_string(settings.max_queue())); + return true; + } + + if (key == "hints_enabled") { + if (value_expr.empty()) { + out = make_pragma_result("hints_enabled", settings.hints_enabled() ? "1" : "0"); + return true; + } + bool enabled = false; + if (!parse_bool_value(value_expr, enabled)) { + out = make_pragma_error("Invalid idasql.hints_enabled value"); + return true; + } + settings.set_hints_enabled(enabled); + out = make_pragma_result("hints_enabled", settings.hints_enabled() ? "1" : "0"); + return true; + } + + if (key == "enable_idapython") { + if (value_expr.empty()) { + out = make_pragma_result("enable_idapython", settings.enable_idapython() ? "1" : "0"); + return true; + } + bool enabled = false; + if (!parse_bool_value(value_expr, enabled)) { + out = make_pragma_error("Invalid idasql.enable_idapython value"); + return true; + } + settings.set_enable_idapython(enabled); + out = make_pragma_result("enable_idapython", settings.enable_idapython() ? "1" : "0"); + return true; + } + + if (key == "timeout_push") { + if (value_expr.empty()) { + out = make_pragma_error("idasql.timeout_push requires a timeout value"); + return true; + } + int timeout_ms = 0; + if (!parse_int_value(value_expr, timeout_ms)) { + out = make_pragma_error("Invalid idasql.timeout_push value"); + return true; + } + int effective_timeout = 0; + if (!settings.timeout_push(timeout_ms, &effective_timeout)) { + out = make_pragma_error("Invalid idasql.timeout_push value"); + return true; + } + out = make_pragma_result("query_timeout_ms", std::to_string(effective_timeout)); + return true; + } + + if (key == "timeout_pop") { + int effective_timeout = 0; + if (!settings.timeout_pop(&effective_timeout)) { + out = make_pragma_error("idasql.timeout_pop stack is empty"); + return true; + } + out = make_pragma_result("query_timeout_ms", std::to_string(effective_timeout)); + return true; + } + + out = make_pragma_error("Unknown idasql pragma key"); + return true; +} + +void QueryEngine::append_query_hints(const std::string& sql, QueryResult& result) const { + if (!runtime_settings().hints_enabled()) { + return; + } + + const std::string lower = to_lower_copy(sql); + const bool touches_decompiler_table = + lower.find("ctree_lvars") != std::string::npos || + lower.find("ctree_call_args") != std::string::npos || + lower.find("ctree ") != std::string::npos || + lower.find("ctree\n") != std::string::npos || + lower.find("pseudocode") != std::string::npos; + const bool has_func_filter = lower.find("func_addr") != std::string::npos; + + auto add_warning_once = [&result](const std::string& warning) { + for (const auto& existing : result.warnings) { + if (existing == warning) { + return; + } + } + result.warnings.push_back(warning); + }; + + if (touches_decompiler_table && !has_func_filter) { + add_warning_once( + "Decompiler tables are expensive without func_addr filtering; add WHERE func_addr = and LIMIT."); + } + if (result.timed_out && touches_decompiler_table) { + add_warning_once( + "Decompiler query timed out; resolve candidate functions first, then query ctree_* per function."); + } +} + +void QueryEngine::init() { + // db_ auto-opens :memory: via xsql::Database constructor + + // Register all virtual tables + entities_ = std::make_unique(); + entities_->register_all(db_); + + metadata_ = std::make_unique(); + metadata_->register_all(db_); + + extended_ = std::make_unique(); + extended_->register_all(db_); + + disassembly_ = std::make_unique(); + disassembly_->register_all(db_); + + types_ = std::make_unique(); + types_->register_all(db_); + + debugger_ = std::make_unique(); + debugger_->register_all(db_); + + // Decompiler registry - register_all() handles runtime Hex-Rays detection + // Must be registered before SQL functions so hexrays_available() is set + decompiler_ = std::make_unique(); + decompiler_->register_all(db_); + + functions::register_sql_functions(db_); + search::register_search_bytes(db_); +} + +// ============================================================================ +// Session +// ============================================================================ + +Session::~Session() { close(); } + +bool Session::open(const char* idb_path) { + if (engine_) close(); + + // Initialize IDA library + int rc = init_library(); + if (rc != 0) { + error_ = "Failed to initialize IDA library: " + std::to_string(rc); + return false; + } + + // Open the database + rc = open_database(idb_path, true, nullptr); + if (rc != 0) { + error_ = "Failed to open database: " + std::string(idb_path); + return false; + } + ida_opened_ = true; + + // Wait for auto-analysis + auto_wait(); + + // For new analysis (exe/dll/etc), build strings after auto-analysis completes + // For existing databases (i64/idb), strings are already saved + std::string path_lower = idb_path; + std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower); + auto ends_with = [](const std::string& s, const std::string& suffix) { + return s.size() >= suffix.size() && + s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; + }; + bool is_new_analysis = !( + ends_with(path_lower, ".i64") || + ends_with(path_lower, ".idb") + ); + if (is_new_analysis) { + // Configure and build string list with sensible defaults + strwinsetup_t* opts = const_cast(get_strlist_options()); + opts->strtypes.clear(); + opts->strtypes.push_back(STRTYPE_C); // ASCII + opts->strtypes.push_back(STRTYPE_C_16); // UTF-16 + opts->minlen = 5; + opts->only_7bit = 0; + clear_strlist(); // Clear before building (like rebuild_strings) + build_strlist(); + } + + // Create query engine + engine_ = std::make_unique(); + if (!engine_->is_valid()) { + error_ = engine_->error(); + close(); + return false; + } + + return true; +} + +void Session::close() { + engine_.reset(); + if (ida_opened_) { + close_database(false); + ida_opened_ = false; + } +} + +QueryResult Session::query(const char* sql) { + if (!engine_) { + QueryResult r; + r.error = "Session not open"; + return r; + } + return engine_->query(sql); +} + +xsql::Status Session::exec(const char* sql) { + return engine_ ? engine_->exec(sql) : xsql::Status::error; +} + +bool Session::execute(const char* sql) { + return engine_ ? engine_->execute(sql) : false; +} + +bool Session::execute_script(const std::string& script, + std::vector& results, + std::string& error) { + if (!engine_) { + error = "Session not open"; + return false; + } + return engine_->execute_script(script, results, error); +} + +bool Session::export_tables(const std::vector& tables, + const std::string& output_path, + std::string& error) { + if (!engine_) { + error = "Session not open"; + return false; + } + return engine_->export_tables(tables, output_path, error); +} + +std::string Session::scalar(const char* sql) { + return engine_ ? engine_->scalar(sql) : ""; +} + +std::string Session::info() const { + if (!ida_opened_) return "Not opened"; + + std::string s; + s += "Processor: " + std::string(inf_get_procname().c_str()) + "\n"; + s += "Functions: " + std::to_string(get_func_qty()) + "\n"; + s += "Segments: " + std::to_string(get_segm_qty()) + "\n"; + s += "Names: " + std::to_string(get_nlist_size()) + "\n"; + return s; +} + +// ============================================================================ +// TIER 3: Free Functions - Quick one-liners +// ============================================================================ + +namespace detail { + QueryEngine& global_engine() { + static QueryEngine engine; + return engine; + } +} + +QueryResult query(const char* sql) { + return detail::global_engine().query(sql); +} + +xsql::Status exec(const char* sql) { + return detail::global_engine().exec(sql); +} + +bool execute(const char* sql) { + return detail::global_engine().execute(sql); +} + +std::string scalar(const char* sql) { + return detail::global_engine().scalar(sql); +} + +} // namespace idasql diff --git a/src/lib/include/idasql/decompiler.hpp b/src/lib/src/decompiler.cpp similarity index 53% rename from src/lib/include/idasql/decompiler.hpp rename to src/lib/src/decompiler.cpp index fde5bd6..833d30c 100644 --- a/src/lib/include/idasql/decompiler.hpp +++ b/src/lib/src/decompiler.cpp @@ -1,41 +1,7 @@ -/** - * decompiler.hpp - Hex-Rays Decompiler Virtual Tables - * - * Provides SQLite virtual tables for accessing decompiled function data: - * pseudocode - Decompiled function pseudocode lines - * ctree_lvars - Local variables from decompiled functions - * ctree - Full AST (expressions and statements) - * ctree_call_args - Flattened call arguments - * - * All tables support constraint pushdown on func_addr via filter_eq framework: - * SELECT * FROM pseudocode WHERE func_addr = 0x401000; - * SELECT * FROM ctree_lvars WHERE func_addr = 0x401000; - * - * Requires Hex-Rays decompiler license. - */ - -#pragma once - -#include - -#include -#include - -#include -#include -#include - -#include - -// IDA SDK headers -#include -#include -#include -#include - -// Hex-Rays decompiler headers -#include -#include +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "decompiler.hpp" namespace idasql { namespace decompiler { @@ -44,16 +10,7 @@ namespace decompiler { // Decompiler Initialization // ============================================================================ -// Global flag tracking if Hex-Rays is available -// Set once during DecompilerRegistry::register_all() -inline bool& hexrays_available() { - static bool available = false; - return available; -} - -// Initialize Hex-Rays decompiler - call ONCE at startup -// Returns true if decompiler is available -inline bool init_hexrays() { +bool init_hexrays() { static bool initialized = false; if (!initialized) { @@ -68,9 +25,7 @@ inline bool init_hexrays() { return hexrays_available(); } -// Invalidate decompiler cache for the function containing ea. -// Safe to call even if Hex-Rays is unavailable or ea is not in a function. -inline void invalidate_decompiler_cache(ea_t ea) { +void invalidate_decompiler_cache(ea_t ea) { if (!hexrays_available()) return; func_t* f = get_func(ea); if (f) { @@ -79,11 +34,10 @@ inline void invalidate_decompiler_cache(ea_t ea) { } // ============================================================================ -// Data Structures +// ITP Helpers // ============================================================================ -// ITP name ↔ enum helpers -inline const char* itp_to_name(item_preciser_t itp) { +const char* itp_to_name(item_preciser_t itp) { switch (itp) { case ITP_SEMI: return "semi"; case ITP_BLOCK1: return "block1"; @@ -101,7 +55,7 @@ inline const char* itp_to_name(item_preciser_t itp) { } } -inline item_preciser_t name_to_itp(const char* name) { +item_preciser_t name_to_itp(const char* name) { if (!name || !name[0]) return ITP_SEMI; if (stricmp(name, "block1") == 0) return ITP_BLOCK1; if (stricmp(name, "block2") == 0) return ITP_BLOCK2; @@ -117,108 +71,11 @@ inline item_preciser_t name_to_itp(const char* name) { return ITP_SEMI; // default } -// Pseudocode line data -struct PseudocodeLine { - ea_t func_addr; - int line_num; - std::string text; - ea_t ea; // Associated address (from COLOR_ADDR anchor) - std::string comment; // User comment at this ea (from restore_user_cmts) - item_preciser_t comment_placement = ITP_SEMI; // Comment placement type -}; - -// Local variable data -struct LvarInfo { - ea_t func_addr; - int idx; - std::string name; - std::string type; - std::string comment; - int size; - bool is_arg; - bool is_result; - bool is_stk_var; - bool is_reg_var; - sval_t stkoff; - mreg_t mreg; -}; - -// Local variable rename result with explicit post-apply observability. -struct LvarRenameResult { - bool success = false; // Operation executed without internal API failure - bool applied = false; // Observed name changed to requested target - ea_t func_addr = BADADDR; - int lvar_idx = -1; - std::string target_name; // Original name selector (for by-name API) - std::string requested_name; // Requested new name - std::string before_name; // Name before mutation - std::string after_name; // Name after mutation/readback - std::string reason; // not_found, ambiguous_name, unchanged, not_nameable, etc. - std::vector warnings; -}; - -// Ctree item data -struct CtreeItem { - ea_t func_addr; - int item_id; - bool is_expr; - int op; - std::string op_name; - ea_t ea; - int parent_id; - int depth; - int x_id, y_id, z_id; - int cond_id, then_id, else_id; - int body_id, init_id, step_id; - int var_idx; - ea_t obj_ea; - int64_t num_value; - std::string str_value; - std::string helper_name; - int member_offset; - std::string var_name; - bool var_is_stk, var_is_reg, var_is_arg; - std::string obj_name; - - CtreeItem() : func_addr(0), item_id(-1), is_expr(false), op(0), ea(BADADDR), - parent_id(-1), depth(0), - x_id(-1), y_id(-1), z_id(-1), - cond_id(-1), then_id(-1), else_id(-1), - body_id(-1), init_id(-1), step_id(-1), - var_idx(-1), obj_ea(BADADDR), num_value(0), member_offset(0), - var_is_stk(false), var_is_reg(false), var_is_arg(false) {} -}; - -// Call argument data -struct CallArgInfo { - ea_t func_addr; - int call_item_id; - ea_t call_ea; - std::string call_obj_name; - std::string call_helper_name; - int arg_idx; - int arg_item_id; - std::string arg_op; - int arg_var_idx; - std::string arg_var_name; - bool arg_var_is_stk; - bool arg_var_is_arg; - ea_t arg_obj_ea; - std::string arg_obj_name; - int64_t arg_num_value; - std::string arg_str_value; - - CallArgInfo() : func_addr(0), call_item_id(-1), call_ea(BADADDR), arg_idx(-1), arg_item_id(-1), - arg_var_idx(-1), arg_var_is_stk(false), arg_var_is_arg(false), - arg_obj_ea(BADADDR), arg_num_value(0) {} -}; - // ============================================================================ // Helper Functions // ============================================================================ -// Get full ctype name with cot_/cit_ prefix -inline std::string get_full_ctype_name(ctype_t op) { +std::string get_full_ctype_name(ctype_t op) { const char* name = get_ctype_name(op); if (!name || !name[0]) return ""; if (op < cit_empty) { @@ -228,9 +85,7 @@ inline std::string get_full_ctype_name(ctype_t op) { } } -// Extract the first COLOR_ADDR anchor ea from a raw pseudocode line. -// Returns BADADDR if no anchor found. -inline ea_t extract_line_ea(cfunc_t* cfunc, const qstring& raw_line) { +ea_t extract_line_ea(cfunc_t* cfunc, const qstring& raw_line) { const char* p = raw_line.c_str(); while (*p) { if (*p == COLOR_ON && *(p + 1) == COLOR_ADDR) { @@ -256,8 +111,11 @@ inline ea_t extract_line_ea(cfunc_t* cfunc, const qstring& raw_line) { return BADADDR; } -// Collect pseudocode for a single function -inline bool collect_pseudocode(std::vector& lines, ea_t func_addr) { +// ============================================================================ +// Collect Functions +// ============================================================================ + +bool collect_pseudocode(std::vector& lines, ea_t func_addr) { lines.clear(); if (!hexrays_available()) return false; @@ -307,8 +165,7 @@ inline bool collect_pseudocode(std::vector& lines, ea_t func_add return true; } -// Collect pseudocode for all functions -inline void collect_all_pseudocode(std::vector& lines) { +void collect_all_pseudocode(std::vector& lines) { lines.clear(); if (!hexrays_available()) return; @@ -325,8 +182,7 @@ inline void collect_all_pseudocode(std::vector& lines) { } } -// Collect lvars for a single function -inline bool collect_lvars(std::vector& vars, ea_t func_addr) { +bool collect_lvars(std::vector& vars, ea_t func_addr) { vars.clear(); if (!hexrays_available()) return false; @@ -368,8 +224,7 @@ inline bool collect_lvars(std::vector& vars, ea_t func_addr) { return true; } -// Collect lvars for all functions -inline void collect_all_lvars(std::vector& vars) { +void collect_all_lvars(std::vector& vars) { vars.clear(); if (!hexrays_available()) return; @@ -386,202 +241,205 @@ inline void collect_all_lvars(std::vector& vars) { } } -// Ctree collector visitor -struct ctree_collector_t : public ctree_parentee_t { - std::vector& items; - std::map item_ids; - cfunc_t* cfunc; - ea_t func_addr; - int next_id; - - ctree_collector_t(std::vector& items_, cfunc_t* cfunc_, ea_t func_addr_) - : ctree_parentee_t(false), items(items_), cfunc(cfunc_), func_addr(func_addr_), next_id(0) {} +// ============================================================================ +// Ctree Collector +// ============================================================================ - int idaapi visit_insn(cinsn_t* insn) override { - int my_id = next_id++; - item_ids[insn] = my_id; +ctree_collector_t::ctree_collector_t(std::vector& items_, cfunc_t* cfunc_, ea_t func_addr_) + : ctree_parentee_t(false), items(items_), cfunc(cfunc_), func_addr(func_addr_), next_id(0) {} + +int idaapi ctree_collector_t::visit_insn(cinsn_t* insn) { + int my_id = next_id++; + item_ids[insn] = my_id; + + CtreeItem ci; + ci.func_addr = func_addr; + ci.item_id = my_id; + ci.is_expr = false; + ci.op = insn->op; + ci.op_name = get_full_ctype_name(insn->op); + ci.ea = insn->ea; + ci.label_num = insn->label_num; + ci.depth = parents.size(); + if (insn->op == cit_goto && insn->cgoto != nullptr) { + ci.goto_label_num = insn->cgoto->label_num; + } - CtreeItem ci; - ci.func_addr = func_addr; - ci.item_id = my_id; - ci.is_expr = false; - ci.op = insn->op; - ci.op_name = get_full_ctype_name(insn->op); - ci.ea = insn->ea; - ci.depth = parents.size(); + citem_t* p = parent_item(); + if (p) { + auto it = item_ids.find(p); + if (it != item_ids.end()) ci.parent_id = it->second; + } - citem_t* p = parent_item(); - if (p) { - auto it = item_ids.find(p); - if (it != item_ids.end()) ci.parent_id = it->second; - } + items.push_back(ci); + return 0; +} - items.push_back(ci); - return 0; +int idaapi ctree_collector_t::visit_expr(cexpr_t* expr) { + int my_id = next_id++; + item_ids[expr] = my_id; + + CtreeItem ci; + ci.func_addr = func_addr; + ci.item_id = my_id; + ci.is_expr = true; + ci.op = expr->op; + ci.op_name = get_full_ctype_name(expr->op); + ci.ea = expr->ea; + ci.label_num = expr->label_num; + ci.depth = parents.size(); + + citem_t* p = parent_item(); + if (p) { + auto it = item_ids.find(p); + if (it != item_ids.end()) ci.parent_id = it->second; } - int idaapi visit_expr(cexpr_t* expr) override { - int my_id = next_id++; - item_ids[expr] = my_id; + switch (expr->op) { + case cot_var: + ci.var_idx = expr->v.idx; + if (cfunc && ci.var_idx >= 0 && ci.var_idx < cfunc->get_lvars()->size()) { + const lvar_t& lv = (*cfunc->get_lvars())[ci.var_idx]; + ci.var_name = lv.name.c_str(); + ci.var_is_stk = lv.is_stk_var(); + ci.var_is_reg = lv.is_reg_var(); + ci.var_is_arg = lv.is_arg_var(); + } + break; + case cot_obj: + ci.obj_ea = expr->obj_ea; + { + qstring name; + if (get_name(&name, expr->obj_ea) > 0) { + ci.obj_name = name.c_str(); + } + } + break; + case cot_num: + ci.num_value = expr->numval(); + break; + case cot_str: + if (expr->string) ci.str_value = expr->string; + break; + case cot_helper: + if (expr->helper) ci.helper_name = expr->helper; + break; + case cot_memref: + case cot_memptr: + ci.member_offset = expr->m; + break; + default: + break; + } - CtreeItem ci; - ci.func_addr = func_addr; - ci.item_id = my_id; - ci.is_expr = true; - ci.op = expr->op; - ci.op_name = get_full_ctype_name(expr->op); - ci.ea = expr->ea; - ci.depth = parents.size(); + items.push_back(ci); + return 0; +} - citem_t* p = parent_item(); - if (p) { - auto it = item_ids.find(p); - if (it != item_ids.end()) ci.parent_id = it->second; - } +void ctree_collector_t::resolve_child_ids() { + for (auto& ci : items) { + if (ci.item_id < 0) continue; - switch (expr->op) { - case cot_var: - ci.var_idx = expr->v.idx; - if (cfunc && ci.var_idx >= 0 && ci.var_idx < cfunc->get_lvars()->size()) { - const lvar_t& lv = (*cfunc->get_lvars())[ci.var_idx]; - ci.var_name = lv.name.c_str(); - ci.var_is_stk = lv.is_stk_var(); - ci.var_is_reg = lv.is_reg_var(); - ci.var_is_arg = lv.is_arg_var(); - } - break; - case cot_obj: - ci.obj_ea = expr->obj_ea; - { - qstring name; - if (get_name(&name, expr->obj_ea) > 0) { - ci.obj_name = name.c_str(); - } - } - break; - case cot_num: - ci.num_value = expr->numval(); - break; - case cot_str: - if (expr->string) ci.str_value = expr->string; - break; - case cot_helper: - if (expr->helper) ci.helper_name = expr->helper; - break; - case cot_memref: - case cot_memptr: - ci.member_offset = expr->m; - break; - default: + citem_t* item = nullptr; + for (auto& kv : item_ids) { + if (kv.second == ci.item_id) { + item = kv.first; break; + } } + if (!item) continue; - items.push_back(ci); - return 0; - } - - void resolve_child_ids() { - for (auto& ci : items) { - if (ci.item_id < 0) continue; + if (ci.is_expr) { + cexpr_t* expr = static_cast(item); - citem_t* item = nullptr; - for (auto& kv : item_ids) { - if (kv.second == ci.item_id) { - item = kv.first; - break; - } + if (expr->x) { + auto it = item_ids.find(expr->x); + if (it != item_ids.end()) ci.x_id = it->second; } - if (!item) continue; - - if (ci.is_expr) { - cexpr_t* expr = static_cast(item); - - if (expr->x) { - auto it = item_ids.find(expr->x); - if (it != item_ids.end()) ci.x_id = it->second; - } - if (expr->y && expr->op != cot_call) { - auto it = item_ids.find(expr->y); - if (it != item_ids.end()) ci.y_id = it->second; - } - if (expr->z) { - auto it = item_ids.find(expr->z); - if (it != item_ids.end()) ci.z_id = it->second; - } - } else { - cinsn_t* insn = static_cast(item); - - switch (insn->op) { - case cit_if: - if (insn->cif) { - auto cond_it = item_ids.find(&insn->cif->expr); - if (cond_it != item_ids.end()) ci.cond_id = cond_it->second; - if (insn->cif->ithen) { - auto it = item_ids.find(insn->cif->ithen); - if (it != item_ids.end()) ci.then_id = it->second; - } - if (insn->cif->ielse) { - auto it = item_ids.find(insn->cif->ielse); - if (it != item_ids.end()) ci.else_id = it->second; - } - } - break; - case cit_for: - if (insn->cfor) { - auto cond_it = item_ids.find(&insn->cfor->expr); - if (cond_it != item_ids.end()) ci.cond_id = cond_it->second; - auto init_it = item_ids.find(&insn->cfor->init); - if (init_it != item_ids.end()) ci.init_id = init_it->second; - auto step_it = item_ids.find(&insn->cfor->step); - if (step_it != item_ids.end()) ci.step_id = step_it->second; - if (insn->cfor->body) { - auto it = item_ids.find(insn->cfor->body); - if (it != item_ids.end()) ci.body_id = it->second; - } + if (expr->y && expr->op != cot_call) { + auto it = item_ids.find(expr->y); + if (it != item_ids.end()) ci.y_id = it->second; + } + if (expr->z) { + auto it = item_ids.find(expr->z); + if (it != item_ids.end()) ci.z_id = it->second; + } + } else { + cinsn_t* insn = static_cast(item); + + switch (insn->op) { + case cit_if: + if (insn->cif) { + auto cond_it = item_ids.find(&insn->cif->expr); + if (cond_it != item_ids.end()) ci.cond_id = cond_it->second; + if (insn->cif->ithen) { + auto it = item_ids.find(insn->cif->ithen); + if (it != item_ids.end()) ci.then_id = it->second; } - break; - case cit_while: - if (insn->cwhile) { - auto cond_it = item_ids.find(&insn->cwhile->expr); - if (cond_it != item_ids.end()) ci.cond_id = cond_it->second; - if (insn->cwhile->body) { - auto it = item_ids.find(insn->cwhile->body); - if (it != item_ids.end()) ci.body_id = it->second; - } + if (insn->cif->ielse) { + auto it = item_ids.find(insn->cif->ielse); + if (it != item_ids.end()) ci.else_id = it->second; } - break; - case cit_do: - if (insn->cdo) { - auto cond_it = item_ids.find(&insn->cdo->expr); - if (cond_it != item_ids.end()) ci.cond_id = cond_it->second; - if (insn->cdo->body) { - auto it = item_ids.find(insn->cdo->body); - if (it != item_ids.end()) ci.body_id = it->second; - } + } + break; + case cit_for: + if (insn->cfor) { + auto cond_it = item_ids.find(&insn->cfor->expr); + if (cond_it != item_ids.end()) ci.cond_id = cond_it->second; + auto init_it = item_ids.find(&insn->cfor->init); + if (init_it != item_ids.end()) ci.init_id = init_it->second; + auto step_it = item_ids.find(&insn->cfor->step); + if (step_it != item_ids.end()) ci.step_id = step_it->second; + if (insn->cfor->body) { + auto it = item_ids.find(insn->cfor->body); + if (it != item_ids.end()) ci.body_id = it->second; } - break; - case cit_return: - if (insn->creturn) { - auto it = item_ids.find(&insn->creturn->expr); - if (it != item_ids.end()) ci.x_id = it->second; + } + break; + case cit_while: + if (insn->cwhile) { + auto cond_it = item_ids.find(&insn->cwhile->expr); + if (cond_it != item_ids.end()) ci.cond_id = cond_it->second; + if (insn->cwhile->body) { + auto it = item_ids.find(insn->cwhile->body); + if (it != item_ids.end()) ci.body_id = it->second; } - break; - case cit_expr: - if (insn->cexpr) { - auto it = item_ids.find(insn->cexpr); - if (it != item_ids.end()) ci.x_id = it->second; + } + break; + case cit_do: + if (insn->cdo) { + auto cond_it = item_ids.find(&insn->cdo->expr); + if (cond_it != item_ids.end()) ci.cond_id = cond_it->second; + if (insn->cdo->body) { + auto it = item_ids.find(insn->cdo->body); + if (it != item_ids.end()) ci.body_id = it->second; } - break; - default: - break; - } + } + break; + case cit_return: + if (insn->creturn) { + auto it = item_ids.find(&insn->creturn->expr); + if (it != item_ids.end()) ci.x_id = it->second; + } + break; + case cit_expr: + if (insn->cexpr) { + auto it = item_ids.find(insn->cexpr); + if (it != item_ids.end()) ci.x_id = it->second; + } + break; + default: + break; } } } -}; +} + +// ============================================================================ +// Ctree / Call Args Collect +// ============================================================================ -// Collect ctree items for a single function -inline bool collect_ctree(std::vector& items, ea_t func_addr) { +bool collect_ctree(std::vector& items, ea_t func_addr) { items.clear(); if (!hexrays_available()) return false; @@ -600,8 +458,7 @@ inline bool collect_ctree(std::vector& items, ea_t func_addr) { return true; } -// Collect ctree for all functions -inline void collect_all_ctree(std::vector& items) { +void collect_all_ctree(std::vector& items) { items.clear(); if (!hexrays_available()) return; @@ -618,100 +475,191 @@ inline void collect_all_ctree(std::vector& items) { } } -// Call args collector visitor -struct call_args_collector_t : public ctree_parentee_t { - std::vector& args; - std::map item_ids; - cfunc_t* cfunc; - ea_t func_addr; - int next_id; +static std::string default_label_name(int label_num) { + return "LABEL_" + std::to_string(label_num); +} + +bool collect_ctree_labels(std::vector& rows, ea_t func_addr) { + rows.clear(); - call_args_collector_t(std::vector& args_, cfunc_t* cfunc_, ea_t func_addr_) - : ctree_parentee_t(false), args(args_), cfunc(cfunc_), func_addr(func_addr_), next_id(0) {} + if (!hexrays_available()) return false; - int idaapi visit_insn(cinsn_t* insn) override { - item_ids[insn] = next_id++; - return 0; + func_t* f = get_func(func_addr); + if (!f) return false; + + hexrays_failure_t hf; + cfuncptr_t cfunc = decompile(f, &hf); + if (!cfunc) return false; + + std::vector items; + if (!collect_ctree(items, func_addr)) { + return false; } - int idaapi visit_expr(cexpr_t* expr) override { - int my_id = next_id++; - item_ids[expr] = my_id; + std::map label_map; + for (const CtreeItem& item : items) { + if (item.label_num < 0) continue; + auto it = label_map.find(item.label_num); + if (it != label_map.end()) continue; + + CtreeLabelInfo info; + info.func_addr = func_addr; + info.label_num = item.label_num; + info.name = default_label_name(item.label_num); + info.item_id = item.item_id; + info.item_ea = item.ea; + info.is_user_defined = false; + label_map[item.label_num] = std::move(info); + } - if (expr->op == cot_call && expr->a) { - std::string call_obj_name; - std::string call_helper_name; - if (expr->x != nullptr) { - if (expr->x->op == cot_obj) { - qstring name; - if (get_name(&name, expr->x->obj_ea) > 0) { - call_obj_name = name.c_str(); + user_labels_t* user_labels = restore_user_labels(func_addr, &*cfunc); + if (user_labels != nullptr) { + for (auto it = user_labels_begin(user_labels); it != user_labels_end(user_labels); it = user_labels_next(it)) { + const int label_num = user_labels_first(it); + const qstring& user_name = user_labels_second(it); + + auto dst = label_map.find(label_num); + if (dst == label_map.end()) { + CtreeLabelInfo info; + info.func_addr = func_addr; + info.label_num = label_num; + info.item_id = -1; + info.item_ea = BADADDR; + info.name = default_label_name(label_num); + info.is_user_defined = false; + + citem_t* label_item = cfunc->find_label(label_num); + if (label_item != nullptr) { + info.item_ea = label_item->ea; + for (const CtreeItem& item : items) { + if (item.label_num == label_num) { + info.item_id = item.item_id; + break; + } } - } else if (expr->x->op == cot_helper && expr->x->helper != nullptr) { - call_helper_name = expr->x->helper; } + + dst = label_map.insert({label_num, info}).first; } - carglist_t& arglist = *expr->a; - for (size_t i = 0; i < arglist.size(); i++) { - const carg_t& arg = arglist[i]; - - CallArgInfo ai; - ai.func_addr = func_addr; - ai.call_item_id = my_id; - ai.call_ea = expr->ea; - ai.call_obj_name = call_obj_name; - ai.call_helper_name = call_helper_name; - ai.arg_idx = i; - ai.arg_op = get_full_ctype_name(arg.op); - - auto it = item_ids.find((citem_t*)&arg); - if (it != item_ids.end()) { - ai.arg_item_id = it->second; - } else { - ai.arg_item_id = next_id++; - item_ids[(citem_t*)&arg] = ai.arg_item_id; - } + dst->second.name = user_name.c_str(); + dst->second.is_user_defined = true; + } + user_labels_free(user_labels); + } - switch (arg.op) { - case cot_var: - ai.arg_var_idx = arg.v.idx; - if (cfunc && ai.arg_var_idx >= 0 && ai.arg_var_idx < cfunc->get_lvars()->size()) { - const lvar_t& lv = (*cfunc->get_lvars())[ai.arg_var_idx]; - ai.arg_var_name = lv.name.c_str(); - ai.arg_var_is_stk = lv.is_stk_var(); - ai.arg_var_is_arg = lv.is_arg_var(); - } - break; - case cot_obj: - ai.arg_obj_ea = arg.obj_ea; - { - qstring name; - if (get_name(&name, arg.obj_ea) > 0) { - ai.arg_obj_name = name.c_str(); - } - } - break; - case cot_num: - ai.arg_num_value = arg.numval(); - break; - case cot_str: - if (arg.string) ai.arg_str_value = arg.string; - break; - default: - break; - } + rows.reserve(label_map.size()); + for (const auto& kv : label_map) { + rows.push_back(kv.second); + } + return true; +} + +void collect_all_ctree_labels(std::vector& rows) { + rows.clear(); - args.push_back(ai); + if (!hexrays_available()) return; + + size_t func_qty = get_func_qty(); + for (size_t i = 0; i < func_qty; i++) { + func_t* f = getn_func(i); + if (!f) continue; + + std::vector func_rows; + if (collect_ctree_labels(func_rows, f->start_ea)) { + rows.insert(rows.end(), func_rows.begin(), func_rows.end()); + } + } +} + +// ============================================================================ +// Call Args Collector +// ============================================================================ + +call_args_collector_t::call_args_collector_t(std::vector& args_, cfunc_t* cfunc_, ea_t func_addr_) + : ctree_parentee_t(false), args(args_), cfunc(cfunc_), func_addr(func_addr_), next_id(0) {} + +int idaapi call_args_collector_t::visit_insn(cinsn_t* insn) { + item_ids[insn] = next_id++; + return 0; +} + +int idaapi call_args_collector_t::visit_expr(cexpr_t* expr) { + int my_id = next_id++; + item_ids[expr] = my_id; + + if (expr->op == cot_call && expr->a) { + std::string call_obj_name; + std::string call_helper_name; + if (expr->x != nullptr) { + if (expr->x->op == cot_obj) { + qstring name; + if (get_name(&name, expr->x->obj_ea) > 0) { + call_obj_name = name.c_str(); + } + } else if (expr->x->op == cot_helper && expr->x->helper != nullptr) { + call_helper_name = expr->x->helper; } } - return 0; + carglist_t& arglist = *expr->a; + for (size_t i = 0; i < arglist.size(); i++) { + const carg_t& arg = arglist[i]; + + CallArgInfo ai; + ai.func_addr = func_addr; + ai.call_item_id = my_id; + ai.call_ea = expr->ea; + ai.call_obj_name = call_obj_name; + ai.call_helper_name = call_helper_name; + ai.arg_idx = i; + ai.arg_op = get_full_ctype_name(arg.op); + + auto it = item_ids.find((citem_t*)&arg); + if (it != item_ids.end()) { + ai.arg_item_id = it->second; + } else { + ai.arg_item_id = next_id++; + item_ids[(citem_t*)&arg] = ai.arg_item_id; + } + + switch (arg.op) { + case cot_var: + ai.arg_var_idx = arg.v.idx; + if (cfunc && ai.arg_var_idx >= 0 && ai.arg_var_idx < cfunc->get_lvars()->size()) { + const lvar_t& lv = (*cfunc->get_lvars())[ai.arg_var_idx]; + ai.arg_var_name = lv.name.c_str(); + ai.arg_var_is_stk = lv.is_stk_var(); + ai.arg_var_is_arg = lv.is_arg_var(); + } + break; + case cot_obj: + ai.arg_obj_ea = arg.obj_ea; + { + qstring name; + if (get_name(&name, arg.obj_ea) > 0) { + ai.arg_obj_name = name.c_str(); + } + } + break; + case cot_num: + ai.arg_num_value = arg.numval(); + break; + case cot_str: + if (arg.string) ai.arg_str_value = arg.string; + break; + default: + break; + } + + args.push_back(ai); + } } -}; -// Collect call args for a single function -inline bool collect_call_args(std::vector& args, ea_t func_addr) { + return 0; +} + +bool collect_call_args(std::vector& args, ea_t func_addr) { args.clear(); if (!hexrays_available()) return false; @@ -729,8 +677,7 @@ inline bool collect_call_args(std::vector& args, ea_t func_addr) { return true; } -// Collect call args for all functions -inline void collect_all_call_args(std::vector& args) { +void collect_all_call_args(std::vector& args) { args.clear(); if (!hexrays_available()) return; @@ -747,446 +694,396 @@ inline void collect_all_call_args(std::vector& args) { } } -// ctree and ctree_call_args use streaming generator tables (GeneratorTableDef). - // ============================================================================ // Iterators for constraint pushdown // ============================================================================ -// Pseudocode iterator for single function -class PseudocodeInFuncIterator : public xsql::RowIterator { - std::vector lines_; - size_t idx_ = 0; - bool started_ = false; +// --- PseudocodeInFuncIterator --- -public: - explicit PseudocodeInFuncIterator(ea_t func_addr) { - collect_pseudocode(lines_, func_addr); - } +PseudocodeInFuncIterator::PseudocodeInFuncIterator(ea_t func_addr) { + collect_pseudocode(lines_, func_addr); +} - bool next() override { - if (!started_) { - started_ = true; - if (lines_.empty()) return false; - idx_ = 0; - return true; - } - if (idx_ + 1 < lines_.size()) { ++idx_; return true; } - idx_ = lines_.size(); - return false; +bool PseudocodeInFuncIterator::next() { + if (!started_) { + started_ = true; + if (lines_.empty()) return false; + idx_ = 0; + return true; } + if (idx_ + 1 < lines_.size()) { ++idx_; return true; } + idx_ = lines_.size(); + return false; +} - bool eof() const override { return started_ && idx_ >= lines_.size(); } - - void column(xsql::FunctionContext& ctx, int col) override { - if (idx_ >= lines_.size()) { ctx.result_null(); return; } - const auto& line = lines_[idx_]; - switch (col) { - case 0: ctx.result_int64(line.func_addr); break; - case 1: ctx.result_int(line.line_num); break; - case 2: ctx.result_text(line.text.c_str()); break; - case 3: - ctx.result_int64(line.ea != BADADDR ? line.ea : 0); - break; - case 4: - if (!line.comment.empty()) - ctx.result_text(line.comment.c_str()); - else - ctx.result_null(); - break; - case 5: - ctx.result_text_static(itp_to_name(line.comment_placement)); - break; - } +bool PseudocodeInFuncIterator::eof() const { return started_ && idx_ >= lines_.size(); } + +void PseudocodeInFuncIterator::column(xsql::FunctionContext& ctx, int col) { + if (idx_ >= lines_.size()) { ctx.result_null(); return; } + const auto& line = lines_[idx_]; + switch (col) { + case 0: ctx.result_int64(line.func_addr); break; + case 1: ctx.result_int(line.line_num); break; + case 2: ctx.result_text(line.text.c_str()); break; + case 3: + ctx.result_int64(line.ea != BADADDR ? line.ea : 0); + break; + case 4: + if (!line.comment.empty()) + ctx.result_text(line.comment.c_str()); + else + ctx.result_null(); + break; + case 5: + ctx.result_text_static(itp_to_name(line.comment_placement)); + break; } +} - int64_t rowid() const override { return static_cast(idx_); } -}; +int64_t PseudocodeInFuncIterator::rowid() const { return static_cast(idx_); } -// Pseudocode iterator for a single mapped address -class PseudocodeAtEaIterator : public xsql::RowIterator { - std::vector lines_; - size_t idx_ = 0; - bool started_ = false; +// --- PseudocodeAtEaIterator --- -public: - explicit PseudocodeAtEaIterator(ea_t ea) { - func_t* f = get_func(ea); - if (!f) return; +PseudocodeAtEaIterator::PseudocodeAtEaIterator(ea_t ea) { + func_t* f = get_func(ea); + if (!f) return; - std::vector all; - collect_pseudocode(all, f->start_ea); - for (const auto& line : all) { - if (line.ea == ea) { - lines_.push_back(line); - } + std::vector all; + collect_pseudocode(all, f->start_ea); + for (const auto& line : all) { + if (line.ea == ea) { + lines_.push_back(line); } } +} - bool next() override { - if (!started_) { - started_ = true; - if (lines_.empty()) return false; - idx_ = 0; - return true; - } - if (idx_ + 1 < lines_.size()) { ++idx_; return true; } - idx_ = lines_.size(); - return false; +bool PseudocodeAtEaIterator::next() { + if (!started_) { + started_ = true; + if (lines_.empty()) return false; + idx_ = 0; + return true; } + if (idx_ + 1 < lines_.size()) { ++idx_; return true; } + idx_ = lines_.size(); + return false; +} - bool eof() const override { return started_ && idx_ >= lines_.size(); } - - void column(xsql::FunctionContext& ctx, int col) override { - if (idx_ >= lines_.size()) { ctx.result_null(); return; } - const auto& line = lines_[idx_]; - switch (col) { - case 0: ctx.result_int64(line.func_addr); break; - case 1: ctx.result_int(line.line_num); break; - case 2: ctx.result_text(line.text.c_str()); break; - case 3: ctx.result_int64(line.ea != BADADDR ? line.ea : 0); break; - case 4: - if (!line.comment.empty()) ctx.result_text(line.comment.c_str()); - else ctx.result_null(); - break; - case 5: ctx.result_text_static(itp_to_name(line.comment_placement)); break; - default: ctx.result_null(); break; - } +bool PseudocodeAtEaIterator::eof() const { return started_ && idx_ >= lines_.size(); } + +void PseudocodeAtEaIterator::column(xsql::FunctionContext& ctx, int col) { + if (idx_ >= lines_.size()) { ctx.result_null(); return; } + const auto& line = lines_[idx_]; + switch (col) { + case 0: ctx.result_int64(line.func_addr); break; + case 1: ctx.result_int(line.line_num); break; + case 2: ctx.result_text(line.text.c_str()); break; + case 3: ctx.result_int64(line.ea != BADADDR ? line.ea : 0); break; + case 4: + if (!line.comment.empty()) ctx.result_text(line.comment.c_str()); + else ctx.result_null(); + break; + case 5: ctx.result_text_static(itp_to_name(line.comment_placement)); break; + default: ctx.result_null(); break; } +} - int64_t rowid() const override { return static_cast(idx_); } -}; +int64_t PseudocodeAtEaIterator::rowid() const { return static_cast(idx_); } -// Pseudocode iterator for line number across all functions -class PseudocodeLineNumIterator : public xsql::RowIterator { - std::vector lines_; - size_t idx_ = 0; - bool started_ = false; +// --- PseudocodeLineNumIterator --- -public: - explicit PseudocodeLineNumIterator(int line_num) { - if (line_num < 0) return; +PseudocodeLineNumIterator::PseudocodeLineNumIterator(int line_num) { + if (line_num < 0) return; - size_t func_qty = get_func_qty(); - for (size_t i = 0; i < func_qty; ++i) { - func_t* f = getn_func(i); - if (!f) continue; + size_t func_qty = get_func_qty(); + for (size_t i = 0; i < func_qty; ++i) { + func_t* f = getn_func(i); + if (!f) continue; - std::vector func_lines; - collect_pseudocode(func_lines, f->start_ea); - for (const auto& line : func_lines) { - if (line.line_num == line_num) { - lines_.push_back(line); - } + std::vector func_lines; + collect_pseudocode(func_lines, f->start_ea); + for (const auto& line : func_lines) { + if (line.line_num == line_num) { + lines_.push_back(line); } } } +} - bool next() override { - if (!started_) { - started_ = true; - if (lines_.empty()) return false; - idx_ = 0; - return true; - } - if (idx_ + 1 < lines_.size()) { ++idx_; return true; } - idx_ = lines_.size(); - return false; +bool PseudocodeLineNumIterator::next() { + if (!started_) { + started_ = true; + if (lines_.empty()) return false; + idx_ = 0; + return true; } + if (idx_ + 1 < lines_.size()) { ++idx_; return true; } + idx_ = lines_.size(); + return false; +} - bool eof() const override { return started_ && idx_ >= lines_.size(); } - - void column(xsql::FunctionContext& ctx, int col) override { - if (idx_ >= lines_.size()) { ctx.result_null(); return; } - const auto& line = lines_[idx_]; - switch (col) { - case 0: ctx.result_int64(line.func_addr); break; - case 1: ctx.result_int(line.line_num); break; - case 2: ctx.result_text(line.text.c_str()); break; - case 3: ctx.result_int64(line.ea != BADADDR ? line.ea : 0); break; - case 4: - if (!line.comment.empty()) ctx.result_text(line.comment.c_str()); - else ctx.result_null(); - break; - case 5: ctx.result_text_static(itp_to_name(line.comment_placement)); break; - default: ctx.result_null(); break; - } +bool PseudocodeLineNumIterator::eof() const { return started_ && idx_ >= lines_.size(); } + +void PseudocodeLineNumIterator::column(xsql::FunctionContext& ctx, int col) { + if (idx_ >= lines_.size()) { ctx.result_null(); return; } + const auto& line = lines_[idx_]; + switch (col) { + case 0: ctx.result_int64(line.func_addr); break; + case 1: ctx.result_int(line.line_num); break; + case 2: ctx.result_text(line.text.c_str()); break; + case 3: ctx.result_int64(line.ea != BADADDR ? line.ea : 0); break; + case 4: + if (!line.comment.empty()) ctx.result_text(line.comment.c_str()); + else ctx.result_null(); + break; + case 5: ctx.result_text_static(itp_to_name(line.comment_placement)); break; + default: ctx.result_null(); break; } +} - int64_t rowid() const override { return static_cast(idx_); } -}; +int64_t PseudocodeLineNumIterator::rowid() const { return static_cast(idx_); } -// Lvars iterator for single function -class LvarsInFuncIterator : public xsql::RowIterator { - std::vector vars_; - size_t idx_ = 0; - bool started_ = false; +// --- LvarsInFuncIterator --- -public: - explicit LvarsInFuncIterator(ea_t func_addr) { - collect_lvars(vars_, func_addr); - } +LvarsInFuncIterator::LvarsInFuncIterator(ea_t func_addr) { + collect_lvars(vars_, func_addr); +} - bool next() override { - if (!started_) { - started_ = true; - if (vars_.empty()) return false; - idx_ = 0; - return true; - } - if (idx_ + 1 < vars_.size()) { ++idx_; return true; } - idx_ = vars_.size(); - return false; +bool LvarsInFuncIterator::next() { + if (!started_) { + started_ = true; + if (vars_.empty()) return false; + idx_ = 0; + return true; } + if (idx_ + 1 < vars_.size()) { ++idx_; return true; } + idx_ = vars_.size(); + return false; +} - bool eof() const override { return started_ && idx_ >= vars_.size(); } - - void column(xsql::FunctionContext& ctx, int col) override { - if (idx_ >= vars_.size()) { ctx.result_null(); return; } - const auto& v = vars_[idx_]; - switch (col) { - case 0: ctx.result_int64(v.func_addr); break; - case 1: ctx.result_int(v.idx); break; - case 2: ctx.result_text(v.name.c_str()); break; - case 3: ctx.result_text(v.type.c_str()); break; - case 4: - if (!v.comment.empty()) ctx.result_text(v.comment.c_str()); - else ctx.result_null(); - break; - case 5: ctx.result_int(v.size); break; - case 6: ctx.result_int(v.is_arg ? 1 : 0); break; - case 7: ctx.result_int(v.is_result ? 1 : 0); break; - case 8: ctx.result_int(v.is_stk_var ? 1 : 0); break; - case 9: ctx.result_int(v.is_reg_var ? 1 : 0); break; - case 10: v.is_stk_var ? ctx.result_int64(v.stkoff) : ctx.result_null(); break; - case 11: v.is_reg_var ? ctx.result_int(v.mreg) : ctx.result_null(); break; - default: ctx.result_null(); break; - } +bool LvarsInFuncIterator::eof() const { return started_ && idx_ >= vars_.size(); } + +void LvarsInFuncIterator::column(xsql::FunctionContext& ctx, int col) { + if (idx_ >= vars_.size()) { ctx.result_null(); return; } + const auto& v = vars_[idx_]; + switch (col) { + case 0: ctx.result_int64(v.func_addr); break; + case 1: ctx.result_int(v.idx); break; + case 2: ctx.result_text(v.name.c_str()); break; + case 3: ctx.result_text(v.type.c_str()); break; + case 4: + if (!v.comment.empty()) ctx.result_text(v.comment.c_str()); + else ctx.result_null(); + break; + case 5: ctx.result_int(v.size); break; + case 6: ctx.result_int(v.is_arg ? 1 : 0); break; + case 7: ctx.result_int(v.is_result ? 1 : 0); break; + case 8: ctx.result_int(v.is_stk_var ? 1 : 0); break; + case 9: ctx.result_int(v.is_reg_var ? 1 : 0); break; + case 10: v.is_stk_var ? ctx.result_int64(v.stkoff) : ctx.result_null(); break; + case 11: v.is_reg_var ? ctx.result_int(v.mreg) : ctx.result_null(); break; + default: ctx.result_null(); break; } +} - int64_t rowid() const override { return static_cast(idx_); } -}; +int64_t LvarsInFuncIterator::rowid() const { return static_cast(idx_); } -// Ctree iterator for single function -class CtreeInFuncIterator : public xsql::RowIterator { - std::vector items_; - size_t idx_ = 0; - bool started_ = false; +// --- CtreeInFuncIterator --- -public: - explicit CtreeInFuncIterator(ea_t func_addr) { - collect_ctree(items_, func_addr); - } +CtreeInFuncIterator::CtreeInFuncIterator(ea_t func_addr) { + collect_ctree(items_, func_addr); +} - bool next() override { - if (!started_) { - started_ = true; - if (items_.empty()) return false; - idx_ = 0; - return true; - } - if (idx_ + 1 < items_.size()) { ++idx_; return true; } - idx_ = items_.size(); - return false; +bool CtreeInFuncIterator::next() { + if (!started_) { + started_ = true; + if (items_.empty()) return false; + idx_ = 0; + return true; } + if (idx_ + 1 < items_.size()) { ++idx_; return true; } + idx_ = items_.size(); + return false; +} - bool eof() const override { return started_ && idx_ >= items_.size(); } - - void column(xsql::FunctionContext& ctx, int col) override { - if (idx_ >= items_.size()) { ctx.result_null(); return; } - const auto& item = items_[idx_]; - switch (col) { - case 0: ctx.result_int64(item.func_addr); break; - case 1: ctx.result_int(item.item_id); break; - case 2: ctx.result_int(item.is_expr ? 1 : 0); break; - case 3: ctx.result_int(item.op); break; - case 4: ctx.result_text(item.op_name.c_str()); break; - case 5: item.ea != BADADDR ? ctx.result_int64(item.ea) : ctx.result_null(); break; - case 6: item.parent_id >= 0 ? ctx.result_int(item.parent_id) : ctx.result_null(); break; - case 7: ctx.result_int(item.depth); break; - case 8: item.x_id >= 0 ? ctx.result_int(item.x_id) : ctx.result_null(); break; - case 9: item.y_id >= 0 ? ctx.result_int(item.y_id) : ctx.result_null(); break; - case 10: item.z_id >= 0 ? ctx.result_int(item.z_id) : ctx.result_null(); break; - case 11: item.cond_id >= 0 ? ctx.result_int(item.cond_id) : ctx.result_null(); break; - case 12: item.then_id >= 0 ? ctx.result_int(item.then_id) : ctx.result_null(); break; - case 13: item.else_id >= 0 ? ctx.result_int(item.else_id) : ctx.result_null(); break; - case 14: item.body_id >= 0 ? ctx.result_int(item.body_id) : ctx.result_null(); break; - case 15: item.init_id >= 0 ? ctx.result_int(item.init_id) : ctx.result_null(); break; - case 16: item.step_id >= 0 ? ctx.result_int(item.step_id) : ctx.result_null(); break; - case 17: item.var_idx >= 0 ? ctx.result_int(item.var_idx) : ctx.result_null(); break; - case 18: item.obj_ea != BADADDR ? ctx.result_int64(item.obj_ea) : ctx.result_null(); break; - case 19: item.op == cot_num ? ctx.result_int64(item.num_value) : ctx.result_null(); break; - case 20: !item.str_value.empty() ? ctx.result_text(item.str_value.c_str()) : ctx.result_null(); break; - case 21: !item.helper_name.empty() ? ctx.result_text(item.helper_name.c_str()) : ctx.result_null(); break; - case 22: (item.op == cot_memref || item.op == cot_memptr) ? ctx.result_int(item.member_offset) : ctx.result_null(); break; - case 23: !item.var_name.empty() ? ctx.result_text(item.var_name.c_str()) : ctx.result_null(); break; - case 24: item.op == cot_var ? ctx.result_int(item.var_is_stk ? 1 : 0) : ctx.result_null(); break; - case 25: item.op == cot_var ? ctx.result_int(item.var_is_reg ? 1 : 0) : ctx.result_null(); break; - case 26: item.op == cot_var ? ctx.result_int(item.var_is_arg ? 1 : 0) : ctx.result_null(); break; - case 27: !item.obj_name.empty() ? ctx.result_text(item.obj_name.c_str()) : ctx.result_null(); break; - } +bool CtreeInFuncIterator::eof() const { return started_ && idx_ >= items_.size(); } + +void CtreeInFuncIterator::column(xsql::FunctionContext& ctx, int col) { + if (idx_ >= items_.size()) { ctx.result_null(); return; } + const auto& item = items_[idx_]; + switch (col) { + case 0: ctx.result_int64(item.func_addr); break; + case 1: ctx.result_int(item.item_id); break; + case 2: ctx.result_int(item.is_expr ? 1 : 0); break; + case 3: ctx.result_int(item.op); break; + case 4: ctx.result_text(item.op_name.c_str()); break; + case 5: item.ea != BADADDR ? ctx.result_int64(item.ea) : ctx.result_null(); break; + case 6: item.parent_id >= 0 ? ctx.result_int(item.parent_id) : ctx.result_null(); break; + case 7: ctx.result_int(item.depth); break; + case 8: item.x_id >= 0 ? ctx.result_int(item.x_id) : ctx.result_null(); break; + case 9: item.y_id >= 0 ? ctx.result_int(item.y_id) : ctx.result_null(); break; + case 10: item.z_id >= 0 ? ctx.result_int(item.z_id) : ctx.result_null(); break; + case 11: item.cond_id >= 0 ? ctx.result_int(item.cond_id) : ctx.result_null(); break; + case 12: item.then_id >= 0 ? ctx.result_int(item.then_id) : ctx.result_null(); break; + case 13: item.else_id >= 0 ? ctx.result_int(item.else_id) : ctx.result_null(); break; + case 14: item.body_id >= 0 ? ctx.result_int(item.body_id) : ctx.result_null(); break; + case 15: item.init_id >= 0 ? ctx.result_int(item.init_id) : ctx.result_null(); break; + case 16: item.step_id >= 0 ? ctx.result_int(item.step_id) : ctx.result_null(); break; + case 17: item.var_idx >= 0 ? ctx.result_int(item.var_idx) : ctx.result_null(); break; + case 18: item.obj_ea != BADADDR ? ctx.result_int64(item.obj_ea) : ctx.result_null(); break; + case 19: item.op == cot_num ? ctx.result_int64(item.num_value) : ctx.result_null(); break; + case 20: !item.str_value.empty() ? ctx.result_text(item.str_value.c_str()) : ctx.result_null(); break; + case 21: !item.helper_name.empty() ? ctx.result_text(item.helper_name.c_str()) : ctx.result_null(); break; + case 22: (item.op == cot_memref || item.op == cot_memptr) ? ctx.result_int(item.member_offset) : ctx.result_null(); break; + case 23: !item.var_name.empty() ? ctx.result_text(item.var_name.c_str()) : ctx.result_null(); break; + case 24: item.op == cot_var ? ctx.result_int(item.var_is_stk ? 1 : 0) : ctx.result_null(); break; + case 25: item.op == cot_var ? ctx.result_int(item.var_is_reg ? 1 : 0) : ctx.result_null(); break; + case 26: item.op == cot_var ? ctx.result_int(item.var_is_arg ? 1 : 0) : ctx.result_null(); break; + case 27: !item.obj_name.empty() ? ctx.result_text(item.obj_name.c_str()) : ctx.result_null(); break; + case 28: item.label_num >= 0 ? ctx.result_int(item.label_num) : ctx.result_null(); break; + case 29: item.goto_label_num >= 0 ? ctx.result_int(item.goto_label_num) : ctx.result_null(); break; + default: ctx.result_null(); break; } +} - int64_t rowid() const override { return static_cast(idx_); } -}; +int64_t CtreeInFuncIterator::rowid() const { return static_cast(idx_); } -// Call args iterator for single function -class CallArgsInFuncIterator : public xsql::RowIterator { - std::vector args_; - size_t idx_ = 0; - bool started_ = false; +// --- CallArgsInFuncIterator --- -public: - explicit CallArgsInFuncIterator(ea_t func_addr) { - collect_call_args(args_, func_addr); - } +CallArgsInFuncIterator::CallArgsInFuncIterator(ea_t func_addr) { + collect_call_args(args_, func_addr); +} - bool next() override { - if (!started_) { - started_ = true; - if (args_.empty()) return false; - idx_ = 0; - return true; - } - if (idx_ + 1 < args_.size()) { ++idx_; return true; } - idx_ = args_.size(); - return false; +bool CallArgsInFuncIterator::next() { + if (!started_) { + started_ = true; + if (args_.empty()) return false; + idx_ = 0; + return true; } + if (idx_ + 1 < args_.size()) { ++idx_; return true; } + idx_ = args_.size(); + return false; +} - bool eof() const override { return started_ && idx_ >= args_.size(); } - - void column(xsql::FunctionContext& ctx, int col) override { - if (idx_ >= args_.size()) { ctx.result_null(); return; } - const auto& ai = args_[idx_]; - switch (col) { - case 0: ctx.result_int64(ai.func_addr); break; - case 1: ctx.result_int(ai.call_item_id); break; - case 2: ai.call_ea != BADADDR ? ctx.result_int64(ai.call_ea) : ctx.result_null(); break; - case 3: !ai.call_obj_name.empty() ? ctx.result_text(ai.call_obj_name.c_str()) : ctx.result_null(); break; - case 4: !ai.call_helper_name.empty() ? ctx.result_text(ai.call_helper_name.c_str()) : ctx.result_null(); break; - case 5: ctx.result_int(ai.arg_idx); break; - case 6: ai.arg_item_id >= 0 ? ctx.result_int(ai.arg_item_id) : ctx.result_null(); break; - case 7: ctx.result_text(ai.arg_op.c_str()); break; - case 8: ai.arg_var_idx >= 0 ? ctx.result_int(ai.arg_var_idx) : ctx.result_null(); break; - case 9: !ai.arg_var_name.empty() ? ctx.result_text(ai.arg_var_name.c_str()) : ctx.result_null(); break; - case 10: ai.arg_var_idx >= 0 ? ctx.result_int(ai.arg_var_is_stk ? 1 : 0) : ctx.result_null(); break; - case 11: ai.arg_var_idx >= 0 ? ctx.result_int(ai.arg_var_is_arg ? 1 : 0) : ctx.result_null(); break; - case 12: ai.arg_obj_ea != BADADDR ? ctx.result_int64(ai.arg_obj_ea) : ctx.result_null(); break; - case 13: !ai.arg_obj_name.empty() ? ctx.result_text(ai.arg_obj_name.c_str()) : ctx.result_null(); break; - case 14: ai.arg_op == "cot_num" ? ctx.result_int64(ai.arg_num_value) : ctx.result_null(); break; - case 15: !ai.arg_str_value.empty() ? ctx.result_text(ai.arg_str_value.c_str()) : ctx.result_null(); break; - } +bool CallArgsInFuncIterator::eof() const { return started_ && idx_ >= args_.size(); } + +void CallArgsInFuncIterator::column(xsql::FunctionContext& ctx, int col) { + if (idx_ >= args_.size()) { ctx.result_null(); return; } + const auto& ai = args_[idx_]; + switch (col) { + case 0: ctx.result_int64(ai.func_addr); break; + case 1: ctx.result_int(ai.call_item_id); break; + case 2: ai.call_ea != BADADDR ? ctx.result_int64(ai.call_ea) : ctx.result_null(); break; + case 3: !ai.call_obj_name.empty() ? ctx.result_text(ai.call_obj_name.c_str()) : ctx.result_null(); break; + case 4: !ai.call_helper_name.empty() ? ctx.result_text(ai.call_helper_name.c_str()) : ctx.result_null(); break; + case 5: ctx.result_int(ai.arg_idx); break; + case 6: ai.arg_item_id >= 0 ? ctx.result_int(ai.arg_item_id) : ctx.result_null(); break; + case 7: ctx.result_text(ai.arg_op.c_str()); break; + case 8: ai.arg_var_idx >= 0 ? ctx.result_int(ai.arg_var_idx) : ctx.result_null(); break; + case 9: !ai.arg_var_name.empty() ? ctx.result_text(ai.arg_var_name.c_str()) : ctx.result_null(); break; + case 10: ai.arg_var_idx >= 0 ? ctx.result_int(ai.arg_var_is_stk ? 1 : 0) : ctx.result_null(); break; + case 11: ai.arg_var_idx >= 0 ? ctx.result_int(ai.arg_var_is_arg ? 1 : 0) : ctx.result_null(); break; + case 12: ai.arg_obj_ea != BADADDR ? ctx.result_int64(ai.arg_obj_ea) : ctx.result_null(); break; + case 13: !ai.arg_obj_name.empty() ? ctx.result_text(ai.arg_obj_name.c_str()) : ctx.result_null(); break; + case 14: ai.arg_op == "cot_num" ? ctx.result_int64(ai.arg_num_value) : ctx.result_null(); break; + case 15: !ai.arg_str_value.empty() ? ctx.result_text(ai.arg_str_value.c_str()) : ctx.result_null(); break; } +} - int64_t rowid() const override { return static_cast(idx_); } -}; +int64_t CallArgsInFuncIterator::rowid() const { return static_cast(idx_); } // ============================================================================ // Generators for full scans (lazy, one function at a time) // ============================================================================ -class CtreeGenerator : public xsql::Generator { - size_t func_idx_ = 0; - std::vector items_; - size_t idx_ = 0; - int64_t rowid_ = -1; - bool started_ = false; +// --- CtreeGenerator --- - bool load_next_func() { - if (!hexrays_available()) return false; - - size_t func_qty = get_func_qty(); - while (func_idx_ < func_qty) { - func_t* f = getn_func(func_idx_++); - if (!f) continue; - - if (collect_ctree(items_, f->start_ea) && !items_.empty()) { - idx_ = 0; - return true; - } - } - return false; - } +bool CtreeGenerator::load_next_func() { + if (!hexrays_available()) return false; -public: - bool next() override { - if (!started_) { - started_ = true; - if (!load_next_func()) return false; - rowid_ = 0; - return true; - } + size_t func_qty = get_func_qty(); + while (func_idx_ < func_qty) { + func_t* f = getn_func(func_idx_++); + if (!f) continue; - if (idx_ + 1 < items_.size()) { - ++idx_; - ++rowid_; + if (collect_ctree(items_, f->start_ea) && !items_.empty()) { + idx_ = 0; return true; } + } + return false; +} +bool CtreeGenerator::next() { + if (!started_) { + started_ = true; if (!load_next_func()) return false; - ++rowid_; + rowid_ = 0; return true; } - const CtreeItem& current() const override { return items_[idx_]; } + if (idx_ + 1 < items_.size()) { + ++idx_; + ++rowid_; + return true; + } - int64_t rowid() const override { return rowid_; } -}; + if (!load_next_func()) return false; + ++rowid_; + return true; +} -class CallArgsGenerator : public xsql::Generator { - size_t func_idx_ = 0; - std::vector args_; - size_t idx_ = 0; - int64_t rowid_ = -1; - bool started_ = false; +const CtreeItem& CtreeGenerator::current() const { return items_[idx_]; } - bool load_next_func() { - if (!hexrays_available()) return false; +int64_t CtreeGenerator::rowid() const { return rowid_; } - size_t func_qty = get_func_qty(); - while (func_idx_ < func_qty) { - func_t* f = getn_func(func_idx_++); - if (!f) continue; +// --- CallArgsGenerator --- - if (collect_call_args(args_, f->start_ea) && !args_.empty()) { - idx_ = 0; - return true; - } - } - return false; - } +bool CallArgsGenerator::load_next_func() { + if (!hexrays_available()) return false; -public: - bool next() override { - if (!started_) { - started_ = true; - if (!load_next_func()) return false; - rowid_ = 0; - return true; - } + size_t func_qty = get_func_qty(); + while (func_idx_ < func_qty) { + func_t* f = getn_func(func_idx_++); + if (!f) continue; - if (idx_ + 1 < args_.size()) { - ++idx_; - ++rowid_; + if (collect_call_args(args_, f->start_ea) && !args_.empty()) { + idx_ = 0; return true; } + } + return false; +} +bool CallArgsGenerator::next() { + if (!started_) { + started_ = true; if (!load_next_func()) return false; + rowid_ = 0; + return true; + } + + if (idx_ + 1 < args_.size()) { + ++idx_; ++rowid_; return true; } - const CallArgInfo& current() const override { return args_[idx_]; } + if (!load_next_func()) return false; + ++rowid_; + return true; +} + +const CallArgInfo& CallArgsGenerator::current() const { return args_[idx_]; } - int64_t rowid() const override { return rowid_; } -}; +int64_t CallArgsGenerator::rowid() const { return rowid_; } // ============================================================================ -// Table Definitions +// Comment / Union Helpers // ============================================================================ -// Helper: Set or delete a decompiler comment at an ea within a function -inline bool set_decompiler_comment(ea_t func_addr, ea_t target_ea, const char* comment, item_preciser_t itp = ITP_SEMI) { +bool set_decompiler_comment(ea_t func_addr, ea_t target_ea, const char* comment, item_preciser_t itp) { if (!hexrays_available()) return false; if (target_ea == BADADDR || target_ea == 0) return false; @@ -1209,7 +1106,7 @@ inline bool set_decompiler_comment(ea_t func_addr, ea_t target_ea, const char* c return true; } -inline bool clear_decompiler_comment_all_placements(ea_t func_addr, ea_t target_ea) { +bool clear_decompiler_comment_all_placements(ea_t func_addr, ea_t target_ea) { if (!hexrays_available()) return false; if (target_ea == BADADDR || target_ea == 0) return false; @@ -1238,8 +1135,7 @@ inline bool clear_decompiler_comment_all_placements(ea_t func_addr, ea_t target_ return true; } -// Resolve an EA for a ctree item within a function. -inline bool get_ctree_item_ea(ea_t func_addr, int item_id, ea_t& out_ea) { +bool get_ctree_item_ea(ea_t func_addr, int item_id, ea_t& out_ea) { out_ea = BADADDR; if (!hexrays_available()) return false; if (item_id < 0) return false; @@ -1255,8 +1151,7 @@ inline bool get_ctree_item_ea(ea_t func_addr, int item_id, ea_t& out_ea) { return false; } -// Persist user union selection path for an EA. Empty path clears selection. -inline bool set_union_selection_at_ea(ea_t func_addr, ea_t target_ea, const intvec_t& path) { +bool set_union_selection_at_ea(ea_t func_addr, ea_t target_ea, const intvec_t& path) { if (!hexrays_available()) return false; if (target_ea == BADADDR || target_ea == 0) return false; @@ -1285,8 +1180,7 @@ inline bool set_union_selection_at_ea(ea_t func_addr, ea_t target_ea, const intv return true; } -// Persist user union selection path by ctree item id. -inline bool set_union_selection_at_item(ea_t func_addr, int item_id, const intvec_t& path) { +bool set_union_selection_at_item(ea_t func_addr, int item_id, const intvec_t& path) { ea_t target_ea = BADADDR; if (!get_ctree_item_ea(func_addr, item_id, target_ea)) { return false; @@ -1294,8 +1188,7 @@ inline bool set_union_selection_at_item(ea_t func_addr, int item_id, const intve return set_union_selection_at_ea(func_addr, target_ea, path); } -// Read user union selection path for an EA. Returns false when not found. -inline bool get_union_selection_at_ea(ea_t func_addr, ea_t target_ea, intvec_t& out_path) { +bool get_union_selection_at_ea(ea_t func_addr, ea_t target_ea, intvec_t& out_path) { out_path.clear(); if (!hexrays_available()) return false; if (target_ea == BADADDR || target_ea == 0) return false; @@ -1312,80 +1205,11 @@ inline bool get_union_selection_at_ea(ea_t func_addr, ea_t target_ea, intvec_t& return found; } -inline CachedTableDef define_pseudocode() { - return cached_table("pseudocode") - .no_shared_cache() - .estimate_rows([]() -> size_t { return get_func_qty() * 20; }) - .cache_builder([](std::vector& cache) { - collect_all_pseudocode(cache); - }) - .row_populator([](PseudocodeLine& row, int argc, xsql::FunctionArg* argv) { - // argv[2]=func_addr, argv[3]=line_num, argv[4]=line, argv[5]=ea, argv[6]=comment, argv[7]=comment_placement - if (argc > 2) row.func_addr = static_cast(argv[2].as_int64()); - if (argc > 3) row.line_num = argv[3].as_int(); - if (argc > 5) row.ea = static_cast(argv[5].as_int64()); - if (argc > 7 && !argv[7].is_null()) { - const char* p = argv[7].as_c_str(); - row.comment_placement = name_to_itp(p); - } - }) - .column_int64("func_addr", [](const PseudocodeLine& r) -> int64_t { return r.func_addr; }) - .column_int("line_num", [](const PseudocodeLine& r) -> int { return r.line_num; }) - .column_text("line", [](const PseudocodeLine& r) -> std::string { return r.text; }) - .column_int64("ea", [](const PseudocodeLine& r) -> int64_t { - return r.ea != BADADDR ? r.ea : 0; - }) - .column_text_rw("comment", - [](const PseudocodeLine& r) -> std::string { return r.comment; }, - [](PseudocodeLine& row, xsql::FunctionArg val) -> bool { - const char* text = nullptr; - if (!val.is_null()) { - text = val.as_c_str(); - } - const bool is_clear = (text == nullptr || text[0] == '\0'); - if (row.ea == BADADDR || row.ea == 0) { - // Non-addressable lines (signature/blank/comment-only) cannot hold - // user comments in Hex-Rays. Allow clear/no-op updates so bulk - // cleanup queries do not fail the full statement. - if (is_clear) { - row.comment.clear(); - return true; - } - return false; - } - bool ok = false; - if (is_clear) { - ok = clear_decompiler_comment_all_placements(row.func_addr, row.ea); - } else { - ok = set_decompiler_comment(row.func_addr, row.ea, text, row.comment_placement); - } - if (ok) { - row.comment = text ? text : ""; - } - return ok; - }) - .column_text_rw("comment_placement", - [](const PseudocodeLine& r) -> std::string { return itp_to_name(r.comment_placement); }, - [](PseudocodeLine& row, xsql::FunctionArg val) -> bool { - if (val.is_null()) return false; - const char* name = val.as_c_str(); - row.comment_placement = name_to_itp(name); - return true; // just sets the field, actual comment write happens in comment setter - }) - .filter_eq("func_addr", [](int64_t func_addr) -> std::unique_ptr { - return std::make_unique(static_cast(func_addr)); - }, 50.0) - .filter_eq("ea", [](int64_t ea) -> std::unique_ptr { - return std::make_unique(static_cast(ea)); - }, 20.0, 5.0) - .filter_eq("line_num", [](int64_t line_num) -> std::unique_ptr { - return std::make_unique(static_cast(line_num)); - }, 200.0, 100.0) - .build(); -} +// ============================================================================ +// Lvar Helpers +// ============================================================================ -// Snapshot one lvar from a function by index. -inline bool get_lvar_snapshot(ea_t func_addr, int lvar_idx, LvarInfo& out) { +bool get_lvar_snapshot(ea_t func_addr, int lvar_idx, LvarInfo& out) { if (!hexrays_available()) return false; if (lvar_idx < 0) return false; @@ -1397,8 +1221,7 @@ inline bool get_lvar_snapshot(ea_t func_addr, int lvar_idx, LvarInfo& out) { return true; } -// Helper: Rename lvar by func_addr and lvar index with explicit readback validation. -inline LvarRenameResult rename_lvar_at_ex(ea_t func_addr, int lvar_idx, const char* new_name) { +LvarRenameResult rename_lvar_at_ex(ea_t func_addr, int lvar_idx, const char* new_name) { LvarRenameResult result; result.func_addr = func_addr; result.lvar_idx = lvar_idx; @@ -1489,15 +1312,12 @@ inline LvarRenameResult rename_lvar_at_ex(ea_t func_addr, int lvar_idx, const ch return result; } -// Helper: Rename lvar by idx preserving legacy bool return. -// Returns true on a successful mutation or a no-op unchanged rename. -inline bool rename_lvar_at(ea_t func_addr, int lvar_idx, const char* new_name) { +bool rename_lvar_at(ea_t func_addr, int lvar_idx, const char* new_name) { LvarRenameResult r = rename_lvar_at_ex(func_addr, lvar_idx, new_name); return r.success && (r.applied || r.reason == "unchanged"); } -// Helper: Rename lvar by old name (exact match). -inline LvarRenameResult rename_lvar_by_name_ex(ea_t func_addr, const char* old_name, const char* new_name) { +LvarRenameResult rename_lvar_by_name_ex(ea_t func_addr, const char* old_name, const char* new_name) { LvarRenameResult result; result.func_addr = func_addr; result.lvar_idx = -1; @@ -1547,8 +1367,134 @@ inline LvarRenameResult rename_lvar_by_name_ex(ea_t func_addr, const char* old_n return result; } -// Helper: Set lvar type by func_addr and lvar index -inline bool set_lvar_type_at(ea_t func_addr, int lvar_idx, const char* type_str) { +bool get_label_snapshot(ea_t func_addr, int label_num, CtreeLabelInfo& out) { + if (!hexrays_available()) return false; + if (label_num < 0) return false; + + std::vector labels; + if (!collect_ctree_labels(labels, func_addr)) return false; + for (const auto& label : labels) { + if (label.label_num != label_num) continue; + out = label; + return true; + } + return false; +} + +LabelRenameResult rename_label_ex(ea_t func_addr, int label_num, const char* new_name) { + LabelRenameResult result; + result.func_addr = func_addr; + result.label_num = label_num; + result.requested_name = new_name ? new_name : ""; + + if (!hexrays_available()) { + result.success = false; + result.reason = "hexrays_unavailable"; + return result; + } + if (label_num < 0) { + result.success = true; + result.reason = "invalid_label"; + return result; + } + if (!new_name || !new_name[0]) { + result.success = true; + result.reason = "invalid_name"; + return result; + } + + CtreeLabelInfo before{}; + if (!get_label_snapshot(func_addr, label_num, before)) { + result.success = true; + result.reason = "not_found"; + return result; + } + result.before_name = before.name; + + if (before.name == new_name) { + result.success = true; + result.applied = false; + result.after_name = before.name; + result.reason = "unchanged"; + return result; + } + + func_t* f = get_func(func_addr); + if (!f) { + result.success = false; + result.reason = "function_not_found"; + return result; + } + + hexrays_failure_t hf; + cfuncptr_t cfunc = decompile(f, &hf); + if (!cfunc) { + result.success = false; + result.reason = "decompile_failed"; + return result; + } + + citem_t* label_item = cfunc->find_label(label_num); + if (label_item == nullptr) { + result.success = true; + result.reason = "not_found"; + return result; + } + + user_labels_t* labels = restore_user_labels(func_addr, &*cfunc); + if (labels == nullptr) { + labels = user_labels_new(); + } + if (labels == nullptr) { + result.success = false; + result.reason = "alloc_failed"; + return result; + } + + const std::string default_name = default_label_name(label_num); + const bool clear_user_override = (default_name == new_name); + auto it = user_labels_find(labels, label_num); + if (clear_user_override) { + if (it != user_labels_end(labels)) { + user_labels_erase(labels, it); + } + } else if (it == user_labels_end(labels)) { + user_labels_insert(labels, label_num, qstring(new_name)); + } else { + user_labels_second(it) = new_name; + } + + save_user_labels(func_addr, labels, &*cfunc); + user_labels_free(labels); + + invalidate_decompiler_cache(func_addr); + + CtreeLabelInfo after{}; + if (!get_label_snapshot(func_addr, label_num, after)) { + result.success = false; + result.reason = "post_verify_failed"; + return result; + } + + result.success = true; + result.after_name = after.name; + if (result.after_name == new_name) { + result.applied = true; + return result; + } + + result.applied = false; + result.reason = "not_applied"; + result.warnings.push_back("rename request did not match post-refresh label name"); + return result; +} + +bool rename_label(ea_t func_addr, int label_num, const char* new_name) { + LabelRenameResult r = rename_label_ex(func_addr, label_num, new_name); + return r.success && (r.applied || r.reason == "unchanged"); +} + +bool set_lvar_type_at(ea_t func_addr, int lvar_idx, const char* type_str) { if (!hexrays_available()) return false; @@ -1589,8 +1535,7 @@ inline bool set_lvar_type_at(ea_t func_addr, int lvar_idx, const char* type_str) return ok; } -// Helper: Set lvar comment by func_addr and lvar index -inline bool set_lvar_comment_at(ea_t func_addr, int lvar_idx, const char* comment) { +bool set_lvar_comment_at(ea_t func_addr, int lvar_idx, const char* comment) { if (!hexrays_available()) return false; @@ -1619,7 +1564,83 @@ inline bool set_lvar_comment_at(ea_t func_addr, int lvar_idx, const char* commen return ok; } -inline CachedTableDef define_ctree_lvars() { +// ============================================================================ +// Table Definitions +// ============================================================================ + +CachedTableDef define_pseudocode() { + return cached_table("pseudocode") + .no_shared_cache() + .estimate_rows([]() -> size_t { return get_func_qty() * 20; }) + .cache_builder([](std::vector& cache) { + collect_all_pseudocode(cache); + }) + .row_populator([](PseudocodeLine& row, int argc, xsql::FunctionArg* argv) { + // argv[2]=func_addr, argv[3]=line_num, argv[4]=line, argv[5]=ea, argv[6]=comment, argv[7]=comment_placement + if (argc > 2) row.func_addr = static_cast(argv[2].as_int64()); + if (argc > 3) row.line_num = argv[3].as_int(); + if (argc > 5) row.ea = static_cast(argv[5].as_int64()); + if (argc > 7 && !argv[7].is_null()) { + const char* p = argv[7].as_c_str(); + row.comment_placement = name_to_itp(p); + } + }) + .column_int64("func_addr", [](const PseudocodeLine& r) -> int64_t { return r.func_addr; }) + .column_int("line_num", [](const PseudocodeLine& r) -> int { return r.line_num; }) + .column_text("line", [](const PseudocodeLine& r) -> std::string { return r.text; }) + .column_int64("ea", [](const PseudocodeLine& r) -> int64_t { + return r.ea != BADADDR ? r.ea : 0; + }) + .column_text_rw("comment", + [](const PseudocodeLine& r) -> std::string { return r.comment; }, + [](PseudocodeLine& row, xsql::FunctionArg val) -> bool { + const char* text = nullptr; + if (!val.is_null()) { + text = val.as_c_str(); + } + const bool is_clear = (text == nullptr || text[0] == '\0'); + if (row.ea == BADADDR || row.ea == 0) { + // Non-addressable lines (signature/blank/comment-only) cannot hold + // user comments in Hex-Rays. Allow clear/no-op updates so bulk + // cleanup queries do not fail the full statement. + if (is_clear) { + row.comment.clear(); + return true; + } + return false; + } + bool ok = false; + if (is_clear) { + ok = clear_decompiler_comment_all_placements(row.func_addr, row.ea); + } else { + ok = set_decompiler_comment(row.func_addr, row.ea, text, row.comment_placement); + } + if (ok) { + row.comment = text ? text : ""; + } + return ok; + }) + .column_text_rw("comment_placement", + [](const PseudocodeLine& r) -> std::string { return itp_to_name(r.comment_placement); }, + [](PseudocodeLine& row, xsql::FunctionArg val) -> bool { + if (val.is_null()) return false; + const char* name = val.as_c_str(); + row.comment_placement = name_to_itp(name); + return true; // just sets the field, actual comment write happens in comment setter + }) + .filter_eq("func_addr", [](int64_t func_addr) -> std::unique_ptr { + return std::make_unique(static_cast(func_addr)); + }, 50.0) + .filter_eq("ea", [](int64_t ea) -> std::unique_ptr { + return std::make_unique(static_cast(ea)); + }, 20.0, 5.0) + .filter_eq("line_num", [](int64_t line_num) -> std::unique_ptr { + return std::make_unique(static_cast(line_num)); + }, 200.0, 100.0) + .build(); +} + +CachedTableDef define_ctree_lvars() { return cached_table("ctree_lvars") .no_shared_cache() .estimate_rows([]() -> size_t { return get_func_qty() * 20; }) @@ -1685,7 +1706,48 @@ inline CachedTableDef define_ctree_lvars() { .build(); } -inline GeneratorTableDef define_ctree() { +CachedTableDef define_ctree_labels() { + return cached_table("ctree_labels") + .no_shared_cache() + .estimate_rows([]() -> size_t { return get_func_qty() * 8; }) + .cache_builder([](std::vector& rows) { + collect_all_ctree_labels(rows); + }) + .row_lookup([](CtreeLabelInfo& row, int64_t raw_rowid) -> bool { + if (raw_rowid < 0) return false; + std::vector rows; + collect_all_ctree_labels(rows); + const size_t idx = static_cast(raw_rowid); + if (idx >= rows.size()) return false; + row = rows[idx]; + return true; + }) + .column_int64("func_addr", [](const CtreeLabelInfo& row) -> int64_t { return row.func_addr; }) + .column_int("label_num", [](const CtreeLabelInfo& row) -> int { return row.label_num; }) + .column_text_rw("name", + [](const CtreeLabelInfo& row) -> std::string { + return row.name; + }, + [](CtreeLabelInfo& row, const char* new_name) -> bool { + bool ok = rename_label(row.func_addr, row.label_num, new_name); + if (!ok) return false; + row.name = new_name ? new_name : ""; + row.is_user_defined = row.name != default_label_name(row.label_num); + return true; + }) + .column_int("item_id", [](const CtreeLabelInfo& row) -> int { + return row.item_id; + }) + .column_int64("item_ea", [](const CtreeLabelInfo& row) -> int64_t { + return row.item_ea != BADADDR ? static_cast(row.item_ea) : 0; + }) + .column_int("is_user_defined", [](const CtreeLabelInfo& row) -> int { + return row.is_user_defined ? 1 : 0; + }) + .build(); +} + +GeneratorTableDef define_ctree() { return generator_table("ctree") // Cheap estimate for query planning (doesn't decompile) .estimate_rows([]() -> size_t { @@ -1724,13 +1786,15 @@ inline GeneratorTableDef define_ctree() { .column_int("var_is_reg", [](const CtreeItem& r) -> int { return r.var_is_reg ? 1 : 0; }) .column_int("var_is_arg", [](const CtreeItem& r) -> int { return r.var_is_arg ? 1 : 0; }) .column_text("obj_name", [](const CtreeItem& r) -> std::string { return r.obj_name; }) + .column_int("label_num", [](const CtreeItem& r) -> int { return r.label_num; }) + .column_int("goto_label_num", [](const CtreeItem& r) -> int { return r.goto_label_num; }) .filter_eq("func_addr", [](int64_t func_addr) -> std::unique_ptr { return std::make_unique(static_cast(func_addr)); }, 100.0, 100.0) .build(); } -inline GeneratorTableDef define_ctree_call_args() { +GeneratorTableDef define_ctree_call_args() { return generator_table("ctree_call_args") // Cheap estimate for query planning .estimate_rows([]() -> size_t { @@ -1769,7 +1833,7 @@ inline GeneratorTableDef define_ctree_call_args() { // Views Registration // ============================================================================ -inline bool register_ctree_views(xsql::Database& db) { +bool register_ctree_views(xsql::Database& db) { const char* v_calls = R"( CREATE VIEW IF NOT EXISTS ctree_v_calls AS @@ -1977,47 +2041,41 @@ inline bool register_ctree_views(xsql::Database& db) { // Registry // ============================================================================ -struct DecompilerRegistry { - // Cached tables (query-scoped cache, write support) - CachedTableDef pseudocode; - CachedTableDef ctree_lvars; - // Generator tables (lazy full scans) - GeneratorTableDef ctree; - GeneratorTableDef ctree_call_args; - - DecompilerRegistry() - : pseudocode(define_pseudocode()) - , ctree_lvars(define_ctree_lvars()) - , ctree(define_ctree()) - , ctree_call_args(define_ctree_call_args()) - {} - - void register_all(xsql::Database& db) { - // Initialize Hex-Rays decompiler ONCE at startup - // If unavailable, skip registering decompiler tables entirely - if (!init_hexrays()) { - // Hex-Rays not available - don't register decompiler tables - return; - } +DecompilerRegistry::DecompilerRegistry() + : pseudocode(define_pseudocode()) + , ctree_lvars(define_ctree_lvars()) + , ctree_labels(define_ctree_labels()) + , ctree(define_ctree()) + , ctree_call_args(define_ctree_call_args()) +{} + +void DecompilerRegistry::register_all(xsql::Database& db) { + // Initialize Hex-Rays decompiler ONCE at startup + // If unavailable, skip registering decompiler tables entirely + if (!init_hexrays()) { + // Hex-Rays not available - don't register decompiler tables + return; + } - // Cached table (query-scoped cache, freed when no cursors reference it) - db.register_cached_table("ida_pseudocode", &pseudocode); - db.create_table("pseudocode", "ida_pseudocode"); + // Cached table (query-scoped cache, freed when no cursors reference it) + db.register_cached_table("ida_pseudocode", &pseudocode); + db.create_table("pseudocode", "ida_pseudocode"); - db.register_cached_table("ida_ctree_lvars", &ctree_lvars); - db.create_table("ctree_lvars", "ida_ctree_lvars"); + db.register_cached_table("ida_ctree_lvars", &ctree_lvars); + db.create_table("ctree_lvars", "ida_ctree_lvars"); - // Generator tables (lazy full scans, stop work early with LIMIT) - db.register_generator_table("ida_ctree", &ctree); - db.create_table("ctree", "ida_ctree"); + db.register_cached_table("ida_ctree_labels", &ctree_labels); + db.create_table("ctree_labels", "ida_ctree_labels"); - db.register_generator_table("ida_ctree_call_args", &ctree_call_args); - db.create_table("ctree_call_args", "ida_ctree_call_args"); + // Generator tables (lazy full scans, stop work early with LIMIT) + db.register_generator_table("ida_ctree", &ctree); + db.create_table("ctree", "ida_ctree"); - register_ctree_views(db); - } -}; + db.register_generator_table("ida_ctree_call_args", &ctree_call_args); + db.create_table("ctree_call_args", "ida_ctree_call_args"); + + register_ctree_views(db); +} } // namespace decompiler } // namespace idasql - diff --git a/src/lib/src/decompiler.hpp b/src/lib/src/decompiler.hpp new file mode 100644 index 0000000..92a3ecb --- /dev/null +++ b/src/lib/src/decompiler.hpp @@ -0,0 +1,479 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * decompiler.hpp - Hex-Rays decompiler virtual tables (pseudocode, ctree, lvars, call args) + * + * Provides SQLite virtual tables for accessing decompiled function data: + * pseudocode - Decompiled function pseudocode lines + * ctree_lvars - Local variables from decompiled functions + * ctree_labels - User/default labels for decompiled control flow + * ctree - Full AST (expressions and statements) + * ctree_call_args - Flattened call arguments + * + * All tables support constraint pushdown on func_addr via filter_eq framework: + * SELECT * FROM pseudocode WHERE func_addr = 0x401000; + * SELECT * FROM ctree_lvars WHERE func_addr = 0x401000; + * + * Requires Hex-Rays decompiler license. + */ + +#pragma once + +#include + +#include +#include + +#include +#include +#include + +#include "ida_headers.hpp" + +namespace idasql { +namespace decompiler { + +// ============================================================================ +// Decompiler Initialization +// ============================================================================ + +// Global flag tracking if Hex-Rays is available. +// Set once during DecompilerRegistry::register_all(). +// Must remain inline with static bool -- called from multiple TUs +// (functions.hpp, database.hpp, decompiler.cpp). +inline bool& hexrays_available() { + static bool available = false; + return available; +} + +// Initialize Hex-Rays decompiler - call ONCE at startup. +// Returns true if decompiler is available. +bool init_hexrays(); + +// Invalidate decompiler cache for the function containing ea. +// Safe to call even if Hex-Rays is unavailable or ea is not in a function. +void invalidate_decompiler_cache(ea_t ea); + +// ============================================================================ +// Data Structures +// ============================================================================ + +// ITP name / enum helpers +const char* itp_to_name(item_preciser_t itp); +item_preciser_t name_to_itp(const char* name); + +// Pseudocode line data +struct PseudocodeLine { + ea_t func_addr; + int line_num; + std::string text; + ea_t ea; // Associated address (from COLOR_ADDR anchor) + std::string comment; // User comment at this ea (from restore_user_cmts) + item_preciser_t comment_placement = ITP_SEMI; // Comment placement type +}; + +// Local variable data +struct LvarInfo { + ea_t func_addr; + int idx; + std::string name; + std::string type; + std::string comment; + int size; + bool is_arg; + bool is_result; + bool is_stk_var; + bool is_reg_var; + sval_t stkoff; + mreg_t mreg; +}; + +// Local variable rename result with explicit post-apply observability. +struct LvarRenameResult { + bool success = false; // Operation executed without internal API failure + bool applied = false; // Observed name changed to requested target + ea_t func_addr = BADADDR; + int lvar_idx = -1; + std::string target_name; // Original name selector (for by-name API) + std::string requested_name; // Requested new name + std::string before_name; // Name before mutation + std::string after_name; // Name after mutation/readback + std::string reason; // not_found, ambiguous_name, unchanged, not_nameable, etc. + std::vector warnings; +}; + +// Ctree item data +struct CtreeItem { + ea_t func_addr; + int item_id; + bool is_expr; + int op; + std::string op_name; + ea_t ea; + int parent_id; + int depth; + int x_id, y_id, z_id; + int cond_id, then_id, else_id; + int body_id, init_id, step_id; + int var_idx; + ea_t obj_ea; + int64_t num_value; + std::string str_value; + std::string helper_name; + int member_offset; + std::string var_name; + bool var_is_stk, var_is_reg, var_is_arg; + std::string obj_name; + int label_num; + int goto_label_num; + + CtreeItem() : func_addr(0), item_id(-1), is_expr(false), op(0), ea(BADADDR), + parent_id(-1), depth(0), + x_id(-1), y_id(-1), z_id(-1), + cond_id(-1), then_id(-1), else_id(-1), + body_id(-1), init_id(-1), step_id(-1), + var_idx(-1), obj_ea(BADADDR), num_value(0), member_offset(0), + var_is_stk(false), var_is_reg(false), var_is_arg(false), + label_num(-1), goto_label_num(-1) {} +}; + +// Ctree label data +struct CtreeLabelInfo { + ea_t func_addr; + int label_num; + std::string name; + int item_id; + ea_t item_ea; + bool is_user_defined; + + CtreeLabelInfo() : func_addr(0), label_num(-1), item_id(-1), item_ea(BADADDR), + is_user_defined(false) {} +}; + +// Label rename result with explicit post-apply observability. +struct LabelRenameResult { + bool success = false; + bool applied = false; + ea_t func_addr = BADADDR; + int label_num = -1; + std::string requested_name; + std::string before_name; + std::string after_name; + std::string reason; + std::vector warnings; +}; + +// Call argument data +struct CallArgInfo { + ea_t func_addr; + int call_item_id; + ea_t call_ea; + std::string call_obj_name; + std::string call_helper_name; + int arg_idx; + int arg_item_id; + std::string arg_op; + int arg_var_idx; + std::string arg_var_name; + bool arg_var_is_stk; + bool arg_var_is_arg; + ea_t arg_obj_ea; + std::string arg_obj_name; + int64_t arg_num_value; + std::string arg_str_value; + + CallArgInfo() : func_addr(0), call_item_id(-1), call_ea(BADADDR), arg_idx(-1), arg_item_id(-1), + arg_var_idx(-1), arg_var_is_stk(false), arg_var_is_arg(false), + arg_obj_ea(BADADDR), arg_num_value(0) {} +}; + +// ============================================================================ +// Helper Functions +// ============================================================================ + +// Get full ctype name with cot_/cit_ prefix +std::string get_full_ctype_name(ctype_t op); + +// Extract the first COLOR_ADDR anchor ea from a raw pseudocode line. +// Returns BADADDR if no anchor found. +ea_t extract_line_ea(cfunc_t* cfunc, const qstring& raw_line); + +// ============================================================================ +// Collect Functions +// ============================================================================ + +// Collect pseudocode for a single function +bool collect_pseudocode(std::vector& lines, ea_t func_addr); + +// Collect pseudocode for all functions +void collect_all_pseudocode(std::vector& lines); + +// Collect lvars for a single function +bool collect_lvars(std::vector& vars, ea_t func_addr); + +// Collect lvars for all functions +void collect_all_lvars(std::vector& vars); + +// Collect ctree items for a single function +bool collect_ctree(std::vector& items, ea_t func_addr); + +// Collect ctree for all functions +void collect_all_ctree(std::vector& items); + +// Collect ctree labels for a single function +bool collect_ctree_labels(std::vector& rows, ea_t func_addr); + +// Collect ctree labels for all functions +void collect_all_ctree_labels(std::vector& rows); + +// Collect call args for a single function +bool collect_call_args(std::vector& args, ea_t func_addr); + +// Collect call args for all functions +void collect_all_call_args(std::vector& args); + +// ============================================================================ +// Collector Visitors +// ============================================================================ + +// Ctree collector visitor +struct ctree_collector_t : public ctree_parentee_t { + std::vector& items; + std::map item_ids; + cfunc_t* cfunc; + ea_t func_addr; + int next_id; + + ctree_collector_t(std::vector& items_, cfunc_t* cfunc_, ea_t func_addr_); + + int idaapi visit_insn(cinsn_t* insn) override; + int idaapi visit_expr(cexpr_t* expr) override; + void resolve_child_ids(); +}; + +// Call args collector visitor +struct call_args_collector_t : public ctree_parentee_t { + std::vector& args; + std::map item_ids; + cfunc_t* cfunc; + ea_t func_addr; + int next_id; + + call_args_collector_t(std::vector& args_, cfunc_t* cfunc_, ea_t func_addr_); + + int idaapi visit_insn(cinsn_t* insn) override; + int idaapi visit_expr(cexpr_t* expr) override; +}; + +// ============================================================================ +// Iterators for constraint pushdown +// ============================================================================ + +// Pseudocode iterator for single function +class PseudocodeInFuncIterator : public xsql::RowIterator { + std::vector lines_; + size_t idx_ = 0; + bool started_ = false; + +public: + explicit PseudocodeInFuncIterator(ea_t func_addr); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// Pseudocode iterator for a single mapped address +class PseudocodeAtEaIterator : public xsql::RowIterator { + std::vector lines_; + size_t idx_ = 0; + bool started_ = false; + +public: + explicit PseudocodeAtEaIterator(ea_t ea); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// Pseudocode iterator for line number across all functions +class PseudocodeLineNumIterator : public xsql::RowIterator { + std::vector lines_; + size_t idx_ = 0; + bool started_ = false; + +public: + explicit PseudocodeLineNumIterator(int line_num); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// Lvars iterator for single function +class LvarsInFuncIterator : public xsql::RowIterator { + std::vector vars_; + size_t idx_ = 0; + bool started_ = false; + +public: + explicit LvarsInFuncIterator(ea_t func_addr); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// Ctree iterator for single function +class CtreeInFuncIterator : public xsql::RowIterator { + std::vector items_; + size_t idx_ = 0; + bool started_ = false; + +public: + explicit CtreeInFuncIterator(ea_t func_addr); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// Call args iterator for single function +class CallArgsInFuncIterator : public xsql::RowIterator { + std::vector args_; + size_t idx_ = 0; + bool started_ = false; + +public: + explicit CallArgsInFuncIterator(ea_t func_addr); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// ============================================================================ +// Generators for full scans (lazy, one function at a time) +// ============================================================================ + +class CtreeGenerator : public xsql::Generator { + size_t func_idx_ = 0; + std::vector items_; + size_t idx_ = 0; + int64_t rowid_ = -1; + bool started_ = false; + + bool load_next_func(); + +public: + bool next() override; + const CtreeItem& current() const override; + int64_t rowid() const override; +}; + +class CallArgsGenerator : public xsql::Generator { + size_t func_idx_ = 0; + std::vector args_; + size_t idx_ = 0; + int64_t rowid_ = -1; + bool started_ = false; + + bool load_next_func(); + +public: + bool next() override; + const CallArgInfo& current() const override; + int64_t rowid() const override; +}; + +// ============================================================================ +// Comment / Union Helpers +// ============================================================================ + +// Set or delete a decompiler comment at an ea within a function +bool set_decompiler_comment(ea_t func_addr, ea_t target_ea, const char* comment, item_preciser_t itp = ITP_SEMI); + +// Clear any existing comment regardless of placement +bool clear_decompiler_comment_all_placements(ea_t func_addr, ea_t target_ea); + +// Resolve an EA for a ctree item within a function +bool get_ctree_item_ea(ea_t func_addr, int item_id, ea_t& out_ea); + +// Persist user union selection path for an EA. Empty path clears selection. +bool set_union_selection_at_ea(ea_t func_addr, ea_t target_ea, const intvec_t& path); + +// Persist user union selection path by ctree item id. +bool set_union_selection_at_item(ea_t func_addr, int item_id, const intvec_t& path); + +// Read user union selection path for an EA. Returns false when not found. +bool get_union_selection_at_ea(ea_t func_addr, ea_t target_ea, intvec_t& out_path); + +// ============================================================================ +// Lvar Helpers +// ============================================================================ + +// Snapshot one lvar from a function by index. +bool get_lvar_snapshot(ea_t func_addr, int lvar_idx, LvarInfo& out); + +// Rename lvar by func_addr and lvar index with explicit readback validation. +LvarRenameResult rename_lvar_at_ex(ea_t func_addr, int lvar_idx, const char* new_name); + +// Rename lvar by idx preserving legacy bool return. +// Returns true on a successful mutation or a no-op unchanged rename. +bool rename_lvar_at(ea_t func_addr, int lvar_idx, const char* new_name); + +// Rename lvar by old name (exact match). +LvarRenameResult rename_lvar_by_name_ex(ea_t func_addr, const char* old_name, const char* new_name); + +// Set lvar type by func_addr and lvar index +bool set_lvar_type_at(ea_t func_addr, int lvar_idx, const char* type_str); + +// Set lvar comment by func_addr and lvar index +bool set_lvar_comment_at(ea_t func_addr, int lvar_idx, const char* comment); + +// Snapshot one label from a function by label number. +bool get_label_snapshot(ea_t func_addr, int label_num, CtreeLabelInfo& out); + +// Rename label by func_addr and label number with explicit readback validation. +LabelRenameResult rename_label_ex(ea_t func_addr, int label_num, const char* new_name); + +// Rename label preserving legacy bool return semantics. +bool rename_label(ea_t func_addr, int label_num, const char* new_name); + +// ============================================================================ +// Table Definitions +// ============================================================================ + +CachedTableDef define_pseudocode(); +CachedTableDef define_ctree_lvars(); +CachedTableDef define_ctree_labels(); +GeneratorTableDef define_ctree(); +GeneratorTableDef define_ctree_call_args(); + +// ============================================================================ +// Views Registration +// ============================================================================ + +bool register_ctree_views(xsql::Database& db); + +// ============================================================================ +// Registry +// ============================================================================ + +struct DecompilerRegistry { + // Cached tables (query-scoped cache, write support) + CachedTableDef pseudocode; + CachedTableDef ctree_lvars; + CachedTableDef ctree_labels; + // Generator tables (lazy full scans) + GeneratorTableDef ctree; + GeneratorTableDef ctree_call_args; + + DecompilerRegistry(); + void register_all(xsql::Database& db); +}; + +} // namespace decompiler +} // namespace idasql diff --git a/src/lib/include/idasql/disassembly.hpp b/src/lib/src/disassembly.cpp similarity index 72% rename from src/lib/include/idasql/disassembly.hpp rename to src/lib/src/disassembly.cpp index 1d9b8e6..318e24d 100644 --- a/src/lib/include/idasql/disassembly.hpp +++ b/src/lib/src/disassembly.cpp @@ -1,71 +1,48 @@ -/** - * disassembly.hpp - Disassembly-level SQL tables - * - * Provides instruction-level analysis via SQLite virtual tables. - * Parallels the decompiler.hpp ctree tables but at the disassembly level. - * - * Tables: - * disasm_calls - All call instructions with callee info - * disasm_loops - Detected loops via back-edge analysis - * - * Views: - * disasm_v_leaf_funcs - Functions with no outgoing calls - * disasm_v_call_chains - Recursive call chain paths up to depth 10 - * disasm_v_calls_in_loops - Calls that occur inside detected loops - * disasm_v_funcs_with_loops - Functions that contain loops - * - * All tables support constraint pushdown on func_addr for efficient queries. - */ - -#pragma once - -#include - -#include -#include - -#include - -// IDA SDK headers -#include -#include -#include // decode_insn, insn_t, is_call_insn -#include // is_call_insn -#include // get_first_fcref_from -#include // get_name -#include // qflow_chart_t for CFG analysis - -#include -#include +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "disassembly.hpp" namespace idasql { namespace disassembly { -// ============================================================================ -// Helper functions -// ============================================================================ - -inline std::string safe_name(ea_t ea) { +std::string safe_name(ea_t ea) { qstring name; get_name(&name, ea); return std::string(name.c_str()); } -// ============================================================================ -// DISASM_CALLS Table -// All call instructions across all functions -// ============================================================================ +void collect_loops_for_func(std::vector& loops, func_t* pfn) { + if (!pfn) return; -struct DisasmCallInfo { - ea_t func_addr; // Function containing this call - ea_t ea; // Address of call instruction - ea_t callee_addr; // Target of call (BADADDR if unknown) - std::string callee_name; -}; + qflow_chart_t fc; + fc.create("", pfn, pfn->start_ea, pfn->end_ea, FC_NOEXT); + + for (int i = 0; i < fc.size(); i++) { + const qbasic_block_t& block = fc.blocks[i]; + + for (int j = 0; j < fc.nsucc(i); j++) { + int succ_idx = fc.succ(i, j); + if (succ_idx < 0 || succ_idx >= fc.size()) continue; + + const qbasic_block_t& succ = fc.blocks[succ_idx]; + + if (succ.start_ea <= block.start_ea) { + LoopInfo li; + li.func_addr = pfn->start_ea; + li.loop_id = succ_idx; + li.header_ea = succ.start_ea; + li.header_end_ea = succ.end_ea; + li.back_edge_block_ea = block.start_ea; + li.back_edge_block_end = block.end_ea; + loops.push_back(li); + } + } + } +} // ============================================================================ -// DisasmCallsInFuncIterator - Constraint pushdown for func_addr = X -// Iterates calls in a single function without building the full cache +// DisasmCallsInFuncIterator // ============================================================================ class DisasmCallsInFuncIterator : public xsql::RowIterator { @@ -74,8 +51,6 @@ class DisasmCallsInFuncIterator : public xsql::RowIterator { func_item_iterator_t fii_; bool started_ = false; bool valid_ = false; - - // Current call info ea_t current_ea_ = BADADDR; ea_t callee_addr_ = BADADDR; std::string callee_name_; @@ -110,12 +85,10 @@ class DisasmCallsInFuncIterator : public xsql::RowIterator { if (!started_) { started_ = true; - // Initialize iterator and find first code item if (!fii_.set(pfn_)) { valid_ = false; return false; } - // Check if first item is a call ea_t ea = fii_.current(); insn_t insn; if (decode_insn(&insn, ea) > 0 && is_call_insn(insn)) { @@ -129,7 +102,6 @@ class DisasmCallsInFuncIterator : public xsql::RowIterator { valid_ = true; return true; } - // First item wasn't a call, find next valid_ = find_next_call(); return valid_; } @@ -144,22 +116,16 @@ class DisasmCallsInFuncIterator : public xsql::RowIterator { void column(xsql::FunctionContext& ctx, int col) override { switch (col) { - case 0: // func_addr - ctx.result_int64(static_cast(func_addr_)); - break; - case 1: // ea - ctx.result_int64(static_cast(current_ea_)); - break; - case 2: // callee_addr + case 0: ctx.result_int64(static_cast(func_addr_)); break; + case 1: ctx.result_int64(static_cast(current_ea_)); break; + case 2: if (callee_addr_ != BADADDR) { ctx.result_int64(static_cast(callee_addr_)); } else { ctx.result_int64(0); } break; - case 3: // callee_name - ctx.result_text(callee_name_.c_str()); - break; + case 3: ctx.result_text(callee_name_.c_str()); break; } } @@ -168,6 +134,10 @@ class DisasmCallsInFuncIterator : public xsql::RowIterator { } }; +// ============================================================================ +// DisasmCallsGenerator +// ============================================================================ + class DisasmCallsGenerator : public xsql::Generator { size_t func_idx_ = 0; func_t* pfn_ = nullptr; @@ -231,79 +201,13 @@ class DisasmCallsGenerator : public xsql::Generator { } const DisasmCallInfo& current() const override { return current_; } - int64_t rowid() const override { return static_cast(current_.ea); } }; -inline GeneratorTableDef define_disasm_calls() { - return generator_table("disasm_calls") - .estimate_rows([]() -> size_t { - // Heuristic: a few calls per function - return get_func_qty() * 5; - }) - .generator([]() -> std::unique_ptr> { - return std::make_unique(); - }) - .column_int64("func_addr", [](const DisasmCallInfo& r) -> int64_t { return r.func_addr; }) - .column_int64("ea", [](const DisasmCallInfo& r) -> int64_t { return r.ea; }) - .column_int64("callee_addr", [](const DisasmCallInfo& r) -> int64_t { - return r.callee_addr != BADADDR ? static_cast(r.callee_addr) : 0; - }) - .column_text("callee_name", [](const DisasmCallInfo& r) -> std::string { return r.callee_name; }) - // Constraint pushdown: func_addr = X bypasses full scan - .filter_eq("func_addr", [](int64_t func_addr) -> std::unique_ptr { - return std::make_unique(static_cast(func_addr)); - }, 10.0) // Low cost - only iterates one function - .build(); -} - // ============================================================================ -// DISASM_LOOPS Table -// Detected loops via back-edge analysis using qflow_chart_t +// LoopsInFuncIterator // ============================================================================ -struct LoopInfo { - ea_t func_addr; - int loop_id; // Unique ID (header block index) - ea_t header_ea; // Loop header start address - ea_t header_end_ea; // Loop header end address - ea_t back_edge_block_ea; // Block containing the back-edge jump - ea_t back_edge_block_end; // End of back-edge block -}; - -inline void collect_loops_for_func(std::vector& loops, func_t* pfn) { - if (!pfn) return; - - qflow_chart_t fc; - fc.create("", pfn, pfn->start_ea, pfn->end_ea, FC_NOEXT); - - for (int i = 0; i < fc.size(); i++) { - const qbasic_block_t& block = fc.blocks[i]; - - // Check each successor for back-edges - for (int j = 0; j < fc.nsucc(i); j++) { - int succ_idx = fc.succ(i, j); - if (succ_idx < 0 || succ_idx >= fc.size()) continue; - - const qbasic_block_t& succ = fc.blocks[succ_idx]; - - // Back-edge: successor starts at or before current block - // This indicates a loop where succ is the header - if (succ.start_ea <= block.start_ea) { - LoopInfo li; - li.func_addr = pfn->start_ea; - li.loop_id = succ_idx; // Use header block index as loop ID - li.header_ea = succ.start_ea; - li.header_end_ea = succ.end_ea; - li.back_edge_block_ea = block.start_ea; - li.back_edge_block_end = block.end_ea; - loops.push_back(li); - } - } - } -} - -// Iterator for loops in a single function (constraint pushdown) class LoopsInFuncIterator : public xsql::RowIterator { std::vector loops_; size_t idx_ = 0; @@ -347,6 +251,10 @@ class LoopsInFuncIterator : public xsql::RowIterator { int64_t rowid() const override { return static_cast(idx_); } }; +// ============================================================================ +// DisasmLoopsGenerator +// ============================================================================ + class DisasmLoopsGenerator : public xsql::Generator { size_t func_idx_ = 0; std::vector loops_; @@ -391,14 +299,36 @@ class DisasmLoopsGenerator : public xsql::Generator { } const LoopInfo& current() const override { return loops_[idx_]; } - int64_t rowid() const override { return rowid_; } }; -inline GeneratorTableDef define_disasm_loops() { +// ============================================================================ +// Table definitions +// ============================================================================ + +GeneratorTableDef define_disasm_calls() { + return generator_table("disasm_calls") + .estimate_rows([]() -> size_t { + return get_func_qty() * 5; + }) + .generator([]() -> std::unique_ptr> { + return std::make_unique(); + }) + .column_int64("func_addr", [](const DisasmCallInfo& r) -> int64_t { return r.func_addr; }) + .column_int64("ea", [](const DisasmCallInfo& r) -> int64_t { return r.ea; }) + .column_int64("callee_addr", [](const DisasmCallInfo& r) -> int64_t { + return r.callee_addr != BADADDR ? static_cast(r.callee_addr) : 0; + }) + .column_text("callee_name", [](const DisasmCallInfo& r) -> std::string { return r.callee_name; }) + .filter_eq("func_addr", [](int64_t func_addr) -> std::unique_ptr { + return std::make_unique(static_cast(func_addr)); + }, 10.0) + .build(); +} + +GeneratorTableDef define_disasm_loops() { return generator_table("disasm_loops") .estimate_rows([]() -> size_t { - // Heuristic: very few loops per function return get_func_qty() * 2; }) .generator([]() -> std::unique_ptr> { @@ -416,14 +346,7 @@ inline GeneratorTableDef define_disasm_loops() { .build(); } -// ============================================================================ -// View Registration -// ============================================================================ - -inline bool register_disasm_views(xsql::Database& db) { - - // disasm_v_leaf_funcs - Functions with no outgoing calls (terminal/leaf functions) - // Uses disasm_calls to detect calls at the disassembly level +bool register_disasm_views(xsql::Database& db) { const char* v_leaf_funcs = R"( CREATE VIEW IF NOT EXISTS disasm_v_leaf_funcs AS SELECT f.address, f.name @@ -434,19 +357,15 @@ inline bool register_disasm_views(xsql::Database& db) { )"; db.exec(v_leaf_funcs); - // disasm_v_call_chains - All call chain paths (root_func -> current_func at depth N) - // Enables queries like "find functions with call chains reaching depth 6" const char* v_call_chains = R"( CREATE VIEW IF NOT EXISTS disasm_v_call_chains AS WITH RECURSIVE call_chain(root_func, current_func, depth) AS ( - -- Base: direct calls from each function SELECT DISTINCT func_addr, callee_addr, 1 FROM disasm_calls WHERE callee_addr IS NOT NULL AND callee_addr != 0 UNION ALL - -- Recursive: follow callees deeper SELECT cc.root_func, c.callee_addr, cc.depth + 1 FROM call_chain cc JOIN disasm_calls c ON c.func_addr = cc.current_func @@ -462,9 +381,6 @@ inline bool register_disasm_views(xsql::Database& db) { )"; db.exec(v_call_chains); - // disasm_v_calls_in_loops - Calls that occur inside detected loops - // A call is considered "in a loop" if its address is between the loop header - // and the end of the back-edge block const char* v_calls_in_loops = R"( CREATE VIEW IF NOT EXISTS disasm_v_calls_in_loops AS SELECT @@ -482,7 +398,6 @@ inline bool register_disasm_views(xsql::Database& db) { )"; db.exec(v_calls_in_loops); - // disasm_v_funcs_with_loops - Functions that contain loops const char* v_funcs_with_loops = R"( CREATE VIEW IF NOT EXISTS disasm_v_funcs_with_loops AS SELECT @@ -499,30 +414,23 @@ inline bool register_disasm_views(xsql::Database& db) { } // ============================================================================ -// Registry for all disassembly tables +// Registry // ============================================================================ -struct DisassemblyRegistry { - GeneratorTableDef disasm_calls; - GeneratorTableDef disasm_loops; +DisassemblyRegistry::DisassemblyRegistry() + : disasm_calls(define_disasm_calls()) + , disasm_loops(define_disasm_loops()) +{} - DisassemblyRegistry() - : disasm_calls(define_disasm_calls()) - , disasm_loops(define_disasm_loops()) - {} +void DisassemblyRegistry::register_all(xsql::Database& db) { + db.register_generator_table("ida_disasm_calls", &disasm_calls); + db.create_table("disasm_calls", "ida_disasm_calls"); - void register_all(xsql::Database& db) { - db.register_generator_table("ida_disasm_calls", &disasm_calls); - db.create_table("disasm_calls", "ida_disasm_calls"); + db.register_generator_table("ida_disasm_loops", &disasm_loops); + db.create_table("disasm_loops", "ida_disasm_loops"); - db.register_generator_table("ida_disasm_loops", &disasm_loops); - db.create_table("disasm_loops", "ida_disasm_loops"); - - // Register views on top - register_disasm_views(db); - } -}; + register_disasm_views(db); +} } // namespace disassembly } // namespace idasql - diff --git a/src/lib/src/disassembly.hpp b/src/lib/src/disassembly.hpp new file mode 100644 index 0000000..7036e4f --- /dev/null +++ b/src/lib/src/disassembly.hpp @@ -0,0 +1,58 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * disassembly.hpp - Disassembly-level SQL tables + * + * Tables: disasm_calls, disasm_loops + */ + +#pragma once + +#include + +#include +#include + +#include "ida_headers.hpp" + +#include +#include + +namespace idasql { +namespace disassembly { + +std::string safe_name(ea_t ea); + +struct DisasmCallInfo { + ea_t func_addr; + ea_t ea; + ea_t callee_addr; + std::string callee_name; +}; + +struct LoopInfo { + ea_t func_addr; + int loop_id; + ea_t header_ea; + ea_t header_end_ea; + ea_t back_edge_block_ea; + ea_t back_edge_block_end; +}; + +void collect_loops_for_func(std::vector& loops, func_t* pfn); + +GeneratorTableDef define_disasm_calls(); +GeneratorTableDef define_disasm_loops(); +bool register_disasm_views(xsql::Database& db); + +struct DisassemblyRegistry { + GeneratorTableDef disasm_calls; + GeneratorTableDef disasm_loops; + + DisassemblyRegistry(); + void register_all(xsql::Database& db); +}; + +} // namespace disassembly +} // namespace idasql diff --git a/src/lib/include/idasql/entities.hpp b/src/lib/src/entities.cpp similarity index 74% rename from src/lib/include/idasql/entities.hpp rename to src/lib/src/entities.cpp index a234f80..f80fac8 100644 --- a/src/lib/include/idasql/entities.hpp +++ b/src/lib/src/entities.cpp @@ -1,55 +1,12 @@ -/** - * entities.hpp - IDA entity definitions for SQLite virtual tables - * - * Defines all IDA entities as virtual tables using the clean ida_vtable.hpp framework. - * - * Tables: - * funcs - Functions - * segments - Memory segments - * names - Named locations (from nlist) - * entries - Entry points (exports) - * imports - Imported functions - * strings - String literals - * xrefs - Cross-references (universal) - */ - -#pragma once - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -// IDA SDK headers -#include -#include -#include // Must come before moves.hpp -#include -#include -#include -#include -#include -#include // For tinfo_t, func_type_data_t -#include -#include -#include -#include -#include // For comments (get_cmt, set_cmt) -#include // For instructions (insn_t, decode_insn) -#include // For bookmarks - -#include // For invalidate_decompiler_cache +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "entities.hpp" +#include "entities_search.hpp" + +#include "decompiler.hpp" + +// entities.hpp already includes ida_headers.hpp namespace idasql { namespace entities { @@ -58,33 +15,33 @@ namespace entities { // Helper: Safe string extraction from IDA // ============================================================================ -inline std::string safe_func_name(ea_t ea) { +std::string safe_func_name(ea_t ea) { qstring name; get_func_name(&name, ea); return std::string(name.c_str()); } -inline std::string safe_segm_name(segment_t* seg) { +std::string safe_segm_name(segment_t* seg) { if (!seg) return ""; qstring name; get_segm_name(&name, seg); return std::string(name.c_str()); } -inline std::string safe_segm_class(segment_t* seg) { +std::string safe_segm_class(segment_t* seg) { if (!seg) return ""; qstring cls; get_segm_class(&cls, seg); return std::string(cls.c_str()); } -inline std::string safe_name(ea_t ea) { +std::string safe_name(ea_t ea) { qstring name; get_name(&name, ea); return std::string(name.c_str()); } -inline std::string safe_entry_name(size_t idx) { +std::string safe_entry_name(size_t idx) { uval_t ord = get_entry_ordinal(idx); qstring name; get_entry_name(&name, ord); @@ -92,16 +49,14 @@ inline std::string safe_entry_name(size_t idx) { } // ============================================================================ -// FUNCS Table (with UPDATE/DELETE support) +// Function type helpers // ============================================================================ -// Helper to get function type info -inline bool get_func_tinfo(ea_t ea, tinfo_t& tif) { +bool get_func_tinfo(ea_t ea, tinfo_t& tif) { return get_tinfo(&tif, ea); } -// Helper to get calling convention name from callcnv_t -inline const char* get_cc_name(callcnv_t cc) { +const char* get_cc_name(callcnv_t cc) { switch (cc) { case CM_CC_CDECL: return "cdecl"; case CM_CC_STDCALL: return "stdcall"; @@ -116,7 +71,11 @@ inline const char* get_cc_name(callcnv_t cc) { } } -inline VTableDef define_funcs() { +// ============================================================================ +// FUNCS Table (with UPDATE/DELETE support) +// ============================================================================ + +VTableDef define_funcs() { return table("funcs") .count([]() { return get_func_qty(); }) .column_int64("address", [](size_t i) -> int64_t { @@ -304,7 +263,7 @@ inline VTableDef define_funcs() { // SEGMENTS Table // ============================================================================ -inline VTableDef define_segments() { +VTableDef define_segments() { return table("segments") .count([]() { return static_cast(get_segm_qty()); }) .column_int64("start_ea", [](size_t i) -> int64_t { @@ -369,6 +328,63 @@ inline VTableDef define_segments() { auto_wait(); return ok; }) + .insertable([](int argc, xsql::FunctionArg* argv) -> bool { + // start_ea (col 0) and end_ea (col 1) are required + if (argc < 2 || argv[0].is_null() || argv[1].is_null()) + return false; + + ea_t start = static_cast(argv[0].as_int64()); + ea_t end = static_cast(argv[1].as_int64()); + if (start == BADADDR || end == BADADDR || end <= start) + return false; + + int perm = 0; + bool has_perm = false; + if (argc > 4 && !argv[4].is_null()) { + perm = argv[4].as_int(); + if (perm < 0 || perm > 7) + return false; + has_perm = true; + } + + // Avoid destructive overlap behavior from add_segm(). + const int seg_qty = get_segm_qty(); + for (int seg_idx = 0; seg_idx < seg_qty; ++seg_idx) { + segment_t* seg = getnseg(seg_idx); + if (!seg) continue; + if (start < seg->end_ea && end > seg->start_ea) { + return false; + } + } + + const char* seg_name = nullptr; + if (argc > 2 && !argv[2].is_null()) { + seg_name = argv[2].as_c_str(); + if (seg_name && seg_name[0] == '\0') seg_name = nullptr; + } + + const char* seg_class = nullptr; + if (argc > 3 && !argv[3].is_null()) { + seg_class = argv[3].as_c_str(); + if (seg_class && seg_class[0] == '\0') seg_class = nullptr; + } + + const ea_t para = start >> 4; + + auto_wait(); + bool ok = add_segm(para, start, end, seg_name, seg_class, ADDSEG_QUIET | ADDSEG_NOAA); + if (ok && has_perm) { + segment_t* created = getseg(start); + if (created == nullptr) { + ok = false; + } else { + created->perm = static_cast(perm); + ok = created->update(); + } + } + auto_wait(); + return ok; + }) .build(); } @@ -376,7 +392,7 @@ inline VTableDef define_segments() { // NAMES Table (with UPDATE/DELETE support) // ============================================================================ -inline VTableDef define_names() { +VTableDef define_names() { return table("names") .count([]() { return get_nlist_size(); }) .column_int64("address", [](size_t i) -> int64_t { @@ -437,7 +453,7 @@ inline VTableDef define_names() { // ENTRIES Table (entry points / exports) // ============================================================================ -inline VTableDef define_entries() { +VTableDef define_entries() { return table("entries") .count([]() { return get_entry_qty(); }) .column_int64("ordinal", [](size_t i) -> int64_t { @@ -457,13 +473,7 @@ inline VTableDef define_entries() { // COMMENTS Table (with UPDATE/DELETE support) // ============================================================================ -struct CommentRow { - ea_t ea = BADADDR; - std::string comment; - std::string rpt_comment; -}; - -inline void collect_comment_rows(std::vector& rows) { +void collect_comment_rows(std::vector& rows) { rows.clear(); ea_t ea = inf_get_min_ea(); @@ -488,7 +498,7 @@ inline void collect_comment_rows(std::vector& rows) { } } -inline CachedTableDef define_comments() { +CachedTableDef define_comments() { return cached_table("comments") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -571,38 +581,23 @@ inline CachedTableDef define_comments() { // ============================================================================ // IMPORTS Table -// Collects all imports across all modules into a flat table // ============================================================================ -struct ImportInfo { - int module_idx; - ea_t ea; - std::string name; - uval_t ord; -}; - -inline std::string get_import_module_name_safe(int idx) { +std::string get_import_module_name_safe(int idx) { qstring name; get_import_module_name(&name, idx); return std::string(name.c_str()); } // ============================================================================ -// STRINGS Tables - By type (ASCII, Unicode) +// STRING helpers // ============================================================================ -// String type encoding (from ida_nalt): -// Bits 0-1: Width (0=1B/ASCII, 1=2B/UTF-16, 2=4B/UTF-32) -// Bits 2-7: Layout (0=TERMCHR, 1=PASCAL1, 2=PASCAL2, 3=PASCAL4) -// Bits 8-15: term1 (first termination character) -// Bits 16-23: term2 (second termination character) -// Bits 24-31: encoding index - -inline int get_string_width(int strtype) { +int get_string_width(int strtype) { return strtype & 0x03; // 0=ASCII, 1=UTF-16, 2=UTF-32 } -inline const char* get_string_width_name(int strtype) { +const char* get_string_width_name(int strtype) { int width = get_string_width(strtype); switch (width) { case 0: return "1-byte"; @@ -612,7 +607,7 @@ inline const char* get_string_width_name(int strtype) { } } -inline const char* get_string_type_name(int strtype) { +const char* get_string_type_name(int strtype) { int width = get_string_width(strtype); switch (width) { case 0: return "ascii"; @@ -622,11 +617,11 @@ inline const char* get_string_type_name(int strtype) { } } -inline int get_string_layout(int strtype) { +int get_string_layout(int strtype) { return (strtype >> 2) & 0x3F; // Bits 2-7 } -inline const char* get_string_layout_name(int strtype) { +const char* get_string_layout_name(int strtype) { int layout = get_string_layout(strtype); switch (layout) { case 0: return "termchr"; // Null-terminated (C-style) @@ -637,135 +632,93 @@ inline const char* get_string_layout_name(int strtype) { } } -inline int get_string_encoding(int strtype) { +int get_string_encoding(int strtype) { return (strtype >> 24) & 0xFF; // Bits 24-31: encoding index } -inline std::string get_string_content(const string_info_t& si) { +std::string get_string_content(const string_info_t& si) { qstring content; get_strlit_contents(&content, si.ea, si.length, si.type); return std::string(content.c_str()); } // ============================================================================ -// XREFS Table (universal cross-references) -// Collects all xrefs from all functions -// ============================================================================ - -struct XrefInfo { - ea_t from_ea; - ea_t to_ea; - uint8_t type; - bool is_code; -}; - -// ============================================================================ -// Xref Iterators for Constraint Pushdown +// Xref Iterators // ============================================================================ -/** - * Iterator for xrefs TO a specific address. - * Used when query has: WHERE to_ea = X - * Uses xrefblk_t::first_to/next_to for O(refs_to_X) instead of O(all_xrefs) - */ -class XrefsToIterator : public xsql::RowIterator { - ea_t target_; - xrefblk_t xb_; - bool started_ = false; - bool valid_ = false; +XrefsToIterator::XrefsToIterator(ea_t target) : target_(target) {} -public: - explicit XrefsToIterator(ea_t target) : target_(target) {} - - bool next() override { - if (!started_) { - started_ = true; - valid_ = xb_.first_to(target_, XREF_ALL); - } else if (valid_) { - valid_ = xb_.next_to(); - } - return valid_; +bool XrefsToIterator::next() { + if (!started_) { + started_ = true; + valid_ = xb_.first_to(target_, XREF_ALL); + } else if (valid_) { + valid_ = xb_.next_to(); } + return valid_; +} - bool eof() const override { - return started_ && !valid_; - } +bool XrefsToIterator::eof() const { + return started_ && !valid_; +} - void column(xsql::FunctionContext& ctx, int col) override { - if (!valid_) { - ctx.result_null(); - return; - } - switch (col) { - case 0: ctx.result_int64(static_cast(xb_.from)); break; - case 1: ctx.result_int64(static_cast(target_)); break; - case 2: ctx.result_int(xb_.type); break; - case 3: ctx.result_int(xb_.iscode ? 1 : 0); break; - default: ctx.result_null(); break; - } +void XrefsToIterator::column(xsql::FunctionContext& ctx, int col) { + if (!valid_) { + ctx.result_null(); + return; } - - int64_t rowid() const override { - return valid_ ? static_cast(xb_.from) : 0; + switch (col) { + case 0: ctx.result_int64(static_cast(xb_.from)); break; + case 1: ctx.result_int64(static_cast(target_)); break; + case 2: ctx.result_int(xb_.type); break; + case 3: ctx.result_int(xb_.iscode ? 1 : 0); break; + default: ctx.result_null(); break; } -}; +} -/** - * Iterator for xrefs FROM a specific address. - * Used when query has: WHERE from_ea = X - * Uses xrefblk_t::first_from/next_from for O(refs_from_X) instead of O(all_xrefs) - */ -class XrefsFromIterator : public xsql::RowIterator { - ea_t source_; - xrefblk_t xb_; - bool started_ = false; - bool valid_ = false; +int64_t XrefsToIterator::rowid() const { + return valid_ ? static_cast(xb_.from) : 0; +} -public: - explicit XrefsFromIterator(ea_t source) : source_(source) {} +XrefsFromIterator::XrefsFromIterator(ea_t source) : source_(source) {} - bool next() override { - if (!started_) { - started_ = true; - valid_ = xb_.first_from(source_, XREF_ALL); - } else if (valid_) { - valid_ = xb_.next_from(); - } - return valid_; +bool XrefsFromIterator::next() { + if (!started_) { + started_ = true; + valid_ = xb_.first_from(source_, XREF_ALL); + } else if (valid_) { + valid_ = xb_.next_from(); } + return valid_; +} - bool eof() const override { - return started_ && !valid_; - } +bool XrefsFromIterator::eof() const { + return started_ && !valid_; +} - void column(xsql::FunctionContext& ctx, int col) override { - if (!valid_) { - ctx.result_null(); - return; - } - switch (col) { - case 0: ctx.result_int64(static_cast(source_)); break; - case 1: ctx.result_int64(static_cast(xb_.to)); break; - case 2: ctx.result_int(xb_.type); break; - case 3: ctx.result_int(xb_.iscode ? 1 : 0); break; - default: ctx.result_null(); break; - } +void XrefsFromIterator::column(xsql::FunctionContext& ctx, int col) { + if (!valid_) { + ctx.result_null(); + return; } - - int64_t rowid() const override { - return valid_ ? static_cast(xb_.to) : 0; + switch (col) { + case 0: ctx.result_int64(static_cast(source_)); break; + case 1: ctx.result_int64(static_cast(xb_.to)); break; + case 2: ctx.result_int(xb_.type); break; + case 3: ctx.result_int(xb_.iscode ? 1 : 0); break; + default: ctx.result_null(); break; } -}; +} -/** - * Xrefs table with query-scoped cache. - * - * Features: - * - Cache lives in cursor (freed when query completes) - * - Lazy cache build (only if not using constraint pushdown) - * - Row count estimation (no cache rebuild in xBestIndex) - */ -inline CachedTableDef define_xrefs() { +int64_t XrefsFromIterator::rowid() const { + return valid_ ? static_cast(xb_.to) : 0; +} + +// ============================================================================ +// XREFS Table +// ============================================================================ + +CachedTableDef define_xrefs() { return cached_table("xrefs") .no_shared_cache() // Estimate row count without building cache @@ -819,63 +772,44 @@ inline CachedTableDef define_xrefs() { // BLOCKS Table (basic blocks) // ============================================================================ -struct BlockInfo { - ea_t func_ea; - ea_t start_ea; - ea_t end_ea; -}; - -/** - * Iterator for blocks in a specific function. - * Used when query has: WHERE func_ea = X - * Uses qflow_chart_t on single function for O(func_blocks) instead of O(all_blocks) - */ -class BlocksInFuncIterator : public xsql::RowIterator { - ea_t func_ea_; - qflow_chart_t fc_; - int idx_ = -1; - bool valid_ = false; - -public: - explicit BlocksInFuncIterator(ea_t func_ea) : func_ea_(func_ea) { - func_t* pfn = get_func(func_ea); - if (pfn) { - fc_.create("", pfn, pfn->start_ea, pfn->end_ea, FC_NOEXT); - } +BlocksInFuncIterator::BlocksInFuncIterator(ea_t func_ea) : func_ea_(func_ea) { + func_t* pfn = get_func(func_ea); + if (pfn) { + fc_.create("", pfn, pfn->start_ea, pfn->end_ea, FC_NOEXT); } +} - bool next() override { - ++idx_; - valid_ = (idx_ < fc_.size()); - return valid_; - } +bool BlocksInFuncIterator::next() { + ++idx_; + valid_ = (idx_ < fc_.size()); + return valid_; +} - bool eof() const override { - return idx_ >= 0 && !valid_; - } +bool BlocksInFuncIterator::eof() const { + return idx_ >= 0 && !valid_; +} - void column(xsql::FunctionContext& ctx, int col) override { - if (!valid_ || idx_ < 0 || idx_ >= fc_.size()) { - ctx.result_null(); - return; - } - const qbasic_block_t& bb = fc_.blocks[idx_]; - switch (col) { - case 0: ctx.result_int64(static_cast(func_ea_)); break; - case 1: ctx.result_int64(static_cast(bb.start_ea)); break; - case 2: ctx.result_int64(static_cast(bb.end_ea)); break; - case 3: ctx.result_int64(static_cast(bb.end_ea - bb.start_ea)); break; - default: ctx.result_null(); break; - } +void BlocksInFuncIterator::column(xsql::FunctionContext& ctx, int col) { + if (!valid_ || idx_ < 0 || idx_ >= fc_.size()) { + ctx.result_null(); + return; } - - int64_t rowid() const override { - if (!valid_ || idx_ < 0 || idx_ >= fc_.size()) return 0; - return static_cast(fc_.blocks[idx_].start_ea); + const qbasic_block_t& bb = fc_.blocks[idx_]; + switch (col) { + case 0: ctx.result_int64(static_cast(func_ea_)); break; + case 1: ctx.result_int64(static_cast(bb.start_ea)); break; + case 2: ctx.result_int64(static_cast(bb.end_ea)); break; + case 3: ctx.result_int64(static_cast(bb.end_ea - bb.start_ea)); break; + default: ctx.result_null(); break; } -}; +} + +int64_t BlocksInFuncIterator::rowid() const { + if (!valid_ || idx_ < 0 || idx_ >= fc_.size()) return 0; + return static_cast(fc_.blocks[idx_].start_ea); +} -inline CachedTableDef define_blocks() { +CachedTableDef define_blocks() { return cached_table("blocks") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -923,13 +857,7 @@ inline CachedTableDef define_blocks() { // IMPORTS Table (query-scoped cache) // ============================================================================ -// Helper struct for import enumeration callback -struct ImportEnumContext { - std::vector* cache; - int module_idx; -}; - -inline CachedTableDef define_imports() { +CachedTableDef define_imports() { return cached_table("imports") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -977,7 +905,7 @@ inline CachedTableDef define_imports() { // STRINGS Table (query-scoped cache) // ============================================================================ -inline CachedTableDef define_strings() { +CachedTableDef define_strings() { return cached_table("strings") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -1029,13 +957,7 @@ inline CachedTableDef define_strings() { // BOOKMARKS Table (with UPDATE/DELETE support) // ============================================================================ -struct BookmarkRow { - uint32_t index = 0; - ea_t ea = BADADDR; - std::string desc; -}; - -inline void collect_bookmark_rows(std::vector& rows) { +void collect_bookmark_rows(std::vector& rows) { rows.clear(); idaplace_t idaplace(inf_get_min_ea(), 0); @@ -1058,7 +980,7 @@ inline void collect_bookmark_rows(std::vector& rows) { } } -inline CachedTableDef define_bookmarks() { +CachedTableDef define_bookmarks() { return cached_table("bookmarks") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -1139,11 +1061,7 @@ inline CachedTableDef define_bookmarks() { // HEADS Table - All defined items in the database // ============================================================================ -struct HeadRow { - ea_t ea = BADADDR; -}; - -inline void collect_head_rows(std::vector& rows) { +void collect_head_rows(std::vector& rows) { rows.clear(); ea_t ea = inf_get_min_ea(); @@ -1155,7 +1073,7 @@ inline void collect_head_rows(std::vector& rows) { } } -inline const char* get_item_type_str(ea_t ea) { +const char* get_item_type_str(ea_t ea) { flags64_t f = get_flags(ea); if (is_code(f)) return "code"; if (is_strlit(f)) return "string"; @@ -1166,7 +1084,7 @@ inline const char* get_item_type_str(ea_t ea) { return "other"; } -inline CachedTableDef define_heads() { +CachedTableDef define_heads() { return cached_table("heads") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -1200,57 +1118,55 @@ inline CachedTableDef define_heads() { // BYTES Table - Read/write byte values with patch support // ============================================================================ -// Iterator for single-address point query (constraint pushdown on ea) -class BytesAtIterator : public xsql::RowIterator { - ea_t ea_; - bool yielded_ = false; // true after next() returned true (row available) - bool exhausted_ = false; // true after next() returned false (no more rows) +BytesAtIterator::BytesAtIterator(ea_t ea) : ea_(ea) {} -public: - explicit BytesAtIterator(ea_t ea) : ea_(ea) {} - - bool next() override { - if (yielded_) { - // Second call — exhausted - exhausted_ = true; - return false; - } - // First call — yield the single row - yielded_ = true; - return true; +bool BytesAtIterator::next() { + if (yielded_) { + // Second call - exhausted + exhausted_ = true; + return false; } + // First call - yield the single row + yielded_ = true; + return true; +} - bool eof() const override { return exhausted_; } +bool BytesAtIterator::eof() const { return exhausted_; } - void column(xsql::FunctionContext& ctx, int col) override { - switch (col) { - case 0: // ea - ctx.result_int64(ea_); - break; - case 1: // value - ctx.result_int(get_byte(ea_)); - break; - case 2: // original_value - ctx.result_int(static_cast(get_original_byte(ea_))); - break; - case 3: // size - ctx.result_int(get_item_size(ea_)); - break; - case 4: // type - ctx.result_text(get_item_type_str(ea_)); - break; - case 5: { // is_patched - int patched = (get_byte(ea_) != static_cast(get_original_byte(ea_))) ? 1 : 0; - ctx.result_int(patched); - break; - } +void BytesAtIterator::column(xsql::FunctionContext& ctx, int col) { + switch (col) { + case 0: // ea + ctx.result_int64(ea_); + break; + case 1: // value + ctx.result_int(get_byte(ea_)); + break; + case 2: // original_value + ctx.result_int(static_cast(get_original_byte(ea_))); + break; + case 3: // size + ctx.result_int(get_item_size(ea_)); + break; + case 4: // type + ctx.result_text(get_item_type_str(ea_)); + break; + case 5: { // is_patched + int patched = (get_byte(ea_) != static_cast(get_original_byte(ea_))) ? 1 : 0; + ctx.result_int(patched); + break; + } + case 6: { // fpos + const qoff64_t fpos = get_fileregion_offset(ea_); + if (fpos < 0) ctx.result_null(); + else ctx.result_int64(static_cast(fpos)); + break; } } +} - int64_t rowid() const override { return static_cast(ea_); } -}; +int64_t BytesAtIterator::rowid() const { return static_cast(ea_); } -inline CachedTableDef define_bytes() { +CachedTableDef define_bytes() { return cached_table("bytes") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -1285,6 +1201,12 @@ inline CachedTableDef define_bytes() { .column_int("is_patched", [](const HeadRow& row) -> int { return (get_byte(row.ea) != static_cast(get_original_byte(row.ea))) ? 1 : 0; }) + .column("fpos", xsql::ColumnType::Integer, + [](xsql::FunctionContext& ctx, const HeadRow& row) { + const qoff64_t fpos = get_fileregion_offset(row.ea); + if (fpos < 0) ctx.result_null(); + else ctx.result_int64(static_cast(fpos)); + }) .filter_eq("ea", [](int64_t ea_val) -> std::unique_ptr { return std::make_unique(static_cast(ea_val)); }, 1.0) @@ -1295,21 +1217,14 @@ inline CachedTableDef define_bytes() { // PATCHED_BYTES Table - All patched locations via visit_patched_bytes() // ============================================================================ -struct PatchedByteInfo { - ea_t ea; - qoff64_t fpos; - uint64 original_value; - uint64 patched_value; -}; - // Callback for visit_patched_bytes (requires idaapi calling convention) -static int idaapi patched_bytes_visitor(ea_t ea, qoff64_t fpos, uint64 o, uint64 v, void* ud) { +int idaapi patched_bytes_visitor(ea_t ea, qoff64_t fpos, uint64 o, uint64 v, void* ud) { auto* vec = static_cast*>(ud); vec->push_back({ea, fpos, o, v}); return 0; // continue } -inline CachedTableDef define_patched_bytes() { +CachedTableDef define_patched_bytes() { return cached_table("patched_bytes") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -1335,133 +1250,10 @@ inline CachedTableDef define_patched_bytes() { } // ============================================================================ -// INSTRUCTIONS Table - With func_addr constraint pushdown +// INSTRUCTIONS Table helpers and parsing // ============================================================================ -inline std::string operand_kind_text(ea_t ea, int opnum); -inline std::string operand_type_text(ea_t ea, int opnum); -inline int operand_enum_serial(ea_t ea, int opnum); -inline int64_t operand_stroff_delta(ea_t ea, int opnum); -inline std::string operand_class_text(ea_t ea, int opnum); -inline std::string operand_repr_kind_text(ea_t ea, int opnum); -inline std::string operand_repr_type_name_text(ea_t ea, int opnum); -inline std::string operand_repr_member_name_text(ea_t ea, int opnum); -inline int operand_repr_serial(ea_t ea, int opnum); -inline int64_t operand_repr_delta(ea_t ea, int opnum); -inline std::string operand_format_spec_text(ea_t ea, int opnum); -inline void instruction_column_common(xsql::FunctionContext& ctx, ea_t ea, ea_t func_addr, int col); - -inline constexpr int kInstructionOperandCount = 8; -inline constexpr int kInstructionOperandBaseCol = 4; -inline constexpr int kInstructionDisasmCol = kInstructionOperandBaseCol + kInstructionOperandCount; -inline constexpr int kInstructionFuncAddrCol = kInstructionDisasmCol + 1; -inline constexpr int kInstructionClassBaseCol = kInstructionFuncAddrCol + 1; -inline constexpr int kInstructionReprKindBaseCol = kInstructionClassBaseCol + kInstructionOperandCount; -inline constexpr int kInstructionReprTypeBaseCol = kInstructionReprKindBaseCol + kInstructionOperandCount; -inline constexpr int kInstructionReprMemberBaseCol = kInstructionReprTypeBaseCol + kInstructionOperandCount; -inline constexpr int kInstructionReprSerialBaseCol = kInstructionReprMemberBaseCol + kInstructionOperandCount; -inline constexpr int kInstructionReprDeltaBaseCol = kInstructionReprSerialBaseCol + kInstructionOperandCount; -inline constexpr int kInstructionFormatSpecBaseCol = kInstructionReprDeltaBaseCol + kInstructionOperandCount; -inline constexpr int kInstructionColumnCount = kInstructionFormatSpecBaseCol + kInstructionOperandCount; - -// Iterator for instructions within a single function (constraint pushdown) -class InstructionsInFuncIterator : public xsql::RowIterator { - ea_t func_addr_; - func_t* pfn_ = nullptr; - func_item_iterator_t fii_; - bool started_ = false; - bool valid_ = false; - ea_t current_ea_ = BADADDR; - -public: - explicit InstructionsInFuncIterator(ea_t func_addr) - : func_addr_(func_addr) - { - pfn_ = get_func(func_addr_); - } - - bool next() override { - if (!pfn_) return false; - - if (!started_) { - started_ = true; - valid_ = fii_.set(pfn_); - if (valid_) current_ea_ = fii_.current(); - } else if (valid_) { - valid_ = fii_.next_code(); - if (valid_) current_ea_ = fii_.current(); - } - return valid_; - } - - bool eof() const override { - return started_ && !valid_; - } - - void column(xsql::FunctionContext& ctx, int col) override { - instruction_column_common(ctx, current_ea_, func_addr_, col); - } - - int64_t rowid() const override { - return static_cast(current_ea_); - } -}; - -// Iterator for a single instruction by exact address. -class InstructionAtAddressIterator : public xsql::RowIterator { - ea_t ea_; - bool started_ = false; - bool valid_ = false; - -public: - explicit InstructionAtAddressIterator(ea_t ea) : ea_(ea) {} - - bool next() override { - if (!started_) { - started_ = true; - valid_ = (ea_ != BADADDR) && is_code(get_flags(ea_)); - return valid_; - } - valid_ = false; - return false; - } - - bool eof() const override { - return started_ && !valid_; - } - - void column(xsql::FunctionContext& ctx, int col) override { - func_t* f = get_func(ea_); - ea_t func_addr = f ? f->start_ea : 0; - instruction_column_common(ctx, ea_, func_addr, col); - } - - int64_t rowid() const override { - return static_cast(ea_); - } -}; - -struct InstructionRow { - ea_t ea = BADADDR; -}; - -enum class OperandApplyKind { - None, - Clear, - Enum, - Stroff, -}; - -struct OperandApplyRequest { - OperandApplyKind kind = OperandApplyKind::None; - std::string enum_name; - std::string enum_member_name; - uchar enum_serial = 0; - std::vector stroff_path_names; - adiff_t stroff_delta = 0; -}; - -inline std::string trim_copy(const std::string& in) { +std::string trim_copy(const std::string& in) { size_t begin = 0; size_t end = in.size(); while (begin < end && std::isspace(static_cast(in[begin])) != 0) { @@ -1473,7 +1265,7 @@ inline std::string trim_copy(const std::string& in) { return in.substr(begin, end - begin); } -inline bool starts_with_ci(const std::string& text, const char* prefix) { +bool starts_with_ci(const std::string& text, const char* prefix) { if (!prefix) return false; const size_t prefix_len = std::strlen(prefix); if (text.size() < prefix_len) return false; @@ -1485,14 +1277,14 @@ inline bool starts_with_ci(const std::string& text, const char* prefix) { return true; } -inline bool equals_ci(const std::string& text, const char* token) { +bool equals_ci(const std::string& text, const char* token) { if (!token) return false; const size_t token_len = std::strlen(token); if (text.size() != token_len) return false; return starts_with_ci(text, token); } -inline bool parse_int64(const std::string& text, int64_t& out_value) { +bool parse_int64(const std::string& text, int64_t& out_value) { const std::string trimmed = trim_copy(text); if (trimmed.empty()) return false; char* end_ptr = nullptr; @@ -1502,7 +1294,7 @@ inline bool parse_int64(const std::string& text, int64_t& out_value) { return true; } -inline bool resolve_named_type_tid(const std::string& name, tid_t& out_tid, tinfo_t* out_tif = nullptr) { +bool resolve_named_type_tid(const std::string& name, tid_t& out_tid, tinfo_t* out_tif) { if (name.empty()) return false; tinfo_t tif; if (!tif.get_named_type(nullptr, name.c_str())) { @@ -1519,7 +1311,7 @@ inline bool resolve_named_type_tid(const std::string& name, tid_t& out_tid, tinf return true; } -inline std::string tid_name_or_fallback(tid_t tid) { +std::string tid_name_or_fallback(tid_t tid) { qstring out; if (get_tid_name(&out, tid)) { return std::string(out.c_str()); @@ -1527,7 +1319,7 @@ inline std::string tid_name_or_fallback(tid_t tid) { return ""; } -inline void split_path_names(const std::string& path_spec, std::vector& out_names) { +void split_path_names(const std::string& path_spec, std::vector& out_names) { out_names.clear(); size_t start = 0; while (start <= path_spec.size()) { @@ -1542,7 +1334,7 @@ inline void split_path_names(const std::string& path_spec, std::vectorclear(); out = OperandApplyRequest{}; if (!spec) { @@ -1654,11 +1446,11 @@ inline bool parse_operand_format_spec(const char* spec, OperandApplyRequest& out return false; } -inline bool parse_operand_apply_spec(const char* spec, OperandApplyRequest& out) { +bool parse_operand_apply_spec(const char* spec, OperandApplyRequest& out) { return parse_operand_format_spec(spec, out, nullptr); } -inline bool decode_operand(ea_t ea, int opnum, insn_t& out_insn, op_t& out_op, std::string* out_error = nullptr) { +bool decode_operand(ea_t ea, int opnum, insn_t& out_insn, op_t& out_op, std::string* out_error) { if (out_error) out_error->clear(); if (ea == BADADDR || !is_code(get_flags(ea))) { if (out_error) *out_error = "address is not code"; @@ -1680,7 +1472,7 @@ inline bool decode_operand(ea_t ea, int opnum, insn_t& out_insn, op_t& out_op, s return true; } -inline bool operand_numeric_value(ea_t ea, int opnum, uint64& out_value, std::string* out_error = nullptr) { +bool operand_numeric_value(ea_t ea, int opnum, uint64& out_value, std::string* out_error) { if (out_error) out_error->clear(); insn_t insn; op_t op; @@ -1702,7 +1494,7 @@ inline bool operand_numeric_value(ea_t ea, int opnum, uint64& out_value, std::st } } -inline bool resolve_enum_member_serial(const tinfo_t& enum_tif, const std::string& member_name, uchar& out_serial, std::string* out_error = nullptr) { +bool resolve_enum_member_serial(const tinfo_t& enum_tif, const std::string& member_name, uchar& out_serial, std::string* out_error) { if (out_error) out_error->clear(); edm_t target; const ssize_t idx = enum_tif.get_edm(&target, member_name.c_str()); @@ -1725,7 +1517,7 @@ inline bool resolve_enum_member_serial(const tinfo_t& enum_tif, const std::strin return false; } -inline bool apply_operand_representation(ea_t ea, int opnum, const OperandApplyRequest& req, std::string* out_error = nullptr) { +bool apply_operand_representation(ea_t ea, int opnum, const OperandApplyRequest& req, std::string* out_error) { if (out_error) out_error->clear(); if (ea == BADADDR || !is_code(get_flags(ea))) { if (out_error) *out_error = "address is not code"; @@ -1847,7 +1639,7 @@ inline bool apply_operand_representation(ea_t ea, int opnum, const OperandApplyR return ok; } -inline const char* operand_class_name(optype_t type) { +const char* operand_class_name(optype_t type) { switch (type) { case o_void: return ""; case o_reg: return "reg"; @@ -1869,21 +1661,21 @@ inline const char* operand_class_name(optype_t type) { } } -inline std::string operand_class_text(ea_t ea, int opnum) { +std::string operand_class_text(ea_t ea, int opnum) { insn_t insn; op_t op; if (!decode_operand(ea, opnum, insn, op, nullptr)) return ""; return operand_class_name(op.type); } -inline std::string operand_repr_kind_text(ea_t ea, int opnum) { +std::string operand_repr_kind_text(ea_t ea, int opnum) { const flags64_t flags = get_flags(ea); if (is_enum(flags, opnum)) return "enum"; if (is_stroff(flags, opnum)) return "stroff"; return "plain"; } -inline std::string operand_repr_type_name_text(ea_t ea, int opnum) { +std::string operand_repr_type_name_text(ea_t ea, int opnum) { const flags64_t flags = get_flags(ea); if (is_enum(flags, opnum)) { uchar serial = 0; @@ -1916,7 +1708,7 @@ inline std::string operand_repr_type_name_text(ea_t ea, int opnum) { return ""; } -inline std::string operand_repr_member_name_text(ea_t ea, int opnum) { +std::string operand_repr_member_name_text(ea_t ea, int opnum) { if (!is_enum(get_flags(ea), opnum)) return ""; uchar serial = 0; @@ -1936,14 +1728,14 @@ inline std::string operand_repr_member_name_text(ea_t ea, int opnum) { return expr.c_str(); } -inline int operand_repr_serial(ea_t ea, int opnum) { +int operand_repr_serial(ea_t ea, int opnum) { if (!is_enum(get_flags(ea), opnum)) return 0; uchar serial = 0; get_enum_id(&serial, ea, opnum); return static_cast(serial); } -inline int64_t operand_repr_delta(ea_t ea, int opnum) { +int64_t operand_repr_delta(ea_t ea, int opnum) { if (!is_stroff(get_flags(ea), opnum)) return 0; std::array path{}; adiff_t delta = 0; @@ -1951,7 +1743,7 @@ inline int64_t operand_repr_delta(ea_t ea, int opnum) { return static_cast(delta); } -inline std::string operand_format_spec_text(ea_t ea, int opnum) { +std::string operand_format_spec_text(ea_t ea, int opnum) { const std::string kind = operand_repr_kind_text(ea, opnum); if (kind == "enum") { const std::string type_name = operand_repr_type_name_text(ea, opnum); @@ -1969,12 +1761,12 @@ inline std::string operand_format_spec_text(ea_t ea, int opnum) { } // Legacy wrappers kept for compatibility with older call sites. -inline std::string operand_kind_text(ea_t ea, int opnum) { return operand_repr_kind_text(ea, opnum); } -inline std::string operand_type_text(ea_t ea, int opnum) { return operand_repr_type_name_text(ea, opnum); } -inline int operand_enum_serial(ea_t ea, int opnum) { return operand_repr_serial(ea, opnum); } -inline int64_t operand_stroff_delta(ea_t ea, int opnum) { return operand_repr_delta(ea, opnum); } +std::string operand_kind_text(ea_t ea, int opnum) { return operand_repr_kind_text(ea, opnum); } +std::string operand_type_text(ea_t ea, int opnum) { return operand_repr_type_name_text(ea, opnum); } +int operand_enum_serial(ea_t ea, int opnum) { return operand_repr_serial(ea, opnum); } +int64_t operand_stroff_delta(ea_t ea, int opnum) { return operand_repr_delta(ea, opnum); } -inline void instruction_column_common(xsql::FunctionContext& ctx, ea_t ea, ea_t func_addr, int col) { +void instruction_column_common(xsql::FunctionContext& ctx, ea_t ea, ea_t func_addr, int col) { if (col == 0) { ctx.result_int64(ea); return; @@ -2045,7 +1837,73 @@ inline void instruction_column_common(xsql::FunctionContext& ctx, ea_t ea, ea_t ctx.result_null(); } -inline void collect_instruction_rows(std::vector& rows) { +// ============================================================================ +// INSTRUCTIONS Table - Iterators +// ============================================================================ + +InstructionsInFuncIterator::InstructionsInFuncIterator(ea_t func_addr) + : func_addr_(func_addr) +{ + pfn_ = get_func(func_addr_); +} + +bool InstructionsInFuncIterator::next() { + if (!pfn_) return false; + + if (!started_) { + started_ = true; + valid_ = fii_.set(pfn_); + if (valid_) current_ea_ = fii_.current(); + } else if (valid_) { + valid_ = fii_.next_code(); + if (valid_) current_ea_ = fii_.current(); + } + return valid_; +} + +bool InstructionsInFuncIterator::eof() const { + return started_ && !valid_; +} + +void InstructionsInFuncIterator::column(xsql::FunctionContext& ctx, int col) { + instruction_column_common(ctx, current_ea_, func_addr_, col); +} + +int64_t InstructionsInFuncIterator::rowid() const { + return static_cast(current_ea_); +} + +InstructionAtAddressIterator::InstructionAtAddressIterator(ea_t ea) : ea_(ea) {} + +bool InstructionAtAddressIterator::next() { + if (!started_) { + started_ = true; + valid_ = (ea_ != BADADDR) && is_code(get_flags(ea_)); + return valid_; + } + valid_ = false; + return false; +} + +bool InstructionAtAddressIterator::eof() const { + return started_ && !valid_; +} + +void InstructionAtAddressIterator::column(xsql::FunctionContext& ctx, int col) { + func_t* f = get_func(ea_); + ea_t func_addr = f ? f->start_ea : 0; + instruction_column_common(ctx, ea_, func_addr, col); +} + +int64_t InstructionAtAddressIterator::rowid() const { + return static_cast(ea_); +} + +// ============================================================================ +// INSTRUCTIONS Table - Definition +// ============================================================================ + +void collect_instruction_rows(std::vector& rows) { rows.clear(); ea_t ea = inf_get_min_ea(); @@ -2058,7 +1916,7 @@ inline void collect_instruction_rows(std::vector& rows) { } } -inline CachedTableDef define_instructions() { +CachedTableDef define_instructions() { auto builder = cached_table("instructions") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -2260,139 +2118,289 @@ inline CachedTableDef define_instructions() { } // ============================================================================ -// Registry: All tables in one place +// USERDATA Table - netnode-backed key-value store // ============================================================================ -struct TableRegistry { - // Index-based tables (use IDA's indexed access, no cache needed) - VTableDef funcs; - VTableDef segments; - VTableDef names; - VTableDef entries; - CachedTableDef comments; - CachedTableDef bookmarks; - CachedTableDef heads; - CachedTableDef bytes; - CachedTableDef patched_bytes; - CachedTableDef instructions; - - // Cached tables (query-scoped cache - memory freed after query) - CachedTableDef xrefs; - CachedTableDef blocks; - CachedTableDef imports; - CachedTableDef strings; - - // Global pointer for cache invalidation from SQL functions - static inline TableRegistry* g_instance = nullptr; - - TableRegistry() - : funcs(define_funcs()) - , segments(define_segments()) - , names(define_names()) - , entries(define_entries()) - , comments(define_comments()) - , bookmarks(define_bookmarks()) - , heads(define_heads()) - , bytes(define_bytes()) - , patched_bytes(define_patched_bytes()) - , instructions(define_instructions()) - , xrefs(define_xrefs()) - , blocks(define_blocks()) - , imports(define_imports()) - , strings(define_strings()) - { - g_instance = this; - } - - ~TableRegistry() { - if (g_instance == this) g_instance = nullptr; - } - - // Invalidate the strings cache (call after rebuild_strings) - void invalidate_strings_cache() { - strings.invalidate_cache(); - } - - // Static method for SQL functions to invalidate strings cache - static void invalidate_strings_cache_global() { - if (g_instance) g_instance->invalidate_strings_cache(); - } - - void register_all(xsql::Database& db) { - // Index-based tables (use IDA's indexed access) - register_index_table(db, "funcs", &funcs); - register_index_table(db, "segments", &segments); - register_index_table(db, "names", &names); - register_index_table(db, "entries", &entries); - - // Cached tables (query-scoped cache) - register_cached_table(db, "comments", &comments); - register_cached_table(db, "bookmarks", &bookmarks); - register_cached_table(db, "heads", &heads); - register_cached_table(db, "bytes", &bytes); - register_cached_table(db, "patched_bytes", &patched_bytes); - register_cached_table(db, "instructions", &instructions); - register_cached_table(db, "xrefs", &xrefs); - register_cached_table(db, "blocks", &blocks); - register_cached_table(db, "imports", &imports); - register_cached_table(db, "strings", &strings); - - // Grep-style entity search table - search::register_grep_entities(db); - - // Create convenience views for common queries - create_helper_views(db); - } - - void create_helper_views(xsql::Database& db) { - // callers view - who calls a function - db.exec(R"( - CREATE VIEW IF NOT EXISTS callers AS - SELECT - x.to_ea as func_addr, - x.from_ea as caller_addr, - f.name as caller_name, - f.address as caller_func_addr - FROM xrefs x - LEFT JOIN funcs f ON x.from_ea >= f.address - AND x.from_ea < f.end_ea - WHERE x.is_code = 1 - )"); - - // callees view - what does a function call - db.exec(R"( - CREATE VIEW IF NOT EXISTS callees AS - SELECT - f.address as func_addr, - f.name as func_name, - x.to_ea as callee_addr, - COALESCE(f2.name, n.name, printf('sub_%X', x.to_ea)) as callee_name - FROM funcs f - JOIN xrefs x ON x.from_ea >= f.address - AND x.from_ea < f.end_ea - LEFT JOIN funcs f2 ON x.to_ea = f2.address - LEFT JOIN names n ON x.to_ea = n.address - WHERE x.is_code = 1 - )"); - - } - -private: - void register_index_table(xsql::Database& db, const char* name, const VTableDef* def) { - std::string module_name = std::string("ida_") + name; - db.register_table(module_name.c_str(), def); - db.create_table(name, module_name.c_str()); - } - - template - void register_cached_table(xsql::Database& db, const char* name, const CachedTableDef* def) { - std::string module_name = std::string("ida_") + name; - db.register_cached_table(module_name.c_str(), def); - db.create_table(name, module_name.c_str()); +static constexpr const char* NETNODE_KV_MASTER_NAME = "$ idasql netnode_kv"; + +static netnode get_netnode_kv_master(bool create) { + netnode master(NETNODE_KV_MASTER_NAME, 0, create); + return master; +} + +// Iterator for single-key lookup via filter_eq_text("key"). +// Uses entry_id as rowid for O(1) DELETE/UPDATE via row_lookup. +class NetnodeKvKeyIterator : public xsql::RowIterator { + std::string key_; + std::string value_; + nodeidx_t entry_id_ = 0; + bool started_ = false; + bool valid_ = false; + +public: + explicit NetnodeKvKeyIterator(const char* key) : key_(key ? key : "") {} + + bool next() override { + if (started_) { valid_ = false; return false; } + started_ = true; + + if (key_.empty()) { valid_ = false; return false; } + netnode master = get_netnode_kv_master(false); + if (master == BADNODE) { valid_ = false; return false; } + + entry_id_ = master.hashval_long(key_.c_str()); + if (entry_id_ == 0) { valid_ = false; return false; } + + netnode entry(entry_id_); + qstring blob; + if (entry.getblob(&blob, 0, stag) < 0) { + value_.clear(); + } else { + value_ = blob.c_str(); + } + + valid_ = true; + return true; } + + bool eof() const override { return started_ && !valid_; } + + void column(xsql::FunctionContext& ctx, int col) override { + if (!valid_) { ctx.result_null(); return; } + switch (col) { + case 0: ctx.result_text(key_.c_str()); break; + case 1: ctx.result_text(value_.c_str()); break; + default: ctx.result_null(); break; + } + } + + int64_t rowid() const override { return static_cast(entry_id_); } }; -} // namespace entities -} // namespace idasql +CachedTableDef define_netnode_kv() { + return cached_table("netnode_kv") + .no_shared_cache() + .estimate_rows([]() -> size_t { + return 64; + }) + .cache_builder([](std::vector& rows) { + rows.clear(); + netnode master = get_netnode_kv_master(false); + if (master == BADNODE) return; + + qstring key_buf; + for (ssize_t r = master.hashfirst(&key_buf); r >= 0; + r = master.hashnext(&key_buf, key_buf.c_str())) { + nodeidx_t entry_id = master.hashval_long(key_buf.c_str()); + if (entry_id == 0) continue; + + NetnodeKvRow row; + row.key = key_buf.c_str(); + + netnode entry(entry_id); + qstring blob; + if (entry.getblob(&blob, 0, stag) >= 0) { + row.value = blob.c_str(); + } + rows.push_back(std::move(row)); + } + }) + .row_populator([](NetnodeKvRow& row, int argc, xsql::FunctionArg* argv) { + // argv[2]=key, argv[3]=value + if (argc > 2 && !argv[2].is_null()) { + const char* k = argv[2].as_c_str(); + row.key = k ? k : ""; + } + if (argc > 3 && !argv[3].is_null()) { + const char* v = argv[3].as_c_str(); + row.value = v ? v : ""; + } + }) + .column_text("key", [](const NetnodeKvRow& row) -> std::string { + return row.key; + }) + .column_text_rw("value", + [](const NetnodeKvRow& row) -> std::string { + return row.value; + }, + [](NetnodeKvRow& row, const char* new_value) -> bool { + netnode master = get_netnode_kv_master(false); + if (master == BADNODE) return false; + + nodeidx_t entry_id = master.hashval_long(row.key.c_str()); + if (entry_id == 0) return false; + + netnode entry(entry_id); + const char* val = new_value ? new_value : ""; + size_t len = strlen(val); + bool ok = entry.setblob(val, len, 0, stag); + if (ok) row.value = val; + return ok; + }) + .row_lookup([](NetnodeKvRow& row, int64_t raw_rowid) -> bool { + netnode master = get_netnode_kv_master(false); + if (master == BADNODE) return false; + nodeidx_t entry_id = static_cast(raw_rowid); + qstring key_buf; + if (master.supstr(&key_buf, entry_id) <= 0) return false; + row.key = key_buf.c_str(); + netnode entry(entry_id); + qstring blob; + if (entry.getblob(&blob, 0, stag) >= 0) + row.value = blob.c_str(); + return true; + }) + .deletable([](NetnodeKvRow& row) -> bool { + netnode master = get_netnode_kv_master(false); + if (master == BADNODE) return false; + nodeidx_t entry_id = master.hashval_long(row.key.c_str()); + if (entry_id == 0) return false; + netnode entry(entry_id); + entry.kill(); + master.hashdel(row.key.c_str()); + master.supdel(entry_id); // clean reverse index + return true; + }) + .insertable([](int argc, xsql::FunctionArg* argv) -> bool { + // argv[0]=key, argv[1]=value + if (argc < 1 || argv[0].is_null()) return false; + + const char* key = argv[0].as_c_str(); + if (!key || !key[0]) return false; + + netnode master = get_netnode_kv_master(true); + if (master == BADNODE) return false; + + // Check if key already exists + nodeidx_t existing = master.hashval_long(key); + if (existing != 0) return false; + + // Create entry netnode + netnode entry; + if (!entry.create()) return false; + + const char* val = ""; + if (argc > 1 && !argv[1].is_null()) { + val = argv[1].as_c_str(); + if (!val) val = ""; + } + + size_t len = strlen(val); + entry.setblob(val, len, 0, stag); + nodeidx_t entry_id = static_cast(entry); + master.hashset(key, entry_id); + master.supset(entry_id, key); // reverse index for O(1) row_lookup + return true; + }) + .filter_eq_text("key", [](const char* key) -> std::unique_ptr { + return std::make_unique(key); + }, 1.0, 1.0) + .build(); +} + +// ============================================================================ +// Registry: All tables in one place +// ============================================================================ + +TableRegistry::TableRegistry() + : funcs(define_funcs()) + , segments(define_segments()) + , names(define_names()) + , entries(define_entries()) + , comments(define_comments()) + , bookmarks(define_bookmarks()) + , heads(define_heads()) + , bytes(define_bytes()) + , patched_bytes(define_patched_bytes()) + , instructions(define_instructions()) + , xrefs(define_xrefs()) + , blocks(define_blocks()) + , imports(define_imports()) + , strings(define_strings()) + , netnode_kv(define_netnode_kv()) +{ + g_instance = this; +} + +TableRegistry::~TableRegistry() { + if (g_instance == this) g_instance = nullptr; +} + +void TableRegistry::invalidate_strings_cache() { + strings.invalidate_cache(); +} + +void TableRegistry::invalidate_strings_cache_global() { + if (g_instance) g_instance->invalidate_strings_cache(); +} + +void TableRegistry::register_all(xsql::Database& db) { + // Index-based tables (use IDA's indexed access) + register_index_table(db, "funcs", &funcs); + register_index_table(db, "segments", &segments); + register_index_table(db, "names", &names); + register_index_table(db, "entries", &entries); + + // Cached tables (query-scoped cache) + register_cached_table(db, "comments", &comments); + register_cached_table(db, "bookmarks", &bookmarks); + register_cached_table(db, "heads", &heads); + register_cached_table(db, "bytes", &bytes); + register_cached_table(db, "patched_bytes", &patched_bytes); + register_cached_table(db, "instructions", &instructions); + register_cached_table(db, "xrefs", &xrefs); + register_cached_table(db, "blocks", &blocks); + register_cached_table(db, "imports", &imports); + register_cached_table(db, "strings", &strings); + register_cached_table(db, "netnode_kv", &netnode_kv); + + // Grep-style entity search table + search::register_grep_entities(db); + + // Create convenience views for common queries + create_helper_views(db); +} + +void TableRegistry::create_helper_views(xsql::Database& db) { + // callers view - who calls a function + db.exec(R"( + CREATE VIEW IF NOT EXISTS callers AS + SELECT + x.to_ea as func_addr, + x.from_ea as caller_addr, + f.name as caller_name, + f.address as caller_func_addr + FROM xrefs x + LEFT JOIN funcs f ON x.from_ea >= f.address + AND x.from_ea < f.end_ea + WHERE x.is_code = 1 + )"); + + // callees view - what does a function call + db.exec(R"( + CREATE VIEW IF NOT EXISTS callees AS + SELECT + f.address as func_addr, + f.name as func_name, + x.to_ea as callee_addr, + COALESCE(f2.name, n.name, printf('sub_%X', x.to_ea)) as callee_name + FROM funcs f + JOIN xrefs x ON x.from_ea >= f.address + AND x.from_ea < f.end_ea + LEFT JOIN funcs f2 ON x.to_ea = f2.address + LEFT JOIN names n ON x.to_ea = n.address + WHERE x.is_code = 1 + )"); + +} + +void TableRegistry::register_index_table(xsql::Database& db, const char* name, const VTableDef* def) { + std::string module_name = std::string("ida_") + name; + db.register_table(module_name.c_str(), def); + db.create_table(name, module_name.c_str()); +} + +} // namespace entities +} // namespace idasql diff --git a/src/lib/src/entities.hpp b/src/lib/src/entities.hpp new file mode 100644 index 0000000..d110b59 --- /dev/null +++ b/src/lib/src/entities.hpp @@ -0,0 +1,364 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * entities.hpp - IDA entity tables (funcs, segments, names, entries, comments, + * imports, strings, xrefs, blocks, etc.) + */ + +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ida_headers.hpp" + +namespace idasql { +namespace entities { + +// ============================================================================ +// Struct declarations +// ============================================================================ + +struct CommentRow { + ea_t ea = BADADDR; + std::string comment; + std::string rpt_comment; +}; + +struct ImportInfo { + int module_idx; + ea_t ea; + std::string name; + uval_t ord; +}; + +struct XrefInfo { + ea_t from_ea; + ea_t to_ea; + uint8_t type; + bool is_code; +}; + +struct BlockInfo { + ea_t func_ea; + ea_t start_ea; + ea_t end_ea; +}; + +struct BookmarkRow { + uint32_t index = 0; + ea_t ea = BADADDR; + std::string desc; +}; + +struct NetnodeKvRow { + std::string key; + std::string value; +}; + +struct HeadRow { + ea_t ea = BADADDR; +}; + +struct PatchedByteInfo { + ea_t ea; + qoff64_t fpos; + uint64 original_value; + uint64 patched_value; +}; + +struct InstructionRow { + ea_t ea = BADADDR; +}; + +struct ImportEnumContext { + std::vector* cache; + int module_idx; +}; + +enum class OperandApplyKind { + None, + Clear, + Enum, + Stroff, +}; + +struct OperandApplyRequest { + OperandApplyKind kind = OperandApplyKind::None; + std::string enum_name; + std::string enum_member_name; + uchar enum_serial = 0; + std::vector stroff_path_names; + adiff_t stroff_delta = 0; +}; + +// ============================================================================ +// Helper function declarations +// ============================================================================ + +std::string safe_func_name(ea_t ea); +std::string safe_segm_name(segment_t* seg); +std::string safe_segm_class(segment_t* seg); +std::string safe_name(ea_t ea); +std::string safe_entry_name(size_t idx); + +bool get_func_tinfo(ea_t ea, tinfo_t& tif); +const char* get_cc_name(callcnv_t cc); + +void collect_comment_rows(std::vector& rows); + +std::string get_import_module_name_safe(int idx); + +// String helpers +int get_string_width(int strtype); +const char* get_string_width_name(int strtype); +const char* get_string_type_name(int strtype); +int get_string_layout(int strtype); +const char* get_string_layout_name(int strtype); +int get_string_encoding(int strtype); +std::string get_string_content(const string_info_t& si); + +void collect_bookmark_rows(std::vector& rows); + +void collect_head_rows(std::vector& rows); +const char* get_item_type_str(ea_t ea); + +void collect_instruction_rows(std::vector& rows); + +// Operand helpers +std::string operand_kind_text(ea_t ea, int opnum); +std::string operand_type_text(ea_t ea, int opnum); +int operand_enum_serial(ea_t ea, int opnum); +int64_t operand_stroff_delta(ea_t ea, int opnum); +std::string operand_class_text(ea_t ea, int opnum); +std::string operand_repr_kind_text(ea_t ea, int opnum); +std::string operand_repr_type_name_text(ea_t ea, int opnum); +std::string operand_repr_member_name_text(ea_t ea, int opnum); +int operand_repr_serial(ea_t ea, int opnum); +int64_t operand_repr_delta(ea_t ea, int opnum); +std::string operand_format_spec_text(ea_t ea, int opnum); + +void instruction_column_common(xsql::FunctionContext& ctx, ea_t ea, ea_t func_addr, int col); + +// Constants +inline constexpr int kInstructionOperandCount = 8; +inline constexpr int kInstructionOperandBaseCol = 4; +inline constexpr int kInstructionDisasmCol = kInstructionOperandBaseCol + kInstructionOperandCount; +inline constexpr int kInstructionFuncAddrCol = kInstructionDisasmCol + 1; +inline constexpr int kInstructionClassBaseCol = kInstructionFuncAddrCol + 1; +inline constexpr int kInstructionReprKindBaseCol = kInstructionClassBaseCol + kInstructionOperandCount; +inline constexpr int kInstructionReprTypeBaseCol = kInstructionReprKindBaseCol + kInstructionOperandCount; +inline constexpr int kInstructionReprMemberBaseCol = kInstructionReprTypeBaseCol + kInstructionOperandCount; +inline constexpr int kInstructionReprSerialBaseCol = kInstructionReprMemberBaseCol + kInstructionOperandCount; +inline constexpr int kInstructionReprDeltaBaseCol = kInstructionReprSerialBaseCol + kInstructionOperandCount; +inline constexpr int kInstructionFormatSpecBaseCol = kInstructionReprDeltaBaseCol + kInstructionOperandCount; +inline constexpr int kInstructionColumnCount = kInstructionFormatSpecBaseCol + kInstructionOperandCount; + +// Parsing helpers +std::string trim_copy(const std::string& in); +bool starts_with_ci(const std::string& text, const char* prefix); +bool equals_ci(const std::string& text, const char* token); +bool parse_int64(const std::string& text, int64_t& out_value); +bool resolve_named_type_tid(const std::string& name, tid_t& out_tid, tinfo_t* out_tif = nullptr); +std::string tid_name_or_fallback(tid_t tid); +void split_path_names(const std::string& path_spec, std::vector& out_names); +bool parse_operand_format_spec(const char* spec, OperandApplyRequest& out, std::string* out_error = nullptr); +bool parse_operand_apply_spec(const char* spec, OperandApplyRequest& out); +bool decode_operand(ea_t ea, int opnum, insn_t& out_insn, op_t& out_op, std::string* out_error = nullptr); +bool operand_numeric_value(ea_t ea, int opnum, uint64& out_value, std::string* out_error = nullptr); +bool resolve_enum_member_serial(const tinfo_t& enum_tif, const std::string& member_name, uchar& out_serial, std::string* out_error = nullptr); +bool apply_operand_representation(ea_t ea, int opnum, const OperandApplyRequest& req, std::string* out_error = nullptr); +const char* operand_class_name(optype_t type); + +int idaapi patched_bytes_visitor(ea_t ea, qoff64_t fpos, uint64 o, uint64 v, void* ud); + +// ============================================================================ +// Table definition declarations +// ============================================================================ + +VTableDef define_funcs(); +VTableDef define_segments(); +VTableDef define_names(); +VTableDef define_entries(); +CachedTableDef define_comments(); +CachedTableDef define_bookmarks(); +CachedTableDef define_heads(); +CachedTableDef define_bytes(); +CachedTableDef define_patched_bytes(); +CachedTableDef define_instructions(); +CachedTableDef define_xrefs(); +CachedTableDef define_blocks(); +CachedTableDef define_imports(); +CachedTableDef define_strings(); +CachedTableDef define_netnode_kv(); + +// ============================================================================ +// Iterator class declarations +// ============================================================================ + +/** + * Iterator for xrefs TO a specific address. + * Used when query has: WHERE to_ea = X + * Uses xrefblk_t::first_to/next_to for O(refs_to_X) instead of O(all_xrefs) + */ +class XrefsToIterator : public xsql::RowIterator { + ea_t target_; + xrefblk_t xb_; + bool started_ = false; + bool valid_ = false; + +public: + explicit XrefsToIterator(ea_t target); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +/** + * Iterator for xrefs FROM a specific address. + * Used when query has: WHERE from_ea = X + * Uses xrefblk_t::first_from/next_from for O(refs_from_X) instead of O(all_xrefs) + */ +class XrefsFromIterator : public xsql::RowIterator { + ea_t source_; + xrefblk_t xb_; + bool started_ = false; + bool valid_ = false; + +public: + explicit XrefsFromIterator(ea_t source); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +/** + * Iterator for blocks in a specific function. + * Used when query has: WHERE func_ea = X + * Uses qflow_chart_t on single function for O(func_blocks) instead of O(all_blocks) + */ +class BlocksInFuncIterator : public xsql::RowIterator { + ea_t func_ea_; + qflow_chart_t fc_; + int idx_ = -1; + bool valid_ = false; + +public: + explicit BlocksInFuncIterator(ea_t func_ea); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// Iterator for single-address point query (constraint pushdown on ea) +class BytesAtIterator : public xsql::RowIterator { + ea_t ea_; + bool yielded_ = false; + bool exhausted_ = false; + +public: + explicit BytesAtIterator(ea_t ea); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// Iterator for instructions within a single function (constraint pushdown) +class InstructionsInFuncIterator : public xsql::RowIterator { + ea_t func_addr_; + func_t* pfn_ = nullptr; + func_item_iterator_t fii_; + bool started_ = false; + bool valid_ = false; + ea_t current_ea_ = BADADDR; + +public: + explicit InstructionsInFuncIterator(ea_t func_addr); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// Iterator for a single instruction by exact address. +class InstructionAtAddressIterator : public xsql::RowIterator { + ea_t ea_; + bool started_ = false; + bool valid_ = false; + +public: + explicit InstructionAtAddressIterator(ea_t ea); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// ============================================================================ +// TableRegistry +// ============================================================================ + +struct TableRegistry { + // Index-based tables (use IDA's indexed access, no cache needed) + VTableDef funcs; + VTableDef segments; + VTableDef names; + VTableDef entries; + CachedTableDef comments; + CachedTableDef bookmarks; + CachedTableDef heads; + CachedTableDef bytes; + CachedTableDef patched_bytes; + CachedTableDef instructions; + + // Cached tables (query-scoped cache - memory freed after query) + CachedTableDef xrefs; + CachedTableDef blocks; + CachedTableDef imports; + CachedTableDef strings; + CachedTableDef netnode_kv; + + // Global pointer for cache invalidation from SQL functions + static inline TableRegistry* g_instance = nullptr; + + TableRegistry(); + ~TableRegistry(); + + // Invalidate the strings cache (call after rebuild_strings) + void invalidate_strings_cache(); + + // Static method for SQL functions to invalidate strings cache + static void invalidate_strings_cache_global(); + + void register_all(xsql::Database& db); + + void create_helper_views(xsql::Database& db); + +private: + void register_index_table(xsql::Database& db, const char* name, const VTableDef* def); + + template + void register_cached_table(xsql::Database& db, const char* name, const CachedTableDef* def) { + std::string module_name = std::string("ida_") + name; + db.register_cached_table(module_name.c_str(), def); + db.create_table(name, module_name.c_str()); + } +}; + +} // namespace entities +} // namespace idasql diff --git a/src/lib/include/idasql/entities_dbg.hpp b/src/lib/src/entities_dbg.cpp similarity index 83% rename from src/lib/include/idasql/entities_dbg.hpp rename to src/lib/src/entities_dbg.cpp index a2df1eb..04accdc 100644 --- a/src/lib/include/idasql/entities_dbg.hpp +++ b/src/lib/src/entities_dbg.cpp @@ -1,35 +1,12 @@ -/** - * entities_dbg.hpp - Debugger-related IDA entities as virtual tables - * - * Tables: - * breakpoints - Debugger breakpoints (software, hardware, symbolic, source) - * - * Breakpoints persist in the IDB, so they're queryable even without an active - * debugger session. Supports full CRUD operations. - */ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT -#pragma once - -#include - -#include -#include - -#include - -// IDA SDK headers -#include -#include -#include +#include "entities_dbg.hpp" namespace idasql { namespace debugger { -// ============================================================================ -// Helpers -// ============================================================================ - -inline const char* bpt_type_name(bpttype_t type) { +const char* bpt_type_name(bpttype_t type) { switch (type) { case BPT_WRITE: return "hardware_write"; case BPT_READ: return "hardware_read"; @@ -40,7 +17,7 @@ inline const char* bpt_type_name(bpttype_t type) { } } -inline const char* bpt_loc_type_name(int loc_type) { +const char* bpt_loc_type_name(int loc_type) { switch (loc_type) { case BPLT_ABS: return "absolute"; case BPLT_REL: return "relative"; @@ -50,14 +27,14 @@ inline const char* bpt_loc_type_name(int loc_type) { } } -inline std::string safe_bpt_group(const bpt_t& bpt) { +std::string safe_bpt_group(const bpt_t& bpt) { qstring grp; if (get_bpt_group(&grp, bpt.loc)) return std::string(grp.c_str()); return ""; } -inline std::string safe_bpt_loc_path(const bpt_t& bpt) { +std::string safe_bpt_loc_path(const bpt_t& bpt) { const bpt_location_t& loc = bpt.loc; if (loc.type() == BPLT_REL || loc.type() == BPLT_SRC) { const char* p = loc.path(); @@ -66,7 +43,7 @@ inline std::string safe_bpt_loc_path(const bpt_t& bpt) { return ""; } -inline std::string safe_bpt_loc_symbol(const bpt_t& bpt) { +std::string safe_bpt_loc_symbol(const bpt_t& bpt) { const bpt_location_t& loc = bpt.loc; if (loc.type() == BPLT_SYM) { const char* s = loc.symbol(); @@ -75,11 +52,7 @@ inline std::string safe_bpt_loc_symbol(const bpt_t& bpt) { return ""; } -// ============================================================================ -// BREAKPOINTS Table (full CRUD) -// ============================================================================ - -inline VTableDef define_breakpoints() { +VTableDef define_breakpoints() { return table("breakpoints") .count([]() { return static_cast(get_bpt_qty()); }) // Column 0: address (R) @@ -262,16 +235,7 @@ inline VTableDef define_breakpoints() { return del_bpt(bpt.loc); }) // INSERT support - // argv column order: address(0), enabled(1), type(2), type_name(3), - // size(4), flags(5), pass_count(6), condition(7), loc_type(8), - // loc_type_name(9), module(10), symbol(11), offset(12), - // source_file(13), source_line(14), is_hardware(15), is_active(16), - // group(17), bptid(18) .insertable([](int argc, xsql::FunctionArg* argv) -> bool { - // Determine location type from which columns are non-NULL - // argv[0] = address, argv[11] = symbol, argv[10] = module, - // argv[13] = source_file - auto is_non_null = [&](int col) -> bool { return col < argc && !argv[col].is_null(); }; @@ -294,7 +258,6 @@ inline VTableDef define_breakpoints() { bool ok = false; if (is_non_null(11)) { - // Symbolic breakpoint: symbol column set const char* sym = get_text(11); if (!sym) return false; int64_t off = get_int64(12, 0); @@ -304,7 +267,6 @@ inline VTableDef define_breakpoints() { bpt.size = get_int(4, 0); ok = add_bpt(bpt); } else if (is_non_null(10)) { - // Relative breakpoint: module column set const char* mod = get_text(10); if (!mod) return false; int64_t off = get_int64(12, 0); @@ -314,7 +276,6 @@ inline VTableDef define_breakpoints() { bpt.size = get_int(4, 0); ok = add_bpt(bpt); } else if (is_non_null(13)) { - // Source breakpoint: source_file column set const char* file = get_text(13); if (!file) return false; int line = get_int(14, 1); @@ -324,36 +285,29 @@ inline VTableDef define_breakpoints() { bpt.size = get_int(4, 0); ok = add_bpt(bpt); } else if (is_non_null(0)) { - // Absolute breakpoint: address column set ea_t ea = static_cast(get_int64(0)); int sz = get_int(4, 0); bpttype_t tp = static_cast(get_int(2, BPT_SOFT)); ok = add_bpt(ea, sz, tp); } else { - return false; // No location specified + return false; } if (!ok) return false; // Apply optional properties after creation - // We need to find the breakpoint we just created - // Re-read to get the bpt_t for the newly added breakpoint if (is_non_null(7)) { - // condition const char* cond = get_text(7); if (cond) { - // Find the breakpoint and update condition bpt_t bpt; int n = get_bpt_qty(); for (int j = n - 1; j >= 0; --j) { if (getn_bpt(j, &bpt)) { - // Match by address for absolute, or just use last added if (is_non_null(0) && bpt.ea == static_cast(get_int64(0))) { bpt.cndbody = cond; update_bpt(&bpt); break; } else if (!is_non_null(0)) { - // For non-absolute, use the last breakpoint bpt.cndbody = cond; update_bpt(&bpt); break; @@ -364,7 +318,6 @@ inline VTableDef define_breakpoints() { } if (is_non_null(6)) { - // pass_count bpt_t bpt; int n = get_bpt_qty(); for (int j = n - 1; j >= 0; --j) { @@ -383,7 +336,6 @@ inline VTableDef define_breakpoints() { } if (is_non_null(5)) { - // flags bpt_t bpt; int n = get_bpt_qty(); for (int j = n - 1; j >= 0; --j) { @@ -402,7 +354,6 @@ inline VTableDef define_breakpoints() { } if (is_non_null(1)) { - // enabled - use enable_bpt API bool enable = get_int(1) != 0; bpt_t bpt; int n = get_bpt_qty(); @@ -420,7 +371,6 @@ inline VTableDef define_breakpoints() { } if (is_non_null(17)) { - // group const char* grp = get_text(17); if (grp) { bpt_t bpt; @@ -445,22 +395,17 @@ inline VTableDef define_breakpoints() { } // ============================================================================ -// Debugger Registry +// Registry // ============================================================================ -struct DebuggerRegistry { - VTableDef breakpoints; - - DebuggerRegistry() - : breakpoints(define_breakpoints()) - {} +DebuggerRegistry::DebuggerRegistry() + : breakpoints(define_breakpoints()) +{} - void register_all(xsql::Database& db) { - db.register_table("ida_breakpoints", &breakpoints); - db.create_table("breakpoints", "ida_breakpoints"); - } -}; +void DebuggerRegistry::register_all(xsql::Database& db) { + db.register_table("ida_breakpoints", &breakpoints); + db.create_table("breakpoints", "ida_breakpoints"); +} } // namespace debugger } // namespace idasql - diff --git a/src/lib/src/entities_dbg.hpp b/src/lib/src/entities_dbg.hpp new file mode 100644 index 0000000..536abbc --- /dev/null +++ b/src/lib/src/entities_dbg.hpp @@ -0,0 +1,38 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * entities_dbg.hpp - Debugger-related IDA entities as virtual tables + * + * Tables: breakpoints + */ + +#pragma once + +#include + +#include +#include + +#include "ida_headers.hpp" + +namespace idasql { +namespace debugger { + +const char* bpt_type_name(bpttype_t type); +const char* bpt_loc_type_name(int loc_type); +std::string safe_bpt_group(const bpt_t& bpt); +std::string safe_bpt_loc_path(const bpt_t& bpt); +std::string safe_bpt_loc_symbol(const bpt_t& bpt); + +VTableDef define_breakpoints(); + +struct DebuggerRegistry { + VTableDef breakpoints; + + DebuggerRegistry(); + void register_all(xsql::Database& db); +}; + +} // namespace debugger +} // namespace idasql diff --git a/src/lib/include/idasql/entities_ext.hpp b/src/lib/src/entities_ext.cpp similarity index 66% rename from src/lib/include/idasql/entities_ext.hpp rename to src/lib/src/entities_ext.cpp index 7e7de10..c9d495f 100644 --- a/src/lib/include/idasql/entities_ext.hpp +++ b/src/lib/src/entities_ext.cpp @@ -1,53 +1,16 @@ -/** - * entities_ext.hpp - Additional IDA entities as virtual tables - * - * This file provides additional virtual tables beyond the core entities. - * These tables cover: fixups, hidden ranges, problems, function chunks, - * signatures, local types, and more. - * - * Tables: - * fixups - Relocation/fixup records - * hidden_ranges - Collapsed/hidden regions - * problems - Analysis problems - * fchunks - Function chunks (tails) - * signatures - Applied FLIRT signatures - * local_types - Local type library entries - * mappings - Address mappings - */ - -#pragma once - -#include - -#include -#include - -#include - -// IDA SDK headers -#include -#include -#include -#include -#include -#include -#include -#include -#include +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "entities_ext.hpp" namespace idasql { namespace extended { // ============================================================================ -// FIXUPS Table - Relocation records +// Collection helpers // ============================================================================ -struct FixupEntry { - ea_t ea; - fixup_data_t data; -}; - -inline void collect_fixups(std::vector& rows) { +void collect_fixups(std::vector& rows) { rows.clear(); for (ea_t ea = get_first_fixup_ea(); ea != BADADDR; ea = get_next_fixup_ea(ea)) { @@ -59,7 +22,83 @@ inline void collect_fixups(std::vector& rows) { } } -inline CachedTableDef define_fixups() { +void collect_problems(std::vector& rows) { + rows.clear(); + + for (int t = PR_NOBASE; t < PR_END; ++t) { + problist_id_t ptype = static_cast(t); + const char* tname = get_problem_name(ptype, true); + + for (ea_t ea = get_problem(ptype, 0); ea != BADADDR; ea = get_problem(ptype, ea + 1)) { + ProblemEntry entry; + entry.ea = ea; + entry.type = ptype; + entry.type_name = tname ? tname : ""; + + qstring desc; + if (get_problem_desc(&desc, ptype, ea) > 0) { + entry.description = desc.c_str(); + } + rows.push_back(entry); + } + } +} + +void collect_signatures(std::vector& rows) { + rows.clear(); + + int qty = get_idasgn_qty(); + for (int i = 0; i < qty; ++i) { + SignatureEntry entry; + entry.index = i; + + qstring signame, optlibs; + entry.state = get_idasgn_desc(&signame, &optlibs, i); + entry.name = signame.c_str(); + entry.optlibs = optlibs.c_str(); + rows.push_back(entry); + } +} + +void collect_local_types(std::vector& rows) { + rows.clear(); + + til_t* ti = get_idati(); + if (!ti) return; + + uint32_t ord = 1; + while (true) { + const char* name = get_numbered_type_name(ti, ord); + if (!name) break; + + LocalTypeEntry entry; + entry.ordinal = ord; + entry.name = name; + + tinfo_t tif; + if (tif.get_numbered_type(ti, ord)) { + qstring ts; + tif.print(&ts); + entry.type_str = ts.c_str(); + entry.is_struct = tif.is_struct() || tif.is_union(); + entry.is_enum = tif.is_enum(); + entry.is_typedef = tif.is_typedef(); + } else { + entry.is_struct = false; + entry.is_enum = false; + entry.is_typedef = false; + } + + rows.push_back(entry); + ++ord; + } +} + +// ============================================================================ +// Table definitions +// ============================================================================ + +CachedTableDef define_fixups() { return cached_table("fixups") .no_shared_cache() .estimate_rows([]() -> size_t { return 512; }) @@ -81,11 +120,7 @@ inline CachedTableDef define_fixups() { .build(); } -// ============================================================================ -// HIDDEN_RANGES Table - Collapsed/hidden regions -// ============================================================================ - -inline VTableDef define_hidden_ranges() { +VTableDef define_hidden_ranges() { return table("hidden_ranges") .count([]() { return static_cast(get_hidden_range_qty()); @@ -125,41 +160,7 @@ inline VTableDef define_hidden_ranges() { .build(); } -// ============================================================================ -// PROBLEMS Table - Analysis problems -// ============================================================================ - -struct ProblemEntry { - ea_t ea; - problist_id_t type; - std::string description; - std::string type_name; -}; - -inline void collect_problems(std::vector& rows) { - rows.clear(); - - // Iterate all problem types - for (int t = PR_NOBASE; t < PR_END; ++t) { - problist_id_t ptype = static_cast(t); - const char* tname = get_problem_name(ptype, true); - - for (ea_t ea = get_problem(ptype, 0); ea != BADADDR; ea = get_problem(ptype, ea + 1)) { - ProblemEntry entry; - entry.ea = ea; - entry.type = ptype; - entry.type_name = tname ? tname : ""; - - qstring desc; - if (get_problem_desc(&desc, ptype, ea) > 0) { - entry.description = desc.c_str(); - } - rows.push_back(entry); - } - } -} - -inline CachedTableDef define_problems() { +CachedTableDef define_problems() { return cached_table("problems") .no_shared_cache() .estimate_rows([]() -> size_t { return 512; }) @@ -181,11 +182,7 @@ inline CachedTableDef define_problems() { .build(); } -// ============================================================================ -// FCHUNKS Table - Function chunks (tails) -// ============================================================================ - -inline VTableDef define_fchunks() { +VTableDef define_fchunks() { return table("fchunks") .count([]() { return get_fchunk_qty(); @@ -205,7 +202,6 @@ inline VTableDef define_fchunks() { .column_int64("owner", [](size_t i) -> int64_t { func_t* chunk = getn_fchunk(i); if (!chunk) return 0; - // For tail chunks, find the owner func_t* owner = get_func(chunk->start_ea); return owner ? owner->start_ea : 0; }) @@ -215,40 +211,12 @@ inline VTableDef define_fchunks() { }) .column_int("is_tail", [](size_t i) -> int { func_t* chunk = getn_fchunk(i); - // FUNC_TAIL indicates this is a tail/chunk of another function return chunk ? ((chunk->flags & FUNC_TAIL) ? 1 : 0) : 0; }) .build(); } -// ============================================================================ -// SIGNATURES Table - Applied FLIRT signatures -// ============================================================================ - -struct SignatureEntry { - int index; - std::string name; - std::string optlibs; - int32 state; -}; - -inline void collect_signatures(std::vector& rows) { - rows.clear(); - - int qty = get_idasgn_qty(); - for (int i = 0; i < qty; ++i) { - SignatureEntry entry; - entry.index = i; - - qstring signame, optlibs; - entry.state = get_idasgn_desc(&signame, &optlibs, i); - entry.name = signame.c_str(); - entry.optlibs = optlibs.c_str(); - rows.push_back(entry); - } -} - -inline CachedTableDef define_signatures() { +CachedTableDef define_signatures() { return cached_table("signatures") .no_shared_cache() .estimate_rows([]() -> size_t { return 128; }) @@ -270,55 +238,7 @@ inline CachedTableDef define_signatures() { .build(); } -// ============================================================================ -// LOCAL_TYPES Table - Local type library entries -// ============================================================================ - -struct LocalTypeEntry { - uint32_t ordinal; - std::string name; - std::string type_str; - bool is_struct; - bool is_enum; - bool is_typedef; -}; - -inline void collect_local_types(std::vector& rows) { - rows.clear(); - - til_t* ti = get_idati(); - if (!ti) return; - - // Iterate numbered types - uint32_t ord = 1; - while (true) { - const char* name = get_numbered_type_name(ti, ord); - if (!name) break; - - LocalTypeEntry entry; - entry.ordinal = ord; - entry.name = name; - - tinfo_t tif; - if (tif.get_numbered_type(ti, ord)) { - qstring ts; - tif.print(&ts); - entry.type_str = ts.c_str(); - entry.is_struct = tif.is_struct() || tif.is_union(); - entry.is_enum = tif.is_enum(); - entry.is_typedef = tif.is_typedef(); - } else { - entry.is_struct = false; - entry.is_enum = false; - entry.is_typedef = false; - } - - rows.push_back(entry); - ++ord; - } -} - -inline CachedTableDef define_local_types() { +CachedTableDef define_local_types() { return cached_table("local_types") .no_shared_cache() .estimate_rows([]() -> size_t { return 256; }) @@ -346,11 +266,7 @@ inline CachedTableDef define_local_types() { .build(); } -// ============================================================================ -// MAPPINGS Table - Address mappings -// ============================================================================ - -inline VTableDef define_mappings() { +VTableDef define_mappings() { return table("mappings") .count([]() { return get_mappings_qty(); @@ -383,51 +299,41 @@ inline VTableDef define_mappings() { } // ============================================================================ -// Extended Registry +// Registry // ============================================================================ -struct ExtendedRegistry { - CachedTableDef fixups; - VTableDef hidden_ranges; - CachedTableDef problems; - VTableDef fchunks; - CachedTableDef signatures; - CachedTableDef local_types; - VTableDef mappings; - - ExtendedRegistry() - : fixups(define_fixups()) - , hidden_ranges(define_hidden_ranges()) - , problems(define_problems()) - , fchunks(define_fchunks()) - , signatures(define_signatures()) - , local_types(define_local_types()) - , mappings(define_mappings()) - {} - - void register_all(xsql::Database& db) { - db.register_cached_table("ida_fixups", &fixups); - db.create_table("fixups", "ida_fixups"); - - db.register_table("ida_hidden_ranges", &hidden_ranges); - db.create_table("hidden_ranges", "ida_hidden_ranges"); - - db.register_cached_table("ida_problems", &problems); - db.create_table("problems", "ida_problems"); - - db.register_table("ida_fchunks", &fchunks); - db.create_table("fchunks", "ida_fchunks"); - - db.register_cached_table("ida_signatures", &signatures); - db.create_table("signatures", "ida_signatures"); - - db.register_cached_table("ida_local_types", &local_types); - db.create_table("local_types", "ida_local_types"); - - db.register_table("ida_mappings", &mappings); - db.create_table("mappings", "ida_mappings"); - } -}; +ExtendedRegistry::ExtendedRegistry() + : fixups(define_fixups()) + , hidden_ranges(define_hidden_ranges()) + , problems(define_problems()) + , fchunks(define_fchunks()) + , signatures(define_signatures()) + , local_types(define_local_types()) + , mappings(define_mappings()) +{} + +void ExtendedRegistry::register_all(xsql::Database& db) { + db.register_cached_table("ida_fixups", &fixups); + db.create_table("fixups", "ida_fixups"); + + db.register_table("ida_hidden_ranges", &hidden_ranges); + db.create_table("hidden_ranges", "ida_hidden_ranges"); + + db.register_cached_table("ida_problems", &problems); + db.create_table("problems", "ida_problems"); + + db.register_table("ida_fchunks", &fchunks); + db.create_table("fchunks", "ida_fchunks"); + + db.register_cached_table("ida_signatures", &signatures); + db.create_table("signatures", "ida_signatures"); + + db.register_cached_table("ida_local_types", &local_types); + db.create_table("local_types", "ida_local_types"); + + db.register_table("ida_mappings", &mappings); + db.create_table("mappings", "ida_mappings"); +} } // namespace extended } // namespace idasql diff --git a/src/lib/src/entities_ext.hpp b/src/lib/src/entities_ext.hpp new file mode 100644 index 0000000..3f3ea62 --- /dev/null +++ b/src/lib/src/entities_ext.hpp @@ -0,0 +1,77 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * entities_ext.hpp - Additional IDA entities as virtual tables + * + * Tables: fixups, hidden_ranges, problems, fchunks, signatures, local_types, mappings + */ + +#pragma once + +#include + +#include +#include + +#include "ida_headers.hpp" + +namespace idasql { +namespace extended { + +struct FixupEntry { + ea_t ea; + fixup_data_t data; +}; + +struct ProblemEntry { + ea_t ea; + problist_id_t type; + std::string description; + std::string type_name; +}; + +struct SignatureEntry { + int index; + std::string name; + std::string optlibs; + int32 state; +}; + +struct LocalTypeEntry { + uint32_t ordinal; + std::string name; + std::string type_str; + bool is_struct; + bool is_enum; + bool is_typedef; +}; + +void collect_fixups(std::vector& rows); +void collect_problems(std::vector& rows); +void collect_signatures(std::vector& rows); +void collect_local_types(std::vector& rows); + +CachedTableDef define_fixups(); +VTableDef define_hidden_ranges(); +CachedTableDef define_problems(); +VTableDef define_fchunks(); +CachedTableDef define_signatures(); +CachedTableDef define_local_types(); +VTableDef define_mappings(); + +struct ExtendedRegistry { + CachedTableDef fixups; + VTableDef hidden_ranges; + CachedTableDef problems; + VTableDef fchunks; + CachedTableDef signatures; + CachedTableDef local_types; + VTableDef mappings; + + ExtendedRegistry(); + void register_all(xsql::Database& db); +}; + +} // namespace extended +} // namespace idasql diff --git a/src/lib/src/entities_search.cpp b/src/lib/src/entities_search.cpp new file mode 100644 index 0000000..d5f4fe5 --- /dev/null +++ b/src/lib/src/entities_search.cpp @@ -0,0 +1,415 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "entities_search.hpp" + +namespace idasql { +namespace search { + +// ============================================================================ +// NamePattern +// ============================================================================ + +NamePattern::NamePattern(const std::string& raw) { + std::string lowered = to_lower(raw); + std::replace(lowered.begin(), lowered.end(), '*', '%'); + if (lowered.empty()) { + return; + } + + if (!has_wildcards(lowered)) { + lowered = "%" + lowered + "%"; + } + + pattern_ = std::move(lowered); + valid_ = true; +} + +bool NamePattern::matches(const std::string& value) const { + if (!valid_) return false; + return like_match(to_lower(value), pattern_); +} + +std::string NamePattern::to_lower(const std::string& s) { + std::string out; + out.reserve(s.size()); + for (char c : s) { + out.push_back(static_cast(std::tolower(static_cast(c)))); + } + return out; +} + +bool NamePattern::has_wildcards(const std::string& s) { + return s.find('%') != std::string::npos || s.find('_') != std::string::npos; +} + +bool NamePattern::like_match(const std::string& text, const std::string& pattern) { + size_t ti = 0; + size_t pi = 0; + size_t star = std::string::npos; + size_t retry = 0; + + while (ti < text.size()) { + if (pi < pattern.size() && (pattern[pi] == '_' || pattern[pi] == text[ti])) { + ++ti; + ++pi; + continue; + } + if (pi < pattern.size() && pattern[pi] == '%') { + star = pi++; + retry = ti; + continue; + } + if (star != std::string::npos) { + pi = star + 1; + ti = ++retry; + continue; + } + return false; + } + + while (pi < pattern.size() && pattern[pi] == '%') { + ++pi; + } + return pi == pattern.size(); +} + +// ============================================================================ +// EntityGenerator +// ============================================================================ + +EntityGenerator::EntityGenerator(const std::string& pattern) : pattern_(pattern) {} + +bool EntityGenerator::next() { + if (!pattern_.valid()) return false; + + while (current_source_ != EntitySource::Done) { + if (advance_current_source()) { + return true; + } + current_source_ = static_cast(static_cast(current_source_) + 1); + current_index_ = 0; + type_ordinal_ = 0; + member_index_ = 0; + } + return false; +} + +bool EntityGenerator::matches(const std::string& name) const { + return pattern_.matches(name); +} + +bool EntityGenerator::advance_current_source() { + switch (current_source_) { + case EntitySource::Functions: return advance_functions(); + case EntitySource::Labels: return advance_labels(); + case EntitySource::Segments: return advance_segments(); + case EntitySource::Structs: return advance_structs(); + case EntitySource::Unions: return advance_unions(); + case EntitySource::Enums: return advance_enums(); + case EntitySource::Members: return advance_members(); + case EntitySource::EnumMembers: return advance_enum_members(); + case EntitySource::Done: return false; + } + return false; +} + +bool EntityGenerator::advance_functions() { + size_t count = get_func_qty(); + while (current_index_ < count) { + func_t* fn = getn_func(current_index_++); + if (!fn) continue; + + qstring name; + if (get_func_name(&name, fn->start_ea) <= 0) continue; + + std::string name_str(name.c_str()); + if (matches(name_str)) { + current_row_.name = name_str; + current_row_.kind = "function"; + current_row_.address = fn->start_ea; + current_row_.has_address = true; + current_row_.has_ordinal = false; + current_row_.parent_name.clear(); + current_row_.full_name = name_str; + return true; + } + } + return false; +} + +bool EntityGenerator::advance_labels() { + size_t count = get_nlist_size(); + while (current_index_ < count) { + ea_t ea = get_nlist_ea(current_index_); + const char* name = get_nlist_name(current_index_); + current_index_++; + + if (!name || !*name) continue; + + func_t* fn = get_func(ea); + if (fn && fn->start_ea == ea) continue; + + std::string name_str(name); + if (matches(name_str)) { + current_row_.name = name_str; + current_row_.kind = "label"; + current_row_.address = ea; + current_row_.has_address = true; + current_row_.has_ordinal = false; + current_row_.parent_name.clear(); + current_row_.full_name = name_str; + return true; + } + } + return false; +} + +bool EntityGenerator::advance_segments() { + int count = get_segm_qty(); + while (static_cast(current_index_) < count) { + segment_t* seg = getnseg(static_cast(current_index_++)); + if (!seg) continue; + + qstring name; + if (get_segm_name(&name, seg) <= 0) continue; + + std::string name_str(name.c_str()); + if (matches(name_str)) { + current_row_.name = name_str; + current_row_.kind = "segment"; + current_row_.address = seg->start_ea; + current_row_.has_address = true; + current_row_.has_ordinal = false; + current_row_.parent_name.clear(); + current_row_.full_name = name_str; + return true; + } + } + return false; +} + +bool EntityGenerator::advance_types_of_kind(const char* kind, bool want_struct, bool want_union, bool want_enum) { + uint32 count = get_ordinal_count(nullptr); + while (type_ordinal_ < count) { + uint32 ord = type_ordinal_++; + tinfo_t tif; + if (!tif.get_numbered_type(nullptr, ord)) continue; + + bool is_struct = tif.is_struct(); + bool is_union = tif.is_union(); + bool is_enum = tif.is_enum(); + + if (want_struct && !is_struct) continue; + if (want_union && !is_union) continue; + if (want_enum && !is_enum) continue; + + qstring name; + if (!tif.get_type_name(&name)) continue; + + std::string name_str(name.c_str()); + if (matches(name_str)) { + current_row_.name = name_str; + current_row_.kind = kind; + current_row_.has_address = false; + current_row_.ordinal = ord; + current_row_.has_ordinal = true; + current_row_.parent_name.clear(); + current_row_.full_name = name_str; + return true; + } + } + return false; +} + +bool EntityGenerator::advance_structs() { return advance_types_of_kind("struct", true, false, false); } +bool EntityGenerator::advance_unions() { return advance_types_of_kind("union", false, true, false); } +bool EntityGenerator::advance_enums() { return advance_types_of_kind("enum", false, false, true); } + +bool EntityGenerator::advance_members() { + uint32 count = get_ordinal_count(nullptr); + + while (type_ordinal_ < count) { + if (!current_type_.get_numbered_type(nullptr, type_ordinal_)) { + type_ordinal_++; + member_index_ = 0; + continue; + } + + if (!current_type_.is_struct() && !current_type_.is_union()) { + type_ordinal_++; + member_index_ = 0; + continue; + } + + udt_type_data_t udt; + if (!current_type_.get_udt_details(&udt)) { + type_ordinal_++; + member_index_ = 0; + continue; + } + + while (member_index_ < udt.size()) { + const udm_t& member = udt[member_index_++]; + std::string member_name(member.name.c_str()); + + if (matches(member_name)) { + qstring type_name; + current_type_.get_type_name(&type_name); + + current_row_.name = member_name; + current_row_.kind = "member"; + current_row_.has_address = false; + current_row_.ordinal = type_ordinal_; + current_row_.has_ordinal = true; + current_row_.parent_name = type_name.c_str(); + current_row_.full_name = std::string(type_name.c_str()) + "." + member_name; + return true; + } + } + + type_ordinal_++; + member_index_ = 0; + } + return false; +} + +bool EntityGenerator::advance_enum_members() { + uint32 count = get_ordinal_count(nullptr); + + while (type_ordinal_ < count) { + if (!current_type_.get_numbered_type(nullptr, type_ordinal_)) { + type_ordinal_++; + member_index_ = 0; + continue; + } + + if (!current_type_.is_enum()) { + type_ordinal_++; + member_index_ = 0; + continue; + } + + enum_type_data_t etd; + if (!current_type_.get_enum_details(&etd)) { + type_ordinal_++; + member_index_ = 0; + continue; + } + + while (member_index_ < etd.size()) { + const edm_t& em = etd[member_index_++]; + std::string value_name(em.name.c_str()); + + if (matches(value_name)) { + qstring type_name; + current_type_.get_type_name(&type_name); + + current_row_.name = value_name; + current_row_.kind = "enum_member"; + current_row_.has_address = false; + current_row_.ordinal = type_ordinal_; + current_row_.has_ordinal = true; + current_row_.parent_name = type_name.c_str(); + current_row_.full_name = std::string(type_name.c_str()) + "." + value_name; + return true; + } + } + + type_ordinal_++; + member_index_ = 0; + } + return false; +} + +// ============================================================================ +// GrepIterator +// ============================================================================ + +GrepIterator::GrepIterator(const std::string& pattern) + : generator_(pattern) {} + +bool GrepIterator::next() { + started_ = true; + valid_ = generator_.next(); + if (valid_) { + ++rowid_; + } + return valid_; +} + +bool GrepIterator::eof() const { + return started_ && !valid_; +} + +void GrepIterator::column(xsql::FunctionContext& ctx, int col) { + if (!valid_) { + ctx.result_null(); + return; + } + + const EntityRow& row = generator_.current(); + switch (col) { + case 0: + ctx.result_null(); + break; + case 1: + ctx.result_text(row.name); + break; + case 2: + ctx.result_text(row.kind); + break; + case 3: + if (row.has_address) ctx.result_int64(static_cast(row.address)); + else ctx.result_null(); + break; + case 4: + if (row.has_ordinal) ctx.result_int64(row.ordinal); + else ctx.result_null(); + break; + case 5: + if (row.parent_name.empty()) ctx.result_null(); + else ctx.result_text(row.parent_name); + break; + case 6: + ctx.result_text(row.full_name); + break; + default: + ctx.result_null(); + break; + } +} + +int64_t GrepIterator::rowid() const { + return rowid_; +} + +// ============================================================================ +// Table definition and registration +// ============================================================================ + +VTableDef define_grep() { + return table("grep") + .count([]() -> size_t { + return 0; + }) + .column_text("pattern", [](size_t) -> std::string { return ""; }) + .column_text("name", [](size_t) -> std::string { return ""; }) + .column_text("kind", [](size_t) -> std::string { return ""; }) + .column_int64("address", [](size_t) -> int64_t { return 0; }) + .column_int64("ordinal", [](size_t) -> int64_t { return 0; }) + .column_text("parent_name", [](size_t) -> std::string { return ""; }) + .column_text("full_name", [](size_t) -> std::string { return ""; }) + .filter_eq_text("pattern", [](const char* pattern) -> std::unique_ptr { + return std::make_unique(pattern ? pattern : ""); + }, 25.0, 100.0) + .build(); +} + +bool register_grep_entities(xsql::Database& db) { + static VTableDef grep = define_grep(); + return db.register_table("ida_grep", &grep) && db.create_table("grep", "ida_grep"); +} + +} // namespace search +} // namespace idasql diff --git a/src/lib/src/entities_search.hpp b/src/lib/src/entities_search.hpp new file mode 100644 index 0000000..fae5c22 --- /dev/null +++ b/src/lib/src/entities_search.hpp @@ -0,0 +1,111 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * entities_search.hpp - Grep-style entity search table + * + * Tables: grep + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ida_headers.hpp" + +namespace idasql { +namespace search { + +struct EntityRow { + std::string name; + std::string kind; + ea_t address = BADADDR; + uint32 ordinal = 0; + std::string parent_name; + std::string full_name; + bool has_address = false; + bool has_ordinal = false; +}; + +class NamePattern { + std::string pattern_; + bool valid_ = false; + +public: + explicit NamePattern(const std::string& raw); + bool valid() const { return valid_; } + bool matches(const std::string& value) const; + +private: + static std::string to_lower(const std::string& s); + static bool has_wildcards(const std::string& s); + static bool like_match(const std::string& text, const std::string& pattern); +}; + +enum class EntitySource { + Functions = 0, + Labels, + Segments, + Structs, + Unions, + Enums, + Members, + EnumMembers, + Done +}; + +class EntityGenerator { + NamePattern pattern_; + EntitySource current_source_ = EntitySource::Functions; + size_t current_index_ = 0; + EntityRow current_row_; + uint32 type_ordinal_ = 0; + size_t member_index_ = 0; + tinfo_t current_type_; + +public: + explicit EntityGenerator(const std::string& pattern); + bool next(); + const EntityRow& current() const { return current_row_; } + +private: + bool matches(const std::string& name) const; + bool advance_current_source(); + bool advance_functions(); + bool advance_labels(); + bool advance_segments(); + bool advance_types_of_kind(const char* kind, bool want_struct, bool want_union, bool want_enum); + bool advance_structs(); + bool advance_unions(); + bool advance_enums(); + bool advance_members(); + bool advance_enum_members(); +}; + +class GrepIterator : public xsql::RowIterator { + EntityGenerator generator_; + bool started_ = false; + bool valid_ = false; + int64_t rowid_ = -1; + +public: + explicit GrepIterator(const std::string& pattern); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +VTableDef define_grep(); +bool register_grep_entities(xsql::Database& db); + +} // namespace search +} // namespace idasql diff --git a/src/lib/include/idasql/entities_types.hpp b/src/lib/src/entities_types.cpp similarity index 66% rename from src/lib/include/idasql/entities_types.hpp rename to src/lib/src/entities_types.cpp index e82cb6c..82d04d2 100644 --- a/src/lib/include/idasql/entities_types.hpp +++ b/src/lib/src/entities_types.cpp @@ -1,43 +1,22 @@ -/** - * entities_types.hpp - IDA type system tables - * - * Provides SQL tables for querying IDA's type library: - * types - All local types (structs, unions, enums, typedefs, funcs) - * types_members - Struct/union member details - * types_enum_values - Enum constant values - * types_func_args - Function prototype arguments - * - * Also provides views: - * types_v_structs - Filter: structs only - * types_v_unions - Filter: unions only - * types_v_enums - Filter: enums only - * types_v_typedefs - Filter: typedefs only - * types_v_funcs - Filter: function types only - */ - -#pragma once - -#include - -#include -#include - -#include - -// IDA SDK headers -#include -#include +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "entities_types.hpp" namespace idasql { namespace types { -inline void ida_undo_hook(const std::string&) {} +// ============================================================================ +// Undo hook +// ============================================================================ + +void ida_undo_hook(const std::string&) {} // ============================================================================ // Type Kind Classification // ============================================================================ -inline const char* get_type_kind(const tinfo_t& tif) { +const char* get_type_kind(const tinfo_t& tif) { if (tif.is_struct()) return "struct"; if (tif.is_union()) return "union"; if (tif.is_enum()) return "enum"; @@ -52,24 +31,7 @@ inline const char* get_type_kind(const tinfo_t& tif) { // Type Entry Cache // ============================================================================ -struct TypeEntry { - uint32_t ordinal; - std::string name; - std::string kind; - int64_t size; - int alignment; - bool is_struct; - bool is_union; - bool is_enum; - bool is_typedef; - bool is_func; - bool is_ptr; - bool is_array; - std::string definition; - std::string resolved; // For typedefs: what it resolves to -}; - -inline void collect_types(std::vector& rows) { +void collect_types(std::vector& rows) { rows.clear(); til_t* ti = get_idati(); @@ -143,7 +105,7 @@ inline void collect_types(std::vector& rows) { // TYPES Table - All local types (enhanced) // ============================================================================ -inline CachedTableDef define_types() { +CachedTableDef define_types() { return cached_table("types") .no_shared_cache() .on_modify(ida_undo_hook) @@ -264,39 +226,15 @@ inline CachedTableDef define_types() { // TYPES_MEMBERS Table - Struct/union field details // ============================================================================ -struct MemberEntry { - uint32_t type_ordinal; - std::string type_name; - int member_index; - std::string member_name; - int64_t offset; - int64_t offset_bits; - int64_t size; - int64_t size_bits; - std::string member_type; - bool is_bitfield; - bool is_baseclass; - std::string comment; - // Member type classification (for efficient filtering) - bool mt_is_struct; - bool mt_is_union; - bool mt_is_enum; - bool mt_is_ptr; - bool mt_is_array; - int member_type_ordinal; // -1 if member type not in local types -}; - -// Helper to get ordinal of a type by name -inline int get_type_ordinal_by_name(til_t* ti, const char* type_name) { +int get_type_ordinal_by_name(til_t* ti, const char* type_name) { if (!ti || !type_name || !type_name[0]) return -1; uint32_t ord = get_type_ordinal(ti, type_name); return (ord != 0) ? static_cast(ord) : -1; } -// Helper to classify member type and get ordinal -inline void classify_member_type(const tinfo_t& mtype, til_t* ti, - bool& is_struct, bool& is_union, bool& is_enum, - bool& is_ptr, bool& is_array, int& type_ordinal) { +void classify_member_type(const tinfo_t& mtype, til_t* ti, + bool& is_struct, bool& is_union, bool& is_enum, + bool& is_ptr, bool& is_array, int& type_ordinal) { is_struct = false; is_union = false; is_enum = false; @@ -324,7 +262,7 @@ inline void classify_member_type(const tinfo_t& mtype, til_t* ti, } } -inline void collect_members(std::vector& rows) { +void collect_members(std::vector& rows) { rows.clear(); til_t* ti = get_idati(); @@ -374,123 +312,113 @@ inline void collect_members(std::vector& rows) { } } -/** - * Iterator for members of a specific type. - * Used when query has: WHERE type_ordinal = X - */ -class MembersInTypeIterator : public xsql::RowIterator { - uint32_t type_ordinal_; - std::string type_name_; - udt_type_data_t udt_; - int idx_ = -1; - bool valid_ = false; - bool has_data_ = false; - -public: - explicit MembersInTypeIterator(uint32_t ordinal) : type_ordinal_(ordinal) { - til_t* ti = get_idati(); - if (!ti) return; - - const char* name = get_numbered_type_name(ti, type_ordinal_); - if (!name) return; - type_name_ = name; +// ============================================================================ +// MembersInTypeIterator +// ============================================================================ - tinfo_t tif; - if (tif.get_numbered_type(ti, type_ordinal_)) { - if (tif.is_struct() || tif.is_union()) { - has_data_ = tif.get_udt_details(&udt_); - } +MembersInTypeIterator::MembersInTypeIterator(uint32_t ordinal) : type_ordinal_(ordinal) { + til_t* ti = get_idati(); + if (!ti) return; + + const char* name = get_numbered_type_name(ti, type_ordinal_); + if (!name) return; + type_name_ = name; + + tinfo_t tif; + if (tif.get_numbered_type(ti, type_ordinal_)) { + if (tif.is_struct() || tif.is_union()) { + has_data_ = tif.get_udt_details(&udt_); } } +} - bool next() override { - if (!has_data_) return false; - ++idx_; - valid_ = (idx_ >= 0 && static_cast(idx_) < udt_.size()); - return valid_; - } +bool MembersInTypeIterator::next() { + if (!has_data_) return false; + ++idx_; + valid_ = (idx_ >= 0 && static_cast(idx_) < udt_.size()); + return valid_; +} - bool eof() const override { - return idx_ >= 0 && !valid_; - } +bool MembersInTypeIterator::eof() const { + return idx_ >= 0 && !valid_; +} - void column(xsql::FunctionContext& ctx, int col) override { - if (!valid_ || idx_ < 0 || static_cast(idx_) >= udt_.size()) { - ctx.result_null(); - return; +void MembersInTypeIterator::column(xsql::FunctionContext& ctx, int col) { + if (!valid_ || idx_ < 0 || static_cast(idx_) >= udt_.size()) { + ctx.result_null(); + return; + } + const udm_t& m = udt_[idx_]; + switch (col) { + case 0: ctx.result_int(type_ordinal_); break; + case 1: ctx.result_text(type_name_.c_str()); break; + case 2: ctx.result_int(idx_); break; + case 3: ctx.result_text(m.name.c_str()); break; + case 4: ctx.result_int64(static_cast(m.offset / 8)); break; + case 5: ctx.result_int64(static_cast(m.offset)); break; + case 6: ctx.result_int64(static_cast(m.size / 8)); break; + case 7: ctx.result_int64(static_cast(m.size)); break; + case 8: { + qstring type_str; + m.type.print(&type_str); + ctx.result_text(type_str.c_str()); + break; } - const udm_t& m = udt_[idx_]; - switch (col) { - case 0: ctx.result_int(type_ordinal_); break; - case 1: ctx.result_text(type_name_.c_str()); break; - case 2: ctx.result_int(idx_); break; - case 3: ctx.result_text(m.name.c_str()); break; - case 4: ctx.result_int64(static_cast(m.offset / 8)); break; - case 5: ctx.result_int64(static_cast(m.offset)); break; - case 6: ctx.result_int64(static_cast(m.size / 8)); break; - case 7: ctx.result_int64(static_cast(m.size)); break; - case 8: { - qstring type_str; - m.type.print(&type_str); - ctx.result_text(type_str.c_str()); - break; + case 9: ctx.result_int(m.is_bitfield() ? 1 : 0); break; + case 10: ctx.result_int(m.is_baseclass() ? 1 : 0); break; + case 11: ctx.result_text(m.cmt.c_str()); break; + // Member type classification columns + case 12: case 13: case 14: case 15: case 16: case 17: { + // Classify the member type on-the-fly for iterator + bool mt_is_struct, mt_is_union, mt_is_enum, mt_is_ptr, mt_is_array; + int mt_ordinal; + classify_member_type(m.type, get_idati(), + mt_is_struct, mt_is_union, mt_is_enum, + mt_is_ptr, mt_is_array, mt_ordinal); + switch (col) { + case 12: ctx.result_int(mt_is_struct ? 1 : 0); break; + case 13: ctx.result_int(mt_is_union ? 1 : 0); break; + case 14: ctx.result_int(mt_is_enum ? 1 : 0); break; + case 15: ctx.result_int(mt_is_ptr ? 1 : 0); break; + case 16: ctx.result_int(mt_is_array ? 1 : 0); break; + case 17: ctx.result_int(mt_ordinal); break; } - case 9: ctx.result_int(m.is_bitfield() ? 1 : 0); break; - case 10: ctx.result_int(m.is_baseclass() ? 1 : 0); break; - case 11: ctx.result_text(m.cmt.c_str()); break; - // Member type classification columns - case 12: case 13: case 14: case 15: case 16: case 17: { - // Classify the member type on-the-fly for iterator - bool mt_is_struct, mt_is_union, mt_is_enum, mt_is_ptr, mt_is_array; - int mt_ordinal; - classify_member_type(m.type, get_idati(), - mt_is_struct, mt_is_union, mt_is_enum, - mt_is_ptr, mt_is_array, mt_ordinal); - switch (col) { - case 12: ctx.result_int(mt_is_struct ? 1 : 0); break; - case 13: ctx.result_int(mt_is_union ? 1 : 0); break; - case 14: ctx.result_int(mt_is_enum ? 1 : 0); break; - case 15: ctx.result_int(mt_is_ptr ? 1 : 0); break; - case 16: ctx.result_int(mt_is_array ? 1 : 0); break; - case 17: ctx.result_int(mt_ordinal); break; - } - break; - } - default: ctx.result_null(); break; + break; } + default: ctx.result_null(); break; } +} - int64_t rowid() const override { - return static_cast(type_ordinal_) * 10000 + idx_; - } -}; +int64_t MembersInTypeIterator::rowid() const { + return static_cast(type_ordinal_) * 10000 + idx_; +} -// Helper to get type and member by ordinal/index (for write operations) -struct TypeMemberRef { - tinfo_t tif; - udt_type_data_t udt; - bool valid; - uint32_t ordinal; +// ============================================================================ +// TypeMemberRef +// ============================================================================ - TypeMemberRef(uint32_t ord) : valid(false), ordinal(ord) { - til_t* ti = get_idati(); - if (!ti) return; - if (tif.get_numbered_type(ti, ord)) { - if (tif.is_struct() || tif.is_union()) { - valid = tif.get_udt_details(&udt); - } +TypeMemberRef::TypeMemberRef(uint32_t ord) : valid(false), ordinal(ord) { + til_t* ti = get_idati(); + if (!ti) return; + if (tif.get_numbered_type(ti, ord)) { + if (tif.is_struct() || tif.is_union()) { + valid = tif.get_udt_details(&udt); } } +} - bool save() { - if (!valid) return false; - tinfo_t new_tif; - new_tif.create_udt(udt, tif.is_union() ? BTF_UNION : BTF_STRUCT); - return new_tif.set_numbered_type(get_idati(), ordinal, NTF_REPLACE, nullptr); - } -}; +bool TypeMemberRef::save() { + if (!valid) return false; + tinfo_t new_tif; + new_tif.create_udt(udt, tif.is_union() ? BTF_UNION : BTF_STRUCT); + return new_tif.set_numbered_type(get_idati(), ordinal, NTF_REPLACE, nullptr); +} -inline bool build_member_entry(uint32_t ordinal, int member_index, MemberEntry& entry) { +// ============================================================================ +// build_member_entry +// ============================================================================ + +bool build_member_entry(uint32_t ordinal, int member_index, MemberEntry& entry) { til_t* ti = get_idati(); if (!ti) return false; @@ -528,7 +456,11 @@ inline bool build_member_entry(uint32_t ordinal, int member_index, MemberEntry& return true; } -inline CachedTableDef define_types_members() { +// ============================================================================ +// TYPES_MEMBERS Table Definition +// ============================================================================ + +CachedTableDef define_types_members() { return cached_table("types_members") .no_shared_cache() .on_modify(ida_undo_hook) @@ -684,17 +616,7 @@ inline CachedTableDef define_types_members() { // TYPES_ENUM_VALUES Table - Enum constants // ============================================================================ -struct EnumValueEntry { - uint32_t type_ordinal; - std::string type_name; - int value_index; - std::string value_name; - int64_t value; - uint64_t uvalue; - std::string comment; -}; - -inline void collect_enum_values(std::vector& rows) { +void collect_enum_values(std::vector& rows) { rows.clear(); til_t* ti = get_idati(); @@ -730,95 +652,85 @@ inline void collect_enum_values(std::vector& rows) { } } -/** - * Iterator for enum values of a specific enum type. - * Used when query has: WHERE type_ordinal = X - */ -class EnumValuesInTypeIterator : public xsql::RowIterator { - uint32_t type_ordinal_; - std::string type_name_; - enum_type_data_t ei_; - int idx_ = -1; - bool valid_ = false; - bool has_data_ = false; - -public: - explicit EnumValuesInTypeIterator(uint32_t ordinal) : type_ordinal_(ordinal) { - til_t* ti = get_idati(); - if (!ti) return; - - const char* name = get_numbered_type_name(ti, type_ordinal_); - if (!name) return; - type_name_ = name; +// ============================================================================ +// EnumValuesInTypeIterator +// ============================================================================ - tinfo_t tif; - if (tif.get_numbered_type(ti, type_ordinal_)) { - if (tif.is_enum()) { - has_data_ = tif.get_enum_details(&ei_); - } +EnumValuesInTypeIterator::EnumValuesInTypeIterator(uint32_t ordinal) : type_ordinal_(ordinal) { + til_t* ti = get_idati(); + if (!ti) return; + + const char* name = get_numbered_type_name(ti, type_ordinal_); + if (!name) return; + type_name_ = name; + + tinfo_t tif; + if (tif.get_numbered_type(ti, type_ordinal_)) { + if (tif.is_enum()) { + has_data_ = tif.get_enum_details(&ei_); } } +} - bool next() override { - if (!has_data_) return false; - ++idx_; - valid_ = (idx_ >= 0 && static_cast(idx_) < ei_.size()); - return valid_; - } +bool EnumValuesInTypeIterator::next() { + if (!has_data_) return false; + ++idx_; + valid_ = (idx_ >= 0 && static_cast(idx_) < ei_.size()); + return valid_; +} - bool eof() const override { - return idx_ >= 0 && !valid_; - } +bool EnumValuesInTypeIterator::eof() const { + return idx_ >= 0 && !valid_; +} - void column(xsql::FunctionContext& ctx, int col) override { - if (!valid_ || idx_ < 0 || static_cast(idx_) >= ei_.size()) { - ctx.result_null(); - return; - } - const edm_t& e = ei_[idx_]; - switch (col) { - case 0: ctx.result_int(type_ordinal_); break; - case 1: ctx.result_text(type_name_.c_str()); break; - case 2: ctx.result_int(idx_); break; - case 3: ctx.result_text(e.name.c_str()); break; - case 4: ctx.result_int64(static_cast(e.value)); break; - case 5: ctx.result_int64(static_cast(e.value)); break; // uvalue - case 6: ctx.result_text(e.cmt.c_str()); break; - default: ctx.result_null(); break; - } +void EnumValuesInTypeIterator::column(xsql::FunctionContext& ctx, int col) { + if (!valid_ || idx_ < 0 || static_cast(idx_) >= ei_.size()) { + ctx.result_null(); + return; } - - int64_t rowid() const override { - return static_cast(type_ordinal_) * 10000 + idx_; + const edm_t& e = ei_[idx_]; + switch (col) { + case 0: ctx.result_int(type_ordinal_); break; + case 1: ctx.result_text(type_name_.c_str()); break; + case 2: ctx.result_int(idx_); break; + case 3: ctx.result_text(e.name.c_str()); break; + case 4: ctx.result_int64(static_cast(e.value)); break; + case 5: ctx.result_int64(static_cast(e.value)); break; // uvalue + case 6: ctx.result_text(e.cmt.c_str()); break; + default: ctx.result_null(); break; } -}; +} -// Helper to get enum type by ordinal (for write operations) -struct EnumTypeRef { - tinfo_t tif; - enum_type_data_t ei; - bool valid; - uint32_t ordinal; +int64_t EnumValuesInTypeIterator::rowid() const { + return static_cast(type_ordinal_) * 10000 + idx_; +} - EnumTypeRef(uint32_t ord) : valid(false), ordinal(ord) { - til_t* ti = get_idati(); - if (!ti) return; - if (tif.get_numbered_type(ti, ord)) { - if (tif.is_enum()) { - valid = tif.get_enum_details(&ei); - } +// ============================================================================ +// EnumTypeRef +// ============================================================================ + +EnumTypeRef::EnumTypeRef(uint32_t ord) : valid(false), ordinal(ord) { + til_t* ti = get_idati(); + if (!ti) return; + if (tif.get_numbered_type(ti, ord)) { + if (tif.is_enum()) { + valid = tif.get_enum_details(&ei); } } +} - bool save() { - if (!valid) return false; - tinfo_t new_tif; - new_tif.create_enum(ei); - return new_tif.set_numbered_type(get_idati(), ordinal, NTF_REPLACE, nullptr); - } -}; +bool EnumTypeRef::save() { + if (!valid) return false; + tinfo_t new_tif; + new_tif.create_enum(ei); + return new_tif.set_numbered_type(get_idati(), ordinal, NTF_REPLACE, nullptr); +} -inline bool build_enum_value_entry(uint32_t ordinal, int value_index, EnumValueEntry& entry) { +// ============================================================================ +// build_enum_value_entry +// ============================================================================ + +bool build_enum_value_entry(uint32_t ordinal, int value_index, EnumValueEntry& entry) { til_t* ti = get_idati(); if (!ti) return false; const char* type_name = get_numbered_type_name(ti, ordinal); @@ -843,7 +755,11 @@ inline bool build_enum_value_entry(uint32_t ordinal, int value_index, EnumValueE return true; } -inline CachedTableDef define_types_enum_values() { +// ============================================================================ +// TYPES_ENUM_VALUES Table Definition +// ============================================================================ + +CachedTableDef define_types_enum_values() { return cached_table("types_enum_values") .no_shared_cache() .on_modify(ida_undo_hook) @@ -967,31 +883,8 @@ inline CachedTableDef define_types_enum_values() { // TYPES_FUNC_ARGS Table - Function prototype arguments // ============================================================================ -// Type classification info (surface + resolved) -struct TypeClassification { - // Surface-level classification (literal type as written) - bool is_ptr = false; - bool is_int = false; // Exactly int type - bool is_integral = false; // Int-like family (int, long, short, char, bool) - bool is_float = false; - bool is_void = false; - bool is_struct = false; - bool is_array = false; - int ptr_depth = 0; - std::string base_type; // Type name with pointers stripped - - // Resolved classification (after typedef resolution) - bool is_ptr_resolved = false; - bool is_int_resolved = false; - bool is_integral_resolved = false; - bool is_float_resolved = false; - bool is_void_resolved = false; - int ptr_depth_resolved = 0; - std::string base_type_resolved; -}; - // Get pointer depth (int** -> 2, int* -> 1, int -> 0) -inline int get_ptr_depth(tinfo_t tif) { +int get_ptr_depth(tinfo_t tif) { int depth = 0; while (tif.is_ptr()) { depth++; @@ -1001,7 +894,7 @@ inline int get_ptr_depth(tinfo_t tif) { } // Get base type name (strips pointers/arrays) -inline std::string get_base_type_name(tinfo_t tif) { +std::string get_base_type_name(tinfo_t tif) { // Strip pointers while (tif.is_ptr()) { tif = tif.get_pointed_object(); @@ -1016,10 +909,10 @@ inline std::string get_base_type_name(tinfo_t tif) { } // Classify a single tinfo_t (surface or resolved) -inline void classify_tinfo(const tinfo_t& tif, - bool& is_ptr, bool& is_int, bool& is_integral, - bool& is_float, bool& is_void, bool& is_struct, - bool& is_array, int& ptr_depth, std::string& base_type) { +void classify_tinfo(const tinfo_t& tif, + bool& is_ptr, bool& is_int, bool& is_integral, + bool& is_float, bool& is_void, bool& is_struct, + bool& is_array, int& ptr_depth, std::string& base_type) { is_ptr = tif.is_ptr(); is_array = tif.is_array(); is_struct = tif.is_struct() || tif.is_union(); @@ -1038,16 +931,16 @@ inline void classify_tinfo(const tinfo_t& tif, } // Check if type is a typedef (type reference) at surface level -inline bool is_surface_typedef(const tinfo_t& tif) { +bool is_surface_typedef(const tinfo_t& tif) { return tif.is_typeref(); } // Classify surface-level type (WITHOUT typedef resolution) // If tif is a typedef, surface classification shows it as "other" not the underlying type -inline void classify_surface(const tinfo_t& tif, - bool& is_ptr, bool& is_int, bool& is_integral, - bool& is_float, bool& is_void, bool& is_struct, - bool& is_array, int& ptr_depth, std::string& base_type) { +void classify_surface(const tinfo_t& tif, + bool& is_ptr, bool& is_int, bool& is_integral, + bool& is_float, bool& is_void, bool& is_struct, + bool& is_array, int& ptr_depth, std::string& base_type) { // If it's a typedef, surface level is NOT a ptr/int/etc - it's a typedef if (is_surface_typedef(tif)) { is_ptr = false; @@ -1075,7 +968,7 @@ inline void classify_surface(const tinfo_t& tif, } // Full type classification (surface + resolved) -inline TypeClassification classify_arg_type(const tinfo_t& tif) { +TypeClassification classify_arg_type(const tinfo_t& tif) { TypeClassification tc; // Surface classification (without typedef resolution) @@ -1095,19 +988,11 @@ inline TypeClassification classify_arg_type(const tinfo_t& tif) { return tc; } -struct FuncArgEntry { - uint32_t type_ordinal; - std::string type_name; - int arg_index; // -1 for return type - std::string arg_name; - std::string arg_type; - std::string calling_conv; // Only set on arg_index=-1 row - - // Type classification - TypeClassification tc; -}; +// ============================================================================ +// Calling Convention +// ============================================================================ -inline const char* get_calling_convention_name(cm_t cc) { +const char* get_calling_convention_name(cm_t cc) { // Extract calling convention from cm_t (using CM_CC_MASK) callcnv_t conv = cc & CM_CC_MASK; switch (conv) { @@ -1127,7 +1012,11 @@ inline const char* get_calling_convention_name(cm_t cc) { } } -inline void collect_func_args(std::vector& rows) { +// ============================================================================ +// collect_func_args +// ============================================================================ + +void collect_func_args(std::vector& rows) { rows.clear(); til_t* ti = get_idati(); @@ -1181,135 +1070,129 @@ inline void collect_func_args(std::vector& rows) { } } -/** - * Iterator for function args of a specific function type. - * Used when query has: WHERE type_ordinal = X - */ -class FuncArgsInTypeIterator : public xsql::RowIterator { - uint32_t type_ordinal_; - std::string type_name_; - func_type_data_t fi_; - int idx_ = -2; // Start at -2, first next() moves to -1 (return type) - bool valid_ = false; - bool has_data_ = false; - -public: - explicit FuncArgsInTypeIterator(uint32_t ordinal) : type_ordinal_(ordinal) { - til_t* ti = get_idati(); - if (!ti) return; - - const char* name = get_numbered_type_name(ti, type_ordinal_); - if (!name) return; - type_name_ = name; +// ============================================================================ +// FuncArgsInTypeIterator +// ============================================================================ - tinfo_t tif; - if (tif.get_numbered_type(ti, type_ordinal_)) { - if (tif.is_func()) { - has_data_ = tif.get_func_details(&fi_); - } +FuncArgsInTypeIterator::FuncArgsInTypeIterator(uint32_t ordinal) : type_ordinal_(ordinal) { + til_t* ti = get_idati(); + if (!ti) return; + + const char* name = get_numbered_type_name(ti, type_ordinal_); + if (!name) return; + type_name_ = name; + + tinfo_t tif; + if (tif.get_numbered_type(ti, type_ordinal_)) { + if (tif.is_func()) { + has_data_ = tif.get_func_details(&fi_); } } +} - bool next() override { - if (!has_data_) return false; - ++idx_; - // idx=-1 is return type, idx=0..fi_.size()-1 are arguments - valid_ = (idx_ == -1) || (idx_ >= 0 && static_cast(idx_) < fi_.size()); - return valid_; - } +bool FuncArgsInTypeIterator::next() { + if (!has_data_) return false; + ++idx_; + // idx=-1 is return type, idx=0..fi_.size()-1 are arguments + valid_ = (idx_ == -1) || (idx_ >= 0 && static_cast(idx_) < fi_.size()); + return valid_; +} - bool eof() const override { - return idx_ >= -1 && !valid_; - } +bool FuncArgsInTypeIterator::eof() const { + return idx_ >= -1 && !valid_; +} - void column(xsql::FunctionContext& ctx, int col) override { - if (!valid_) { - ctx.result_null(); - return; - } +void FuncArgsInTypeIterator::column(xsql::FunctionContext& ctx, int col) { + if (!valid_) { + ctx.result_null(); + return; + } - // Get the type for classification (computed on-the-fly for iterator) - auto get_current_type = [&]() -> tinfo_t { - if (idx_ == -1) return fi_.rettype; - if (static_cast(idx_) < fi_.size()) return fi_[idx_].type; - return tinfo_t(); - }; - - switch (col) { - case 0: // type_ordinal - ctx.result_int(type_ordinal_); - break; - case 1: // type_name - ctx.result_text(type_name_.c_str()); - break; - case 2: // arg_index - ctx.result_int(idx_); - break; - case 3: // arg_name - if (idx_ == -1) { - ctx.result_text_static("(return)"); - } else if (static_cast(idx_) < fi_.size()) { - ctx.result_text(fi_[idx_].name.c_str()); - } else { - ctx.result_null(); - } - break; - case 4: // arg_type - if (idx_ == -1) { - qstring ret_str; - fi_.rettype.print(&ret_str); - ctx.result_text(ret_str.c_str()); - } else if (static_cast(idx_) < fi_.size()) { - qstring type_str; - fi_[idx_].type.print(&type_str); - ctx.result_text(type_str.c_str()); - } else { - ctx.result_null(); - } - break; - case 5: // calling_conv - if (idx_ == -1) { - ctx.result_text_static(get_calling_convention_name(fi_.get_cc())); - } else { - ctx.result_text_static(""); - } - break; - // Type classification columns (computed on-the-fly) - case 6: case 7: case 8: case 9: case 10: case 11: case 12: case 13: case 14: - case 15: case 16: case 17: case 18: case 19: case 20: case 21: { - TypeClassification tc = classify_arg_type(get_current_type()); - switch (col) { - case 6: ctx.result_int(tc.is_ptr ? 1 : 0); break; - case 7: ctx.result_int(tc.is_int ? 1 : 0); break; - case 8: ctx.result_int(tc.is_integral ? 1 : 0); break; - case 9: ctx.result_int(tc.is_float ? 1 : 0); break; - case 10: ctx.result_int(tc.is_void ? 1 : 0); break; - case 11: ctx.result_int(tc.is_struct ? 1 : 0); break; - case 12: ctx.result_int(tc.is_array ? 1 : 0); break; - case 13: ctx.result_int(tc.ptr_depth); break; - case 14: ctx.result_text(tc.base_type.c_str()); break; - case 15: ctx.result_int(tc.is_ptr_resolved ? 1 : 0); break; - case 16: ctx.result_int(tc.is_int_resolved ? 1 : 0); break; - case 17: ctx.result_int(tc.is_integral_resolved ? 1 : 0); break; - case 18: ctx.result_int(tc.is_float_resolved ? 1 : 0); break; - case 19: ctx.result_int(tc.is_void_resolved ? 1 : 0); break; - case 20: ctx.result_int(tc.ptr_depth_resolved); break; - case 21: ctx.result_text(tc.base_type_resolved.c_str()); break; - } - break; + // Get the type for classification (computed on-the-fly for iterator) + auto get_current_type = [&]() -> tinfo_t { + if (idx_ == -1) return fi_.rettype; + if (static_cast(idx_) < fi_.size()) return fi_[idx_].type; + return tinfo_t(); + }; + + switch (col) { + case 0: // type_ordinal + ctx.result_int(type_ordinal_); + break; + case 1: // type_name + ctx.result_text(type_name_.c_str()); + break; + case 2: // arg_index + ctx.result_int(idx_); + break; + case 3: // arg_name + if (idx_ == -1) { + ctx.result_text_static("(return)"); + } else if (static_cast(idx_) < fi_.size()) { + ctx.result_text(fi_[idx_].name.c_str()); + } else { + ctx.result_null(); } - default: + break; + case 4: // arg_type + if (idx_ == -1) { + qstring ret_str; + fi_.rettype.print(&ret_str); + ctx.result_text(ret_str.c_str()); + } else if (static_cast(idx_) < fi_.size()) { + qstring type_str; + fi_[idx_].type.print(&type_str); + ctx.result_text(type_str.c_str()); + } else { ctx.result_null(); - break; + } + break; + case 5: // calling_conv + if (idx_ == -1) { + ctx.result_text_static(get_calling_convention_name(fi_.get_cc())); + } else { + ctx.result_text_static(""); + } + break; + // Type classification columns (computed on-the-fly) + case 6: case 7: case 8: case 9: case 10: case 11: case 12: case 13: case 14: + case 15: case 16: case 17: case 18: case 19: case 20: case 21: { + TypeClassification tc = classify_arg_type(get_current_type()); + switch (col) { + case 6: ctx.result_int(tc.is_ptr ? 1 : 0); break; + case 7: ctx.result_int(tc.is_int ? 1 : 0); break; + case 8: ctx.result_int(tc.is_integral ? 1 : 0); break; + case 9: ctx.result_int(tc.is_float ? 1 : 0); break; + case 10: ctx.result_int(tc.is_void ? 1 : 0); break; + case 11: ctx.result_int(tc.is_struct ? 1 : 0); break; + case 12: ctx.result_int(tc.is_array ? 1 : 0); break; + case 13: ctx.result_int(tc.ptr_depth); break; + case 14: ctx.result_text(tc.base_type.c_str()); break; + case 15: ctx.result_int(tc.is_ptr_resolved ? 1 : 0); break; + case 16: ctx.result_int(tc.is_int_resolved ? 1 : 0); break; + case 17: ctx.result_int(tc.is_integral_resolved ? 1 : 0); break; + case 18: ctx.result_int(tc.is_float_resolved ? 1 : 0); break; + case 19: ctx.result_int(tc.is_void_resolved ? 1 : 0); break; + case 20: ctx.result_int(tc.ptr_depth_resolved); break; + case 21: ctx.result_text(tc.base_type_resolved.c_str()); break; + } + break; } + default: + ctx.result_null(); + break; } +} - int64_t rowid() const override { - return static_cast(type_ordinal_) * 10000 + (idx_ + 1); - } -}; +int64_t FuncArgsInTypeIterator::rowid() const { + return static_cast(type_ordinal_) * 10000 + (idx_ + 1); +} + +// ============================================================================ +// TYPES_FUNC_ARGS Table Definition +// ============================================================================ -inline CachedTableDef define_types_func_args() { +CachedTableDef define_types_func_args() { return cached_table("types_func_args") .no_shared_cache() .estimate_rows([]() -> size_t { @@ -1395,48 +1278,38 @@ inline CachedTableDef define_types_func_args() { // Types Registry // ============================================================================ -struct TypesRegistry { - CachedTableDef types; - CachedTableDef types_members; - CachedTableDef types_enum_values; - CachedTableDef types_func_args; - - TypesRegistry() - : types(define_types()) - , types_members(define_types_members()) - , types_enum_values(define_types_enum_values()) - , types_func_args(define_types_func_args()) - {} +TypesRegistry::TypesRegistry() + : types(define_types()) + , types_members(define_types_members()) + , types_enum_values(define_types_enum_values()) + , types_func_args(define_types_func_args()) +{} - void register_all(xsql::Database& db) { - db.register_cached_table("ida_types", &types); - db.create_table("types", "ida_types"); +void TypesRegistry::register_all(xsql::Database& db) { + db.register_cached_table("ida_types", &types); + db.create_table("types", "ida_types"); - db.register_cached_table("ida_types_members", &types_members); - db.create_table("types_members", "ida_types_members"); + db.register_cached_table("ida_types_members", &types_members); + db.create_table("types_members", "ida_types_members"); - db.register_cached_table("ida_types_enum_values", &types_enum_values); - db.create_table("types_enum_values", "ida_types_enum_values"); + db.register_cached_table("ida_types_enum_values", &types_enum_values); + db.create_table("types_enum_values", "ida_types_enum_values"); - db.register_cached_table("ida_types_func_args", &types_func_args); - db.create_table("types_func_args", "ida_types_func_args"); + db.register_cached_table("ida_types_func_args", &types_func_args); + db.create_table("types_func_args", "ida_types_func_args"); - // Create views - create_views(db); - } + // Create views + create_views(db); +} -private: - void create_views(xsql::Database& db) { - // Filtering views - db.exec("CREATE VIEW IF NOT EXISTS types_v_structs AS SELECT * FROM types WHERE is_struct = 1"); - db.exec("CREATE VIEW IF NOT EXISTS types_v_unions AS SELECT * FROM types WHERE is_union = 1"); - db.exec("CREATE VIEW IF NOT EXISTS types_v_enums AS SELECT * FROM types WHERE is_enum = 1"); - db.exec("CREATE VIEW IF NOT EXISTS types_v_typedefs AS SELECT * FROM types WHERE is_typedef = 1"); - db.exec("CREATE VIEW IF NOT EXISTS types_v_funcs AS SELECT * FROM types WHERE is_func = 1"); - } -}; +void TypesRegistry::create_views(xsql::Database& db) { + // Filtering views + db.exec("CREATE VIEW IF NOT EXISTS types_v_structs AS SELECT * FROM types WHERE is_struct = 1"); + db.exec("CREATE VIEW IF NOT EXISTS types_v_unions AS SELECT * FROM types WHERE is_union = 1"); + db.exec("CREATE VIEW IF NOT EXISTS types_v_enums AS SELECT * FROM types WHERE is_enum = 1"); + db.exec("CREATE VIEW IF NOT EXISTS types_v_typedefs AS SELECT * FROM types WHERE is_typedef = 1"); + db.exec("CREATE VIEW IF NOT EXISTS types_v_funcs AS SELECT * FROM types WHERE is_func = 1"); +} } // namespace types } // namespace idasql - - diff --git a/src/lib/src/entities_types.hpp b/src/lib/src/entities_types.hpp new file mode 100644 index 0000000..751767a --- /dev/null +++ b/src/lib/src/entities_types.hpp @@ -0,0 +1,300 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * entities_types.hpp - IDA type system tables (types, members, enum values, func args) + * + * Provides SQL tables for querying IDA's type library: + * types - All local types (structs, unions, enums, typedefs, funcs) + * types_members - Struct/union member details + * types_enum_values - Enum constant values + * types_func_args - Function prototype arguments + * + * Also provides views: + * types_v_structs - Filter: structs only + * types_v_unions - Filter: unions only + * types_v_enums - Filter: enums only + * types_v_typedefs - Filter: typedefs only + * types_v_funcs - Filter: function types only + */ + +#pragma once + +#include + +#include +#include + +#include "ida_headers.hpp" + +namespace idasql { +namespace types { + +// ============================================================================ +// Undo hook +// ============================================================================ + +void ida_undo_hook(const std::string&); + +// ============================================================================ +// Type Kind Classification +// ============================================================================ + +const char* get_type_kind(const tinfo_t& tif); + +// ============================================================================ +// Type Entry Cache +// ============================================================================ + +struct TypeEntry { + uint32_t ordinal; + std::string name; + std::string kind; + int64_t size; + int alignment; + bool is_struct; + bool is_union; + bool is_enum; + bool is_typedef; + bool is_func; + bool is_ptr; + bool is_array; + std::string definition; + std::string resolved; // For typedefs: what it resolves to +}; + +void collect_types(std::vector& rows); + +// ============================================================================ +// Member Entry Cache +// ============================================================================ + +struct MemberEntry { + uint32_t type_ordinal; + std::string type_name; + int member_index; + std::string member_name; + int64_t offset; + int64_t offset_bits; + int64_t size; + int64_t size_bits; + std::string member_type; + bool is_bitfield; + bool is_baseclass; + std::string comment; + // Member type classification (for efficient filtering) + bool mt_is_struct; + bool mt_is_union; + bool mt_is_enum; + bool mt_is_ptr; + bool mt_is_array; + int member_type_ordinal; // -1 if member type not in local types +}; + +int get_type_ordinal_by_name(til_t* ti, const char* type_name); + +void classify_member_type(const tinfo_t& mtype, til_t* ti, + bool& is_struct, bool& is_union, bool& is_enum, + bool& is_ptr, bool& is_array, int& type_ordinal); + +void collect_members(std::vector& rows); + +// ============================================================================ +// Enum Value Entry Cache +// ============================================================================ + +struct EnumValueEntry { + uint32_t type_ordinal; + std::string type_name; + int value_index; + std::string value_name; + int64_t value; + uint64_t uvalue; + std::string comment; +}; + +void collect_enum_values(std::vector& rows); + +// ============================================================================ +// Type Classification (for func args) +// ============================================================================ + +struct TypeClassification { + // Surface-level classification (literal type as written) + bool is_ptr = false; + bool is_int = false; // Exactly int type + bool is_integral = false; // Int-like family (int, long, short, char, bool) + bool is_float = false; + bool is_void = false; + bool is_struct = false; + bool is_array = false; + int ptr_depth = 0; + std::string base_type; // Type name with pointers stripped + + // Resolved classification (after typedef resolution) + bool is_ptr_resolved = false; + bool is_int_resolved = false; + bool is_integral_resolved = false; + bool is_float_resolved = false; + bool is_void_resolved = false; + int ptr_depth_resolved = 0; + std::string base_type_resolved; +}; + +int get_ptr_depth(tinfo_t tif); +std::string get_base_type_name(tinfo_t tif); + +void classify_tinfo(const tinfo_t& tif, + bool& is_ptr, bool& is_int, bool& is_integral, + bool& is_float, bool& is_void, bool& is_struct, + bool& is_array, int& ptr_depth, std::string& base_type); + +bool is_surface_typedef(const tinfo_t& tif); + +void classify_surface(const tinfo_t& tif, + bool& is_ptr, bool& is_int, bool& is_integral, + bool& is_float, bool& is_void, bool& is_struct, + bool& is_array, int& ptr_depth, std::string& base_type); + +TypeClassification classify_arg_type(const tinfo_t& tif); + +// ============================================================================ +// Func Arg Entry Cache +// ============================================================================ + +struct FuncArgEntry { + uint32_t type_ordinal; + std::string type_name; + int arg_index; // -1 for return type + std::string arg_name; + std::string arg_type; + std::string calling_conv; // Only set on arg_index=-1 row + + // Type classification + TypeClassification tc; +}; + +const char* get_calling_convention_name(cm_t cc); + +void collect_func_args(std::vector& rows); + +// ============================================================================ +// Helper structs for write operations +// ============================================================================ + +struct TypeMemberRef { + tinfo_t tif; + udt_type_data_t udt; + bool valid; + uint32_t ordinal; + + TypeMemberRef(uint32_t ord); + bool save(); +}; + +bool build_member_entry(uint32_t ordinal, int member_index, MemberEntry& entry); + +struct EnumTypeRef { + tinfo_t tif; + enum_type_data_t ei; + bool valid; + uint32_t ordinal; + + EnumTypeRef(uint32_t ord); + bool save(); +}; + +bool build_enum_value_entry(uint32_t ordinal, int value_index, EnumValueEntry& entry); + +// ============================================================================ +// Iterators (for constraint pushdown: WHERE type_ordinal = X) +// ============================================================================ + +/** + * Iterator for members of a specific type. + * Used when query has: WHERE type_ordinal = X + */ +class MembersInTypeIterator : public xsql::RowIterator { + uint32_t type_ordinal_; + std::string type_name_; + udt_type_data_t udt_; + int idx_ = -1; + bool valid_ = false; + bool has_data_ = false; + +public: + explicit MembersInTypeIterator(uint32_t ordinal); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +/** + * Iterator for enum values of a specific enum type. + * Used when query has: WHERE type_ordinal = X + */ +class EnumValuesInTypeIterator : public xsql::RowIterator { + uint32_t type_ordinal_; + std::string type_name_; + enum_type_data_t ei_; + int idx_ = -1; + bool valid_ = false; + bool has_data_ = false; + +public: + explicit EnumValuesInTypeIterator(uint32_t ordinal); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +/** + * Iterator for function args of a specific function type. + * Used when query has: WHERE type_ordinal = X + */ +class FuncArgsInTypeIterator : public xsql::RowIterator { + uint32_t type_ordinal_; + std::string type_name_; + func_type_data_t fi_; + int idx_ = -2; // Start at -2, first next() moves to -1 (return type) + bool valid_ = false; + bool has_data_ = false; + +public: + explicit FuncArgsInTypeIterator(uint32_t ordinal); + bool next() override; + bool eof() const override; + void column(xsql::FunctionContext& ctx, int col) override; + int64_t rowid() const override; +}; + +// ============================================================================ +// Table Definitions +// ============================================================================ + +CachedTableDef define_types(); +CachedTableDef define_types_members(); +CachedTableDef define_types_enum_values(); +CachedTableDef define_types_func_args(); + +// ============================================================================ +// Types Registry +// ============================================================================ + +struct TypesRegistry { + CachedTableDef types; + CachedTableDef types_members; + CachedTableDef types_enum_values; + CachedTableDef types_func_args; + + TypesRegistry(); + void register_all(xsql::Database& db); + +private: + void create_views(xsql::Database& db); +}; + +} // namespace types +} // namespace idasql diff --git a/src/lib/include/idasql/functions.hpp b/src/lib/src/functions.cpp similarity index 81% rename from src/lib/include/idasql/functions.hpp rename to src/lib/src/functions.cpp index d1f2065..66e4fc7 100644 --- a/src/lib/include/idasql/functions.hpp +++ b/src/lib/src/functions.cpp @@ -1,111 +1,7 @@ -/** - * functions.hpp - Custom SQL functions for IDA operations - * - * Disassembly: - * - disasm_at(address) - Canonical listing line at address head (code or data) - * - disasm_at(address, context) - Canonical listing line with +/- context heads - * - disasm(address) - Single disassembly line - * - disasm(address, count) - Next N instructions from address - * - disasm_range(start, end) - All instructions in [start, end) - * - disasm_func(address) - Full function disassembly - * - bytes(address, count) - Bytes as hex string - * - bytes_raw(address, count) - Bytes as blob - * - mnemonic(address) - Instruction mnemonic only - * - operand(address, n) - Operand text (n=0-7) - * - * Binary Search: - * - search_bytes(pattern) - Find all byte pattern matches (JSON array) - * - search_bytes(pattern, start, end) - Search within range - * - search_first(pattern) - First match address (or NULL) - * - search_first(pattern, start, end) - First match in range - * - * Names & Navigation: - * - name_at(address) - Name at address - * - func_at(address) - Function name containing address - * - func_start(address) - Start of containing function - * - func_end(address) - End of containing function - * - segment_at(address) - Segment name containing address - * - * Comments & Naming: - * - comment_at(address) - Get comment at address - * - set_comment(address, text) - Set comment at address - * - set_name(address, name) - Set name at address - * - type_at(address) - Get type declaration at address - * - set_type(address, decl) - Apply C declaration/type at address - * - parse_decls(text) - Import C declarations into local types - * - * Cross-References: - * - xrefs_to(address) - Xrefs to address (JSON array) - * - xrefs_from(address) - Xrefs from address (JSON array) - * - * Decompiler: - * - decompile(address) - Decompiled pseudocode for function - * - decompile(address, refresh) - Decompiled pseudocode with refresh option - * - list_lvars(address) - Local variables as JSON - * - rename_lvar(func, idx, new) - Rename local by index - * - rename_lvar_by_name(func, old, new) - Rename local by name - * - set_lvar_comment(func, idx, comment) - Set local variable comment by index - * - set_union_selection(func, ea, path) - Set/clear user union selection by ea - * - set_union_selection_item(func, item, path) - Set/clear selection by ctree item id - * - set_union_selection_ea_arg(func, ea, arg_idx, path[, callee]) - Set/clear by call arg coordinate - * - call_arg_item(func, ea, arg_idx[, callee]) - Resolve call arg coordinate to ctree item id - * - ctree_item_at(func, ea[, op_name[, nth]]) - Resolve generic expression coordinate to ctree item id - * - set_union_selection_ea_expr(func, ea, path[, op_name[, nth]]) - Set/clear by expression coordinate - * - get_union_selection(func, ea) - Get union path JSON by ea - * - get_union_selection_item(func, item) - Get union path JSON by ctree item id - * - get_union_selection_ea_arg(func, ea, arg_idx[, callee]) - Get union path JSON by call arg coordinate - * - get_union_selection_ea_expr(func, ea[, op_name[, nth]]) - Get union path JSON by expression coordinate - * - set_numform(func, ea, opnum, spec) - Set/clear decompiler numform by ea/opnum - * - set_numform_item(func, item, opnum, spec) - Set/clear decompiler numform by ctree item - * - set_numform_ea_arg(func, ea, arg_idx, opnum, spec[, callee]) - Set/clear numform by call arg coordinate - * - set_numform_ea_expr(func, ea, opnum, spec[, op_name[, nth]]) - Set/clear numform by expression coordinate - * - get_numform(func, ea, opnum) - Get decompiler numform JSON by ea/opnum - * - get_numform_item(func, item, opnum) - Get decompiler numform JSON by ctree item - * - get_numform_ea_arg(func, ea, arg_idx, opnum[, callee]) - Get numform JSON by call arg coordinate - * - get_numform_ea_expr(func, ea, opnum[, op_name[, nth]]) - Get numform JSON by expression coordinate - * - * Byte Patching: - * - patch_byte(addr, val) - Patch single byte - * - patch_word(addr, val) - Patch word (2 bytes) - * - patch_dword(addr, val) - Patch dword (4 bytes) - * - patch_qword(addr, val) - Patch qword (8 bytes) - * - revert_byte(addr) - Revert patched byte - * - get_original_byte(addr) - Get pre-patch byte value - * - * Function Index (O(1)): - * - func_qty() - Total function count - * - func_at_index(n) - Function address at index n - * - * Instruction Decoding: - * - itype(address) - Instruction type code - * - decode_insn(address) - Full instruction info (JSON) - * - operand_type(address, n) - Operand type (0-5) - * - operand_value(address, n) - Operand value/address - * - * Entity Search: - * - grep(pattern) - Search entities (JSON, default limit=50) - * - grep(pattern, limit) - Search entities with custom limit - * - grep(pattern, limit, offset) - Search entities with pagination - * - * String List: - * - rebuild_strings() - Rebuild string cache - * - string_count() - Get cached string count - * - * File Generation: - * - gen_asm_file(ea1, ea2, path) - Generate assembly file - * - gen_lst_file(ea1, ea2, path) - Generate listing file - * - gen_map_file(path) - Generate MAP file - * - gen_idc_file(ea1, ea2, path) - Generate IDC script - * - gen_html_file(ea1, ea2, path)- Generate HTML listing - * - gen_cfg_dot(address) - CFG as DOT string - * - gen_cfg_dot_file(addr, path) - CFG DOT to file - * - gen_schema_dot() - Schema diagram as DOT - * - * Database: - * - save_database() - Persist changes to .i64 file - */ - -#pragma once +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "functions.hpp" #include @@ -120,35 +16,27 @@ #include #include #include +#include +#include +#include -#include - -// IDA SDK headers (order matters) -#include -#include -#include // Must come early -#include // insn_t, decode_insn for instruction decoding -#include -#include -#include -#include -#include -#include -#include // gen_file -#include // qfile_t for file operations -#include // FlowChart for CFG generation -#include // String list functions -#include // String type constants -#include // parse_decls/apply_cdecl/print_type - -// Hex-Rays decompiler - always included, runtime detection -#include -#include // For hexrays_available() -#include // Operand apply spec parser/helpers +#include "ida_headers.hpp" +#include +#include "decompiler.hpp" +#include "entities.hpp" +#include "idapython_exec.hpp" +#include namespace idasql { namespace functions { +static bool resolve_address_arg( + xsql::FunctionContext& ctx, + xsql::FunctionArg* argv, + int arg_index, + const char* arg_name, + ea_t& out_ea); + // ============================================================================ // Disassembly Functions // ============================================================================ @@ -192,7 +80,10 @@ static void sql_disasm(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } int count = (argc >= 2) ? argv[1].as_int() : 1; if (count < 1) count = 1; if (count > 1000) count = 1000; // Safety limit @@ -219,7 +110,10 @@ static void sql_disasm_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } int context = (argc >= 2) ? argv[1].as_int() : 0; if (context < 0) context = 0; if (context > 64) context = 64; // Safety cap @@ -294,8 +188,14 @@ static void sql_disasm_range(xsql::FunctionContext& ctx, int argc, xsql::Functio ctx.result_error("disasm_range requires 2 arguments (start, end)"); return; } - ea_t start = static_cast(argv[0].as_int64()); - ea_t end = static_cast(argv[1].as_int64()); + ea_t start = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "start", start)) { + return; + } + ea_t end = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "end", end)) { + return; + } auto str = disasm_range_impl(start, end); str.empty() ? ctx.result_null() : ctx.result_text(str); } @@ -306,7 +206,10 @@ static void sql_disasm_func(xsql::FunctionContext& ctx, int argc, xsql::Function ctx.result_error("disasm_func requires 1 argument (address)"); return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } func_t* func = get_func(ea); if (!func) { ctx.result_null(); @@ -327,7 +230,10 @@ static void sql_bytes_hex(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } size_t count = static_cast(argv[1].as_int()); if (count > 4096) count = 4096; // Safety limit @@ -350,7 +256,10 @@ static void sql_bytes_raw(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } size_t count = static_cast(argv[1].as_int()); if (count > 4096) count = 4096; // Safety limit @@ -362,25 +271,191 @@ static void sql_bytes_raw(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr ctx.result_blob(data.data(), static_cast(data.size())); } +// load_file_bytes(path, file_offset, address, size [, patchable]) +// Load bytes from a file into the IDB at the target address range. +static void sql_load_file_bytes(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { + if (argc < 4 || argc > 5) { + ctx.result_error("load_file_bytes requires 4-5 arguments (path, file_offset, address, size, [patchable])"); + return; + } + + const char* path = argv[0].as_c_str(); + if (path == nullptr || path[0] == '\0') { + ctx.result_error("path must be a non-empty file path"); + return; + } + + const int64_t file_offset_raw = argv[1].as_int64(); + if (file_offset_raw < 0) { + ctx.result_error("file_offset must be >= 0"); + return; + } + + ea_t start_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 2, "address", start_ea)) { + return; + } + + const int64_t size_raw = argv[3].as_int64(); + if (size_raw <= 0) { + ctx.result_error("size must be > 0"); + return; + } + + const uint64_t size_u64 = static_cast(size_raw); + const uint64_t max_ea = static_cast((std::numeric_limits::max)()); + if (start_ea == BADADDR || static_cast(start_ea) > max_ea || size_u64 > max_ea) { + ctx.result_error("invalid target address range"); + return; + } + if (size_u64 > (max_ea - static_cast(start_ea))) { + ctx.result_error("target address range overflows ea_t"); + return; + } + + const ea_t end_ea = static_cast(static_cast(start_ea) + size_u64); + int patchable = FILEREG_PATCHABLE; + if (argc >= 5 && !argv[4].is_null()) { + patchable = argv[4].as_int() ? FILEREG_PATCHABLE : FILEREG_NOTPATCHABLE; + } + + linput_t* li = open_linput(path, false); + if (li == nullptr) { + ctx.result_error(std::string("failed to open file: ") + path); + return; + } + + auto_wait(); + const int ok = file2base( + li, + static_cast(file_offset_raw), + start_ea, + end_ea, + patchable); + close_linput(li); + auto_wait(); + + ctx.result_int(ok ? 1 : 0); +} + // patch_byte(ea, value) - Patch a single byte (preserves original) static void sql_patch_byte(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 2) { ctx.result_error("patch_byte requires 2 arguments (address, value)"); return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } uint64 val = static_cast(argv[1].as_int64()); bool ok = patch_byte(ea, val); ctx.result_int(ok ? 1 : 0); } +static uint64_t item_size_or_one(ea_t ea) { + const asize_t sz = get_item_size(ea); + if (sz == 0 || sz == BADADDR) return 1; + return static_cast(sz); +} + +// Try to create one instruction at EA and return decoded size on success. +static int create_instruction_at(ea_t ea) { + if (ea == BADADDR || ea == 0) return 0; + + int len = create_insn(ea); + if (len > 0) return len; + + const uint64_t sz = item_size_or_one(ea); + del_items(ea, DELIT_SIMPLE, static_cast(sz)); + return create_insn(ea); +} + +static int make_code_range_impl(ea_t start, ea_t end) { + int created = 0; + ea_t cursor = start; + while (cursor < end && cursor != BADADDR) { + uint64_t step = 1; + + if (is_code(get_flags(cursor))) { + step = item_size_or_one(cursor); + } else { + const int len = create_instruction_at(cursor); + if (len > 0) { + ++created; + step = static_cast(len); + } + } + + const ea_t next = cursor + static_cast(step); + if (next <= cursor) break; + cursor = next; + } + return created; +} + +// make_code(address) - Create instruction at one address (idempotent). +static void sql_make_code(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { + if (argc < 1) { + ctx.result_error("make_code requires 1 argument (address)"); + return; + } + + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } + if (ea == BADADDR || ea == 0) { + ctx.result_error("Invalid address"); + return; + } + + if (is_code(get_flags(ea))) { + auto_wait(); + ctx.result_int(1); + return; + } + + const int len = create_instruction_at(ea); + auto_wait(); + ctx.result_int(len > 0 ? 1 : 0); +} + +// make_code_range(start, end) - Create instructions in [start, end). +static void sql_make_code_range(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { + if (argc < 2) { + ctx.result_error("make_code_range requires 2 arguments (start, end)"); + return; + } + + ea_t start = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "start", start)) { + return; + } + ea_t end = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "end", end)) { + return; + } + if (start == BADADDR || end == BADADDR || start >= end) { + ctx.result_error("make_code_range requires start < end"); + return; + } + + const int created = make_code_range_impl(start, end); + auto_wait(); + ctx.result_int(created); +} + // patch_word(ea, value) - Patch a word (2 bytes, preserves original) static void sql_patch_word(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 2) { ctx.result_error("patch_word requires 2 arguments (address, value)"); return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } uint64 val = static_cast(argv[1].as_int64()); bool ok = patch_word(ea, val); ctx.result_int(ok ? 1 : 0); @@ -392,7 +467,10 @@ static void sql_patch_dword(xsql::FunctionContext& ctx, int argc, xsql::Function ctx.result_error("patch_dword requires 2 arguments (address, value)"); return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } uint64 val = static_cast(argv[1].as_int64()); bool ok = patch_dword(ea, val); ctx.result_int(ok ? 1 : 0); @@ -404,7 +482,10 @@ static void sql_patch_qword(xsql::FunctionContext& ctx, int argc, xsql::Function ctx.result_error("patch_qword requires 2 arguments (address, value)"); return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } uint64 val = static_cast(argv[1].as_int64()); bool ok = patch_qword(ea, val); ctx.result_int(ok ? 1 : 0); @@ -416,7 +497,10 @@ static void sql_revert_byte(xsql::FunctionContext& ctx, int argc, xsql::Function ctx.result_error("revert_byte requires 1 argument (address)"); return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } bool ok = revert_byte(ea); ctx.result_int(ok ? 1 : 0); } @@ -427,7 +511,10 @@ static void sql_get_original_byte(xsql::FunctionContext& ctx, int argc, xsql::Fu ctx.result_error("get_original_byte requires 1 argument (address)"); return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } ctx.result_int(static_cast(get_original_byte(ea))); } @@ -442,7 +529,10 @@ static void sql_name_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } qstring name; if (get_name(&name, ea) > 0 && !name.empty()) { ctx.result_text(name.c_str()); @@ -458,7 +548,10 @@ static void sql_func_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } func_t* func = get_func(ea); if (func) { qstring name; @@ -477,7 +570,10 @@ static void sql_func_start(xsql::FunctionContext& ctx, int argc, xsql::FunctionA return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } func_t* func = get_func(ea); if (func) { ctx.result_int64(func->start_ea); @@ -493,7 +589,10 @@ static void sql_func_end(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } func_t* func = get_func(ea); if (func) { ctx.result_int64(func->end_ea); @@ -545,7 +644,10 @@ static void sql_set_name(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } const char* name = argv[1].as_c_str(); bool success = set_name(ea, name, SN_CHECK) != 0; @@ -560,7 +662,10 @@ static void sql_type_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } qstring out; if (print_type(&out, ea, PRTYPE_1LINE | PRTYPE_SEMI)) { ctx.result_text(out.c_str()); @@ -577,7 +682,10 @@ static void sql_set_type(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } const char* decl = argv[1].as_c_str(); if (decl == nullptr || *decl == '\0') { // Empty declaration clears user-provided type. @@ -608,7 +716,7 @@ static void sql_parse_decls(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - // Allow redeclarations; keep parser strict enough for agent feedback. + // Allow redeclarations while keeping parser behavior deterministic. const int errors = parse_decls(nullptr, decls, nullptr, HTI_DCL | HTI_HIGH | HTI_SEMICOLON); ctx.result_int(errors == 0 ? 1 : 0); } @@ -624,7 +732,10 @@ static void sql_segment_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionA return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } segment_t* seg = getseg(ea); if (seg) { qstring name; @@ -647,7 +758,10 @@ static void sql_comment_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionA return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } qstring cmt; if (get_cmt(&cmt, ea, false) > 0) { ctx.result_text(cmt.c_str()); @@ -667,7 +781,10 @@ static void sql_set_comment(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } const char* cmt = argv[1].as_c_str(); bool repeatable = (argc >= 3) ? argv[2].as_int() != 0 : false; @@ -686,7 +803,10 @@ static void sql_xrefs_to(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } xsql::json arr = xsql::json::array(); xrefblk_t xb; @@ -705,7 +825,10 @@ static void sql_xrefs_from(xsql::FunctionContext& ctx, int argc, xsql::FunctionA return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } xsql::json arr = xsql::json::array(); xrefblk_t xb; @@ -854,7 +977,10 @@ static void sql_decompile(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } func_t* func = get_func(ea); if (!func) { @@ -888,7 +1014,10 @@ static void sql_decompile_2(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } int refresh = argv[1].as_int(); func_t* func = get_func(ea); @@ -924,7 +1053,10 @@ static void sql_next_head(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } ea_t next = next_head(ea, BADADDR); if (next != BADADDR) { ctx.result_int64(next); @@ -940,7 +1072,10 @@ static void sql_prev_head(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } ea_t prev = prev_head(ea, 0); if (prev != BADADDR) { ctx.result_int64(prev); @@ -974,7 +1109,10 @@ static void sql_item_type(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } flags64_t f = get_flags(ea); const char* type = "unknown"; @@ -994,7 +1132,10 @@ static void sql_item_size(xsql::FunctionContext& ctx, int argc, xsql::FunctionAr return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } asize_t size = get_item_size(ea); ctx.result_int64(size); } @@ -1006,7 +1147,10 @@ static void sql_is_code(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } ctx.result_int(is_code(get_flags(ea)) ? 1 : 0); } @@ -1017,7 +1161,10 @@ static void sql_is_data(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } ctx.result_int(is_data(get_flags(ea)) ? 1 : 0); } @@ -1028,7 +1175,10 @@ static void sql_mnemonic(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } if (!is_code(get_flags(ea))) { ctx.result_null(); return; @@ -1046,7 +1196,10 @@ static void sql_operand(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } int n = argv[1].as_int(); if (!is_code(get_flags(ea)) || n < 0 || n > 5) { @@ -1071,7 +1224,10 @@ static void sql_flags_at(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } ctx.result_int64(get_flags(ea)); } @@ -1101,7 +1257,10 @@ static void sql_itype(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* a return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } if (!is_code(get_flags(ea))) { ctx.result_null(); @@ -1123,7 +1282,10 @@ static void sql_decode_insn(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } if (!is_code(get_flags(ea))) { ctx.result_null(); @@ -1184,7 +1346,10 @@ static void sql_operand_type(xsql::FunctionContext& ctx, int argc, xsql::Functio return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } int n = argv[1].as_int(); if (!is_code(get_flags(ea)) || n < 0 || n >= UA_MAXOP) { @@ -1213,7 +1378,10 @@ static void sql_operand_value(xsql::FunctionContext& ctx, int argc, xsql::Functi return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } int n = argv[1].as_int(); if (!is_code(get_flags(ea)) || n < 0 || n >= UA_MAXOP) { @@ -1265,98 +1433,22 @@ static int gen_file_helper(ofile_type_t ofile_type, const char* filepath, ea_t e return result; } -// gen_asm_file(ea1, ea2, path) - Generate assembly file -static void sql_gen_asm_file(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("gen_asm_file requires 3 arguments (ea1, ea2, path)"); - return; - } - - ea_t ea1 = static_cast(argv[0].as_int64()); - ea_t ea2 = static_cast(argv[1].as_int64()); - const char* path = argv[2].as_c_str(); - if (!path) { - ctx.result_error("Invalid path"); - return; - } - - int result = gen_file_helper(OFILE_ASM, path, ea1, ea2, 0); - ctx.result_int(result); -} - -// gen_lst_file(ea1, ea2, path) - Generate listing file with addresses -static void sql_gen_lst_file(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("gen_lst_file requires 3 arguments (ea1, ea2, path)"); - return; - } - - ea_t ea1 = static_cast(argv[0].as_int64()); - ea_t ea2 = static_cast(argv[1].as_int64()); - const char* path = argv[2].as_c_str(); - if (!path) { - ctx.result_error("Invalid path"); - return; - } - - int result = gen_file_helper(OFILE_LST, path, ea1, ea2, 0); - ctx.result_int(result); -} - -// gen_map_file(path) - Generate MAP file -static void sql_gen_map_file(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 1) { - ctx.result_error("gen_map_file requires 1 argument (path)"); +// gen_listing(path) - Generate full-database listing file +static void sql_gen_listing(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { + if (argc != 1) { + ctx.result_error("gen_listing requires 1 argument (path)"); return; } const char* path = argv[0].as_c_str(); - if (!path) { - ctx.result_error("Invalid path"); - return; - } - - // MAP files ignore ea1/ea2, use GENFLG_MAPSEG | GENFLG_MAPNAME - int flags = GENFLG_MAPSEG | GENFLG_MAPNAME | GENFLG_MAPDMNG; - int result = gen_file_helper(OFILE_MAP, path, 0, BADADDR, flags); - ctx.result_int(result); -} - -// gen_idc_file(ea1, ea2, path) - Generate IDC script -static void sql_gen_idc_file(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("gen_idc_file requires 3 arguments (ea1, ea2, path)"); - return; - } - - ea_t ea1 = static_cast(argv[0].as_int64()); - ea_t ea2 = static_cast(argv[1].as_int64()); - const char* path = argv[2].as_c_str(); - if (!path) { - ctx.result_error("Invalid path"); - return; - } - - int result = gen_file_helper(OFILE_IDC, path, ea1, ea2, 0); - ctx.result_int(result); -} - -// gen_html_file(ea1, ea2, path) - Generate HTML listing -static void sql_gen_html_file(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { - if (argc < 3) { - ctx.result_error("gen_html_file requires 3 arguments (ea1, ea2, path)"); - return; - } - - ea_t ea1 = static_cast(argv[0].as_int64()); - ea_t ea2 = static_cast(argv[1].as_int64()); - const char* path = argv[2].as_c_str(); - if (!path) { + if (path == nullptr || path[0] == '\0') { ctx.result_error("Invalid path"); return; } - int result = gen_file_helper(OFILE_LST, path, ea1, ea2, GENFLG_GENHTML); + const ea_t ea1 = inf_get_min_ea(); + const ea_t ea2 = inf_get_max_ea(); + const int result = gen_file_helper(OFILE_LST, path, ea1, ea2, 0); ctx.result_int(result); } @@ -1367,7 +1459,10 @@ static void sql_gen_cfg_dot(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } func_t* func = get_func(ea); if (!func) { ctx.result_error("No function at address"); @@ -1420,7 +1515,10 @@ static void sql_gen_cfg_dot_file(xsql::FunctionContext& ctx, int argc, xsql::Fun return; } - ea_t ea = static_cast(argv[0].as_int64()); + ea_t ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "address", ea)) { + return; + } const char* path = argv[1].as_c_str(); if (!path) { ctx.result_error("Invalid path"); @@ -1582,6 +1680,21 @@ inline xsql::json lvar_rename_result_json(const decompiler::LvarRenameResult& r) return j; } +inline xsql::json label_rename_result_json(const decompiler::LabelRenameResult& r) { + xsql::json j = { + {"success", r.success}, + {"applied", r.applied}, + {"func_addr", r.func_addr}, + {"label_num", r.label_num}, + {"requested_name", r.requested_name}, + {"before_name", r.before_name}, + {"after_name", r.after_name}, + {"reason", r.reason.empty() ? xsql::json(nullptr) : xsql::json(r.reason)} + }; + j["warnings"] = r.warnings; + return j; +} + // rename_lvar(func_addr, lvar_idx, new_name) - Rename a local variable // Uses locator-based rename_lvar_at() for precise identification by index. // Returns JSON with result details. @@ -1591,7 +1704,10 @@ static void sql_rename_lvar(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - ea_t func_addr = static_cast(argv[0].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } int lvar_idx = argv[1].as_int(); const char* new_name = argv[2].as_c_str(); @@ -1613,7 +1729,10 @@ static void sql_rename_lvar_by_name(xsql::FunctionContext& ctx, int argc, xsql:: return; } - ea_t func_addr = static_cast(argv[0].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } const char* old_name = argv[1].as_c_str(); const char* new_name = argv[2].as_c_str(); @@ -1627,6 +1746,30 @@ static void sql_rename_lvar_by_name(xsql::FunctionContext& ctx, int argc, xsql:: ctx.result_text(out); } +// rename_label(func_addr, label_num, new_name) - Rename a decompiler label. +// Returns JSON with success/applied details, explicit reasons, and warnings. +static void sql_rename_label(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { + if (argc < 3) { + ctx.result_error("rename_label requires 3 arguments (func_addr, label_num, new_name)"); + return; + } + + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + int label_num = argv[1].as_int(); + const char* new_name = argv[2].as_c_str(); + if (!new_name) { + ctx.result_error("Invalid label name"); + return; + } + + decompiler::LabelRenameResult result = decompiler::rename_label_ex(func_addr, label_num, new_name); + std::string out = label_rename_result_json(result).dump(); + ctx.result_text(out); +} + // set_lvar_comment(func_addr, lvar_idx, comment) - Set local variable comment by index. // Returns 1 on success, 0 on failure. static void sql_set_lvar_comment(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { @@ -1635,7 +1778,10 @@ static void sql_set_lvar_comment(xsql::FunctionContext& ctx, int argc, xsql::Fun return; } - ea_t func_addr = static_cast(argv[0].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } int lvar_idx = argv[1].as_int(); const char* comment = argv[2].is_null() ? "" : argv[2].as_c_str(); @@ -1655,6 +1801,90 @@ static std::string trim_copy(const std::string& text) { return text.substr(begin, end - begin); } +static bool parse_numeric_ea_text(const std::string& text, ea_t& out_ea) { + const std::string token = trim_copy(text); + if (token.empty()) { + return false; + } + + errno = 0; + char* end_ptr = nullptr; + const unsigned long long value = strtoull(token.c_str(), &end_ptr, 0); + if (errno == ERANGE || end_ptr == token.c_str() || end_ptr == nullptr || *end_ptr != '\0') { + return false; + } + const unsigned long long max_ea = static_cast((std::numeric_limits::max)()); + if (value > max_ea) { + return false; + } + + out_ea = static_cast(value); + return true; +} + +static bool resolve_address_arg( + xsql::FunctionContext& ctx, + xsql::FunctionArg* argv, + int arg_index, + const char* arg_name, + ea_t& out_ea) { + out_ea = BADADDR; + + if (arg_index < 0) { + ctx.result_error("Internal error: invalid address argument index"); + return false; + } + + xsql::FunctionArg& arg = argv[arg_index]; + const int sqlite_type = arg.type(); + + if (sqlite_type == SQLITE_INTEGER) { + out_ea = static_cast(arg.as_int64()); + return true; + } + + if (sqlite_type == SQLITE_FLOAT) { + const char* name = (arg_name && *arg_name) ? arg_name : "address"; + ctx.result_error(std::string(name) + " must be an integer, numeric string, or symbol name"); + return false; + } + + if (sqlite_type == SQLITE_TEXT) { + const std::string raw = arg.as_text(); + const std::string text = trim_copy(raw); + if (text.empty()) { + const char* name = (arg_name && *arg_name) ? arg_name : "address"; + ctx.result_error(std::string(name) + " must not be empty"); + return false; + } + + ea_t parsed = BADADDR; + if (parse_numeric_ea_text(text, parsed)) { + out_ea = parsed; + return true; + } + + const ea_t resolved = get_name_ea(BADADDR, text.c_str()); + if (resolved != BADADDR) { + out_ea = resolved; + return true; + } + + ctx.result_error("Could not resolve name to address: " + text); + return false; + } + + if (sqlite_type == SQLITE_NULL) { + const char* name = (arg_name && *arg_name) ? arg_name : "address"; + ctx.result_error(std::string(name) + " must not be NULL"); + return false; + } + + const char* name = (arg_name && *arg_name) ? arg_name : "address"; + ctx.result_error(std::string(name) + " must be an integer, numeric string, or symbol name"); + return false; +} + static bool parse_int_token(const std::string& token, int& out_value) { const std::string t = trim_copy(token); if (t.empty()) return false; @@ -2033,9 +2263,12 @@ static bool synthesize_numform_from_operand_representation(ea_t target_ea, int o } out_fmt = number_format_t(opnum); - const flags64_t flags = get_flags(target_ea); - out_fmt.flags = flags; - out_fmt.flags32 = static_cast(flags); + if (kind == "enum") { + out_fmt.flags = enum_flag(); + } else { + out_fmt.flags = stroff_flag(); + } + out_fmt.flags32 = static_cast(out_fmt.flags); out_fmt.props = static_cast(NF_FIXED | NF_VALID); out_fmt.serial = 0; out_fmt.type_name = entities::operand_repr_type_name_text(target_ea, opnum).c_str(); @@ -2099,10 +2332,7 @@ static bool set_user_numform_at_ea(ea_t requested_func_addr, ea_t target_ea, int user_numforms_free(numforms); } decompiler::invalidate_decompiler_cache(func_addr); - // Representation apply already succeeded above; treat numform persistence as best-effort. - // Some databases may not persist user_numforms for specific operands, but callers should - // still observe the live representation change via decompile()/instructions. - return true; + return metadata_ok; } static bool get_user_numform_at_ea(ea_t requested_func_addr, ea_t target_ea, int opnum, number_format_t& out_fmt) { @@ -2172,8 +2402,14 @@ static void sql_set_union_selection(xsql::FunctionContext& ctx, int argc, xsql:: return; } - ea_t func_addr = static_cast(argv[0].as_int64()); - ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const char* path_spec = argv[2].is_null() ? "" : argv[2].as_c_str(); intvec_t path; @@ -2194,7 +2430,10 @@ static void sql_set_union_selection_item(xsql::FunctionContext& ctx, int argc, x return; } - ea_t func_addr = static_cast(argv[0].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } int item_id = argv[1].as_int(); const char* path_spec = argv[2].is_null() ? "" : argv[2].as_c_str(); @@ -2216,8 +2455,14 @@ static void sql_get_union_selection(xsql::FunctionContext& ctx, int argc, xsql:: return; } - ea_t func_addr = static_cast(argv[0].as_int64()); - ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } intvec_t path; bool found = decompiler::get_union_selection_at_ea(func_addr, target_ea, path); @@ -2236,7 +2481,10 @@ static void sql_get_union_selection_item(xsql::FunctionContext& ctx, int argc, x return; } - ea_t func_addr = static_cast(argv[0].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } int item_id = argv[1].as_int(); ea_t target_ea = BADADDR; if (!decompiler::get_ctree_item_ea(func_addr, item_id, target_ea)) { @@ -2261,8 +2509,14 @@ static void sql_set_union_selection_ea_arg(xsql::FunctionContext& ctx, int argc, return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const int arg_idx = argv[2].as_int(); const char* path_spec = argv[3].is_null() ? "" : argv[3].as_c_str(); const char* callee = (argc >= 5 && !argv[4].is_null()) ? argv[4].as_c_str() : ""; @@ -2291,8 +2545,14 @@ static void sql_get_union_selection_ea_arg(xsql::FunctionContext& ctx, int argc, return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const int arg_idx = argv[2].as_int(); const char* callee = (argc >= 4 && !argv[3].is_null()) ? argv[3].as_c_str() : ""; @@ -2324,8 +2584,14 @@ static void sql_call_arg_item(xsql::FunctionContext& ctx, int argc, xsql::Functi return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const int arg_idx = argv[2].as_int(); const char* callee = (argc >= 4 && !argv[3].is_null()) ? argv[3].as_c_str() : ""; @@ -2345,8 +2611,14 @@ static void sql_ctree_item_at(xsql::FunctionContext& ctx, int argc, xsql::Functi return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const char* op_name = (argc >= 3 && !argv[2].is_null()) ? argv[2].as_c_str() : ""; const bool nth_explicit = (argc >= 4); const int nth = nth_explicit ? argv[3].as_int() : 0; @@ -2368,8 +2640,14 @@ static void sql_set_union_selection_ea_expr(xsql::FunctionContext& ctx, int argc return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const char* path_spec = argv[2].is_null() ? "" : argv[2].as_c_str(); const char* op_name = (argc >= 4 && !argv[3].is_null()) ? argv[3].as_c_str() : ""; const bool nth_explicit = (argc >= 5); @@ -2400,8 +2678,14 @@ static void sql_get_union_selection_ea_expr(xsql::FunctionContext& ctx, int argc return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const char* op_name = (argc >= 3 && !argv[2].is_null()) ? argv[2].as_c_str() : ""; const bool nth_explicit = (argc >= 4); const int nth = nth_explicit ? argv[3].as_int() : 0; @@ -2439,8 +2723,14 @@ static void sql_set_numform(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - ea_t func_addr = static_cast(argv[0].as_int64()); - ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } int opnum = argv[2].as_int(); const char* spec = argv[3].is_null() ? "" : argv[3].as_c_str(); @@ -2461,7 +2751,10 @@ static void sql_set_numform_item(xsql::FunctionContext& ctx, int argc, xsql::Fun return; } - ea_t func_addr = static_cast(argv[0].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } int item_id = argv[1].as_int(); int opnum = argv[2].as_int(); const char* spec = argv[3].is_null() ? "" : argv[3].as_c_str(); @@ -2489,8 +2782,14 @@ static void sql_set_numform_ea_arg(xsql::FunctionContext& ctx, int argc, xsql::F return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const int arg_idx = argv[2].as_int(); const int opnum = argv[3].as_int(); const char* spec = argv[4].is_null() ? "" : argv[4].as_c_str(); @@ -2525,8 +2824,14 @@ static void sql_set_numform_ea_expr(xsql::FunctionContext& ctx, int argc, xsql:: return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const int opnum = argv[2].as_int(); const char* spec = argv[3].is_null() ? "" : argv[3].as_c_str(); const char* op_name = (argc >= 5 && !argv[4].is_null()) ? argv[4].as_c_str() : ""; @@ -2563,8 +2868,14 @@ static void sql_get_numform(xsql::FunctionContext& ctx, int argc, xsql::Function return; } - ea_t func_addr = static_cast(argv[0].as_int64()); - ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } int opnum = argv[2].as_int(); number_format_t nf(opnum); @@ -2583,7 +2894,10 @@ static void sql_get_numform_item(xsql::FunctionContext& ctx, int argc, xsql::Fun return; } - ea_t func_addr = static_cast(argv[0].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } int item_id = argv[1].as_int(); int opnum = argv[2].as_int(); @@ -2609,8 +2923,14 @@ static void sql_get_numform_ea_arg(xsql::FunctionContext& ctx, int argc, xsql::F return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const int arg_idx = argv[2].as_int(); const int opnum = argv[3].as_int(); const char* callee = (argc >= 5 && !argv[4].is_null()) ? argv[4].as_c_str() : ""; @@ -2642,8 +2962,14 @@ static void sql_get_numform_ea_expr(xsql::FunctionContext& ctx, int argc, xsql:: return; } - const ea_t func_addr = static_cast(argv[0].as_int64()); - const ea_t target_ea = static_cast(argv[1].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } + ea_t target_ea = BADADDR; + if (!resolve_address_arg(ctx, argv, 1, "ea", target_ea)) { + return; + } const int opnum = argv[2].as_int(); const char* op_name = (argc >= 4 && !argv[3].is_null()) ? argv[3].as_c_str() : ""; const bool nth_explicit = (argc >= 5); @@ -2677,7 +3003,10 @@ static void sql_list_lvars(xsql::FunctionContext& ctx, int argc, xsql::FunctionA return; } - ea_t func_addr = static_cast(argv[0].as_int64()); + ea_t func_addr = BADADDR; + if (!resolve_address_arg(ctx, argv, 0, "func_addr", func_addr)) { + return; + } // Check cached Hex-Rays availability if (!decompiler::hexrays_available()) { @@ -2805,6 +3134,80 @@ static void sql_grep(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* ar ctx.result_text(arr.dump()); } +// ============================================================================ +// IDAPython Execution Functions +// ============================================================================ + +static bool ensure_idapython_enabled(xsql::FunctionContext& ctx) { + if (runtime_settings().enable_idapython()) { + return true; + } + ctx.result_error("idapython is disabled (enable via PRAGMA idasql.enable_idapython = 1)"); + return false; +} + +static void sql_idapython_snippet(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { + if (argc < 1 || argc > 2) { + ctx.result_error("idapython_snippet requires 1-2 arguments (code, [sandbox])"); + return; + } + if (!ensure_idapython_enabled(ctx)) { + return; + } + + const char* code = argv[0].as_c_str(); + if (code == nullptr || code[0] == '\0') { + ctx.result_error("idapython_snippet requires non-empty code"); + return; + } + + std::string sandbox; + if (argc >= 2 && !argv[1].is_null()) { + const char* raw_sandbox = argv[1].as_c_str(); + if (raw_sandbox != nullptr) { + sandbox = raw_sandbox; + } + } + + idapython::ExecutionResult result = idapython::execute_snippet(code, sandbox); + if (!result.success) { + ctx.result_error(result.error.empty() ? "idapython_snippet failed" : result.error); + return; + } + ctx.result_text(result.output); +} + +static void sql_idapython_file(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { + if (argc < 1 || argc > 2) { + ctx.result_error("idapython_file requires 1-2 arguments (path, [sandbox])"); + return; + } + if (!ensure_idapython_enabled(ctx)) { + return; + } + + const char* path = argv[0].as_c_str(); + if (path == nullptr || path[0] == '\0') { + ctx.result_error("idapython_file requires non-empty path"); + return; + } + + std::string sandbox; + if (argc >= 2 && !argv[1].is_null()) { + const char* raw_sandbox = argv[1].as_c_str(); + if (raw_sandbox != nullptr) { + sandbox = raw_sandbox; + } + } + + idapython::ExecutionResult result = idapython::execute_file(path, sandbox); + if (!result.success) { + ctx.result_error(result.error.empty() ? "idapython_file failed" : result.error); + return; + } + ctx.result_text(result.output); +} + // ============================================================================ // String List Functions // ============================================================================ @@ -2886,7 +3289,7 @@ static void sql_save_database(xsql::FunctionContext& ctx, int /*argc*/, xsql::Fu // Registration // ============================================================================ -inline bool register_sql_functions(xsql::Database& db) { +void register_sql_functions(xsql::Database& db) { // Disassembly db.register_function("disasm_at", 1, xsql::ScalarFn(sql_disasm_at)); db.register_function("disasm_at", 2, xsql::ScalarFn(sql_disasm_at)); @@ -2894,10 +3297,14 @@ inline bool register_sql_functions(xsql::Database& db) { db.register_function("disasm", 2, xsql::ScalarFn(sql_disasm)); db.register_function("disasm_range", 2, xsql::ScalarFn(sql_disasm_range)); db.register_function("disasm_func", 1, xsql::ScalarFn(sql_disasm_func)); + db.register_function("make_code", 1, xsql::ScalarFn(sql_make_code)); + db.register_function("make_code_range", 2, xsql::ScalarFn(sql_make_code_range)); // Bytes db.register_function("bytes", 2, xsql::ScalarFn(sql_bytes_hex)); db.register_function("bytes_raw", 2, xsql::ScalarFn(sql_bytes_raw)); + db.register_function("load_file_bytes", 4, xsql::ScalarFn(sql_load_file_bytes)); + db.register_function("load_file_bytes", 5, xsql::ScalarFn(sql_load_file_bytes)); // Byte patching db.register_function("patch_byte", 2, xsql::ScalarFn(sql_patch_byte)); @@ -2940,6 +3347,7 @@ inline bool register_sql_functions(xsql::Database& db) { db.register_function("list_lvars", 1, xsql::ScalarFn(sql_list_lvars)); db.register_function("rename_lvar", 3, xsql::ScalarFn(sql_rename_lvar)); db.register_function("rename_lvar_by_name", 3, xsql::ScalarFn(sql_rename_lvar_by_name)); + db.register_function("rename_label", 3, xsql::ScalarFn(sql_rename_label)); db.register_function("set_lvar_comment", 3, xsql::ScalarFn(sql_set_lvar_comment)); db.register_function("set_union_selection", 3, xsql::ScalarFn(sql_set_union_selection)); db.register_function("set_union_selection_item", 3, xsql::ScalarFn(sql_set_union_selection_item)); @@ -2997,11 +3405,7 @@ inline bool register_sql_functions(xsql::Database& db) { db.register_function("operand_value", 2, xsql::ScalarFn(sql_operand_value)); // File generation - db.register_function("gen_asm_file", 3, xsql::ScalarFn(sql_gen_asm_file)); - db.register_function("gen_lst_file", 3, xsql::ScalarFn(sql_gen_lst_file)); - db.register_function("gen_map_file", 1, xsql::ScalarFn(sql_gen_map_file)); - db.register_function("gen_idc_file", 3, xsql::ScalarFn(sql_gen_idc_file)); - db.register_function("gen_html_file", 3, xsql::ScalarFn(sql_gen_html_file)); + db.register_function("gen_listing", 1, xsql::ScalarFn(sql_gen_listing)); // Graph generation db.register_function("gen_cfg_dot", 1, xsql::ScalarFn(sql_gen_cfg_dot)); @@ -3013,6 +3417,12 @@ inline bool register_sql_functions(xsql::Database& db) { db.register_function("grep", 2, xsql::ScalarFn(sql_grep)); db.register_function("grep", 3, xsql::ScalarFn(sql_grep)); + // Python execution + db.register_function("idapython_snippet", 1, xsql::ScalarFn(sql_idapython_snippet)); + db.register_function("idapython_snippet", 2, xsql::ScalarFn(sql_idapython_snippet)); + db.register_function("idapython_file", 1, xsql::ScalarFn(sql_idapython_file)); + db.register_function("idapython_file", 2, xsql::ScalarFn(sql_idapython_file)); + // String list functions db.register_function("rebuild_strings", 0, xsql::ScalarFn(sql_rebuild_strings)); db.register_function("rebuild_strings", 1, xsql::ScalarFn(sql_rebuild_strings)); @@ -3021,8 +3431,6 @@ inline bool register_sql_functions(xsql::Database& db) { // Database persistence db.register_function("save_database", 0, xsql::ScalarFn(sql_save_database)); - - return true; } } // namespace functions diff --git a/src/lib/src/functions.hpp b/src/lib/src/functions.hpp new file mode 100644 index 0000000..18ecd84 --- /dev/null +++ b/src/lib/src/functions.hpp @@ -0,0 +1,18 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * functions.hpp - Custom SQL functions for IDA operations + */ + +#pragma once + +#include + +namespace idasql { +namespace functions { + +void register_sql_functions(xsql::Database& db); + +} // namespace functions +} // namespace idasql diff --git a/src/lib/src/ida_headers.hpp b/src/lib/src/ida_headers.hpp new file mode 100644 index 0000000..fb05e46 --- /dev/null +++ b/src/lib/src/ida_headers.hpp @@ -0,0 +1,83 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * ida_headers.hpp - Precompiled header for IDA SDK includes + * + * Contains the full superset of all IDA SDK headers used across the library. + * Include this instead of individual IDA headers in private .cpp/.hpp files. + * + * Usage: #include "ida_headers.hpp" + * + * Note: platform.hpp must still be included BEFORE standard library headers + * in each TU (it sets up macOS typedef redirects that affect system headers). + * This file handles the corresponding platform_undef.hpp cleanup. + */ + +#pragma once + +// Platform fixups (macOS typedef redirects) — must precede system headers +#include + +// Standard library headers — include before IDA SDK so that IDA's pro.h +// (which pulls in C-style) doesn't prevent from +// placing _strtoui64 / strtoull into namespace std. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Platform fixup cleanup (undoes platform.hpp macOS typedef redirects) +#include + +// IDA SDK -- full superset used across the library +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// IDA SDK's pro.h defines: #define strtoull _strtoui64 (VS2010 compat shim). +// Modern MSVC has strtoull natively, and the macro breaks nlohmann/json which +// uses std::strtoull (macro-expanded to std::_strtoui64 which doesn't exist). +// Undo the macro so downstream headers see the real std::strtoull. +#ifdef _MSC_VER +#undef strtoull +#endif diff --git a/src/lib/src/idapython_exec.cpp b/src/lib/src/idapython_exec.cpp new file mode 100644 index 0000000..f4f851e --- /dev/null +++ b/src/lib/src/idapython_exec.cpp @@ -0,0 +1,275 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include "idapython_exec.hpp" + +namespace idasql { +namespace idapython { + +std::string hex_encode(const std::string& input) { + static const char* kHex = "0123456789abcdef"; + std::string out; + out.reserve(input.size() * 2); + for (unsigned char c : input) { + out.push_back(kHex[c >> 4]); + out.push_back(kHex[c & 0x0f]); + } + return out; +} + +UiMessageCapture& UiMessageCapture::instance() { + static UiMessageCapture capture; + return capture; +} + +bool UiMessageCapture::acquire_runtime(std::string* error) { + std::lock_guard lock(mutex_); + if (!ensure_hook_locked(error)) { + return false; + } + ++runtime_refcount_; + return true; +} + +void UiMessageCapture::release_runtime() { + std::lock_guard lock(mutex_); + if (runtime_refcount_ == 0) { + return; + } + --runtime_refcount_; + maybe_unhook_locked(); +} + +bool UiMessageCapture::begin_capture(std::string* error) { + std::lock_guard lock(mutex_); + if (!ensure_hook_locked(error)) { + return false; + } + if (capturing_) { + if (error != nullptr) { + *error = "Python output capture is already active"; + } + return false; + } + buffer_.str(""); + buffer_.clear(); + capturing_ = true; + return true; +} + +std::string UiMessageCapture::end_capture() { + std::lock_guard lock(mutex_); + capturing_ = false; + std::string out = buffer_.str(); + buffer_.str(""); + buffer_.clear(); + maybe_unhook_locked(); + return out; +} + +ssize_t idaapi UiMessageCapture::on_event(ssize_t code, va_list va) { + if (code != ui_msg) { + return 0; + } + + { + std::lock_guard lock(mutex_); + if (!capturing_) { + return 0; + } + } + + const char* format = va_arg(va, const char*); + if (format == nullptr) { + return 0; + } + + // On GCC/Clang, va_list is an array type that decays to a pointer when + // passed through variadic args. On MSVC, va_list is char* (passed by value). + va_list copy; +#ifdef _MSC_VER + va_list format_args = va_arg(va, va_list); + va_copy(copy, format_args); +#else + va_list* format_args = va_arg(va, va_list*); + va_copy(copy, *format_args); +#endif + qstring formatted; + formatted.vsprnt(format, copy); + va_end(copy); + + std::lock_guard lock(mutex_); + if (!capturing_) { + return 0; + } + + buffer_ << formatted.c_str(); + return 1; +} + +bool UiMessageCapture::ensure_hook_locked(std::string* error) { + if (hooked_) { + return true; + } + if (!::hook_event_listener(HT_UI, this, nullptr)) { + if (error != nullptr) { + *error = "Failed to install UI message capture hook"; + } + return false; + } + hooked_ = true; + return true; +} + +void UiMessageCapture::maybe_unhook_locked() { + if (hooked_ && runtime_refcount_ == 0 && !capturing_) { + (void)::unhook_event_listener(HT_UI, this); + hooked_ = false; + } +} + +bool runtime_acquire(std::string* error) { + return UiMessageCapture::instance().acquire_runtime(error); +} + +void runtime_release() { + UiMessageCapture::instance().release_runtime(); +} + +ScopedCapture::ScopedCapture() : active_(UiMessageCapture::instance().begin_capture(&error_)) {} + +ScopedCapture::~ScopedCapture() { + if (active_ && !finished_) { + output_ = UiMessageCapture::instance().end_capture(); + finished_ = true; + } +} + +std::string ScopedCapture::finish() { + if (active_ && !finished_) { + output_ = UiMessageCapture::instance().end_capture(); + finished_ = true; + } + return output_; +} + +extlang_t* get_python_extlang() { + static std::mutex mutex; + static extlang_t* cached = nullptr; + static bool tried = false; + + std::lock_guard lock(mutex); + if (!tried) { + tried = true; + extlang_object_t obj = find_extlang_by_name("Python"); + cached = obj; + } + return cached; +} + +std::string build_namespace_preamble(const std::string& sandbox) { + const std::string sandbox_hex = hex_encode(sandbox); + std::ostringstream wrapped; + wrapped << "import builtins\n" + << "__idasql_sandbox = bytes.fromhex('" << sandbox_hex << "').decode('utf-8')\n" + << "if not hasattr(builtins, '__idasql_namespaces__'):\n" + << " builtins.__idasql_namespaces__ = {}\n" + << "if __idasql_sandbox not in builtins.__idasql_namespaces__:\n" + << " builtins.__idasql_namespaces__[__idasql_sandbox] = globals().copy()\n"; + return wrapped.str(); +} + +std::string build_namespaced_snippet(const std::string& code, const std::string& sandbox) { + const std::string code_hex = hex_encode(code); + std::ostringstream wrapped; + wrapped << build_namespace_preamble(sandbox) + << "__idasql_code = bytes.fromhex('" << code_hex << "').decode('utf-8')\n" + << "exec(__idasql_code, builtins.__idasql_namespaces__[__idasql_sandbox])\n" + << "del __idasql_code\n" + << "del __idasql_sandbox\n"; + return wrapped.str(); +} + +std::string build_namespaced_file_snippet(const std::string& path, const std::string& sandbox) { + const std::string path_hex = hex_encode(path); + std::ostringstream wrapped; + wrapped << "__idasql_path = bytes.fromhex('" << path_hex << "').decode('utf-8')\n" + << "with open(__idasql_path, 'r', encoding='utf-8') as __idasql_file:\n" + << " __idasql_code = __idasql_file.read()\n" + << build_namespace_preamble(sandbox) + << "exec(__idasql_code, builtins.__idasql_namespaces__[__idasql_sandbox])\n" + << "del __idasql_path\n" + << "del __idasql_code\n" + << "del __idasql_sandbox\n"; + return wrapped.str(); +} + +ExecutionResult execute_snippet(const std::string& code, const std::string& sandbox) { + ExecutionResult result; + extlang_t* py = get_python_extlang(); + if (py == nullptr || py->eval_snippet == nullptr) { + result.error = "Python interpreter not available"; + return result; + } + + ScopedCapture capture; + if (!capture.ok()) { + result.error = capture.error(); + return result; + } + + qstring errbuf; + const bool ok = sandbox.empty() + ? py->eval_snippet(code.c_str(), &errbuf) + : py->eval_snippet(build_namespaced_snippet(code, sandbox).c_str(), &errbuf); + + result.output = capture.finish(); + result.success = ok; + if (!ok) { + result.error = errbuf.c_str(); + } + return result; +} + +ExecutionResult execute_file(const std::string& path, const std::string& sandbox) { + ExecutionResult result; + extlang_t* py = get_python_extlang(); + if (py == nullptr) { + result.error = "Python interpreter not available"; + return result; + } + + if (sandbox.empty()) { + if (py->compile_file == nullptr) { + result.error = "Python file execution is not available"; + return result; + } + } else if (py->eval_snippet == nullptr) { + result.error = "Python snippet execution is not available"; + return result; + } + + ScopedCapture capture; + if (!capture.ok()) { + result.error = capture.error(); + return result; + } + + qstring errbuf; + bool ok = false; + if (sandbox.empty()) { + ok = py->compile_file(path.c_str(), nullptr, &errbuf); + } else { + ok = py->eval_snippet(build_namespaced_file_snippet(path, sandbox).c_str(), &errbuf); + } + + result.output = capture.finish(); + result.success = ok; + if (!ok) { + result.error = errbuf.c_str(); + } + return result; +} + +} // namespace idapython +} // namespace idasql diff --git a/src/lib/src/idapython_exec.hpp b/src/lib/src/idapython_exec.hpp new file mode 100644 index 0000000..84a6a05 --- /dev/null +++ b/src/lib/src/idapython_exec.hpp @@ -0,0 +1,81 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * idapython_exec.hpp - IDAPython bridge + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include "ida_headers.hpp" + +namespace idasql { +namespace idapython { + +struct ExecutionResult { + bool success = false; + std::string output; + std::string error; +}; + +std::string hex_encode(const std::string& input); + +class UiMessageCapture : public event_listener_t { +public: + static UiMessageCapture& instance(); + + bool acquire_runtime(std::string* error = nullptr); + void release_runtime(); + bool begin_capture(std::string* error = nullptr); + std::string end_capture(); + + virtual ssize_t idaapi on_event(ssize_t code, va_list va) override; + +private: + UiMessageCapture() = default; + + bool ensure_hook_locked(std::string* error); + void maybe_unhook_locked(); + + std::mutex mutex_; + bool hooked_ = false; + bool capturing_ = false; + size_t runtime_refcount_ = 0; + std::ostringstream buffer_; +}; + +bool runtime_acquire(std::string* error = nullptr); +void runtime_release(); + +class ScopedCapture { +public: + ScopedCapture(); + ~ScopedCapture(); + bool ok() const { return active_; } + const std::string& error() const { return error_; } + std::string finish(); + +private: + bool active_ = false; + bool finished_ = false; + std::string error_; + std::string output_; +}; + +extlang_t* get_python_extlang(); +std::string build_namespace_preamble(const std::string& sandbox); +std::string build_namespaced_snippet(const std::string& code, const std::string& sandbox); +std::string build_namespaced_file_snippet(const std::string& path, const std::string& sandbox); +ExecutionResult execute_snippet(const std::string& code, const std::string& sandbox); +ExecutionResult execute_file(const std::string& path, const std::string& sandbox); + +} // namespace idapython +} // namespace idasql diff --git a/src/lib/include/idasql/metadata.hpp b/src/lib/src/metadata.cpp similarity index 57% rename from src/lib/include/idasql/metadata.hpp rename to src/lib/src/metadata.cpp index 97b523d..14ae00d 100644 --- a/src/lib/include/idasql/metadata.hpp +++ b/src/lib/src/metadata.cpp @@ -1,46 +1,20 @@ -/** - * metadata.hpp - IDA database metadata as virtual tables - * - * These tables provide metadata about the database itself, not entities within it. - * Many of these work even without a fully loaded database. - * - * Tables: - * db_info - Database information (processor, file type, etc.) - * ida_info - IDA analysis settings and flags - */ - -#pragma once +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT #include -#include -#include +#include +#include -#include +#include "metadata.hpp" -// IDA SDK headers -#include -#include -#include +#include "ida_headers.hpp" namespace idasql { namespace metadata { +namespace { -// ============================================================================ -// Helper: Key-Value pair for metadata tables -// ============================================================================ - -struct MetadataItem { - std::string key; - std::string value; - std::string type; // "string", "int", "hex", "bool" -}; - -// ============================================================================ -// DB_INFO Table - Database information -// ============================================================================ - -inline void collect_db_info(std::vector& rows) { +static void collect_db_info(std::vector& rows) { rows.clear(); auto add_str = [&](const char* k, const std::string& v) { @@ -58,30 +32,26 @@ inline void collect_db_info(std::vector& rows) { rows.push_back({k, v ? "true" : "false", "bool"}); }; - // Processor info add_str("processor", inf_get_procname().c_str()); add_int("filetype", inf_get_filetype()); add_int("ostype", inf_get_ostype()); add_int("apptype", inf_get_apptype()); - // Address info add_hex("min_ea", inf_get_min_ea()); add_hex("max_ea", inf_get_max_ea()); add_hex("start_ea", inf_get_start_ea()); add_hex("main_ea", inf_get_main()); - // Addressing add_int("cc_id", inf_get_cc_id()); add_bool("is_32bit", !inf_is_64bit()); add_bool("is_64bit", inf_is_64bit()); add_bool("is_be", inf_is_be()); - // Database info add_int("database_change_count", inf_get_database_change_count()); add_int("version", IDA_SDK_VERSION); } -inline CachedTableDef define_db_info() { +static CachedTableDef define_db_info() { return cached_table("db_info") .no_shared_cache() .estimate_rows([]() -> size_t { return 16; }) @@ -100,11 +70,7 @@ inline CachedTableDef define_db_info() { .build(); } -// ============================================================================ -// IDA_INFO Table - IDA analysis flags (from inf structure) -// ============================================================================ - -inline void collect_ida_info(std::vector& rows) { +static void collect_ida_info(std::vector& rows) { rows.clear(); auto add_bool = [&](const char* k, bool v) { @@ -114,23 +80,20 @@ inline void collect_ida_info(std::vector& rows) { rows.push_back({k, std::to_string(v), "int"}); }; - // Analysis flags - add_bool("show_auto", inf_should_create_stkvars()); // approximate + add_bool("show_auto", inf_should_create_stkvars()); add_bool("show_void", inf_is_graph_view()); add_bool("is_dll", inf_is_dll()); add_bool("is_flat", inf_is_flat_off32()); add_bool("wide_fids", inf_is_wide_high_byte_first()); - // Naming add_int("long_demnames", inf_get_long_demnames()); add_int("short_demnames", inf_get_short_demnames()); add_int("demnames", inf_get_demnames()); - // Limits add_int("max_autoname_len", inf_get_max_autoname_len()); } -inline CachedTableDef define_ida_info() { +static CachedTableDef define_ida_info() { return cached_table("ida_info") .no_shared_cache() .estimate_rows([]() -> size_t { return 16; }) @@ -149,27 +112,23 @@ inline CachedTableDef define_ida_info() { .build(); } -// ============================================================================ -// Metadata Registry -// ============================================================================ +} // namespace -struct MetadataRegistry { - CachedTableDef db_info; - CachedTableDef ida_info; +MetadataRegistry::MetadataRegistry() + : db_info(define_db_info()) + , ida_info(define_ida_info()) + , welcome(define_welcome()) {} - MetadataRegistry() - : db_info(define_db_info()) - , ida_info(define_ida_info()) - {} +void MetadataRegistry::register_all(xsql::Database& db) { + db.register_cached_table("ida_db_info", &db_info); + db.create_table("db_info", "ida_db_info"); - void register_all(xsql::Database& db) { - db.register_cached_table("ida_db_info", &db_info); - db.create_table("db_info", "ida_db_info"); + db.register_cached_table("ida_ida_info", &ida_info); + db.create_table("ida_info", "ida_ida_info"); - db.register_cached_table("ida_ida_info", &ida_info); - db.create_table("ida_info", "ida_ida_info"); - } -}; + db.register_cached_table("ida_welcome", &welcome); + db.create_table("welcome", "ida_welcome"); +} } // namespace metadata } // namespace idasql diff --git a/src/lib/src/metadata.hpp b/src/lib/src/metadata.hpp new file mode 100644 index 0000000..4553892 --- /dev/null +++ b/src/lib/src/metadata.hpp @@ -0,0 +1,37 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * metadata.hpp - IDA database metadata as virtual tables + * + * Tables: db_info, ida_info, welcome + */ + +#pragma once + +#include "metadata_welcome.hpp" +#include +#include + +#include + +namespace idasql { +namespace metadata { + +struct MetadataItem { + std::string key; + std::string value; + std::string type; // "string", "int", "hex", "bool" +}; + +struct MetadataRegistry { + CachedTableDef db_info; + CachedTableDef ida_info; + CachedTableDef welcome; + + MetadataRegistry(); + void register_all(xsql::Database& db); +}; + +} // namespace metadata +} // namespace idasql diff --git a/src/lib/src/metadata_welcome.cpp b/src/lib/src/metadata_welcome.cpp new file mode 100644 index 0000000..4459f9e --- /dev/null +++ b/src/lib/src/metadata_welcome.cpp @@ -0,0 +1,93 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include + +#include "metadata_welcome.hpp" + +#include "ida_headers.hpp" + +namespace idasql { +namespace metadata { +namespace { + +static std::string format_hex_u64(uint64_t value) { + char buf[32]; + qsnprintf(buf, sizeof(buf), "0x%llX", (unsigned long long)value); + return std::string(buf); +} + +static std::string get_primary_entry_name() { + if (get_entry_qty() <= 0) { + return ""; + } + qstring name; + const uval_t ord = get_entry_ordinal(0); + get_entry_name(&name, ord); + return std::string(name.c_str()); +} + +static void collect_welcome(std::vector& rows) { + rows.clear(); + + WelcomeRow row; + row.processor = inf_get_procname().c_str(); + row.is_64bit = inf_is_64bit() ? 1 : 0; + row.min_ea = format_hex_u64(static_cast(inf_get_min_ea())); + row.max_ea = format_hex_u64(static_cast(inf_get_max_ea())); + row.start_ea = format_hex_u64(static_cast(inf_get_start_ea())); + + row.entry_name = get_primary_entry_name(); + if (row.entry_name.empty()) { + qstring fallback_name; + if (get_name(&fallback_name, inf_get_start_ea()) > 0) { + row.entry_name = fallback_name.c_str(); + } + } + + row.funcs_count = static_cast(get_func_qty()); + row.segments_count = static_cast(get_segm_qty()); + row.names_count = static_cast(get_nlist_size()); + + std::ostringstream summary; + summary << row.processor << " " << (row.is_64bit ? "64-bit" : "32-bit"); + if (!row.entry_name.empty()) { + summary << " | entry: " << row.entry_name << " @ " << row.start_ea; + } else { + summary << " | start: " << row.start_ea; + } + summary << " | funcs: " << row.funcs_count; + summary << " | segs: " << row.segments_count; + row.summary = summary.str(); + + rows.push_back(std::move(row)); +} + +} // namespace + +CachedTableDef define_welcome() { + return cached_table("welcome") + .no_shared_cache() + .estimate_rows([]() -> size_t { return 1; }) + .cache_builder([](std::vector& rows) { + collect_welcome(rows); + }) + .column_text("summary", [](const WelcomeRow& row) -> std::string { return row.summary; }) + .column_text("processor", [](const WelcomeRow& row) -> std::string { return row.processor; }) + .column_int("is_64bit", [](const WelcomeRow& row) -> int { return row.is_64bit; }) + .column_text("min_ea", [](const WelcomeRow& row) -> std::string { return row.min_ea; }) + .column_text("max_ea", [](const WelcomeRow& row) -> std::string { return row.max_ea; }) + .column_text("start_ea", [](const WelcomeRow& row) -> std::string { return row.start_ea; }) + .column_text("entry_name", [](const WelcomeRow& row) -> std::string { return row.entry_name; }) + .column_int("funcs_count", [](const WelcomeRow& row) -> int { return row.funcs_count; }) + .column_int("segments_count", [](const WelcomeRow& row) -> int { return row.segments_count; }) + .column_int("names_count", [](const WelcomeRow& row) -> int { return row.names_count; }) + .build(); +} + +} // namespace metadata +} // namespace idasql diff --git a/src/lib/src/metadata_welcome.hpp b/src/lib/src/metadata_welcome.hpp new file mode 100644 index 0000000..80a9eea --- /dev/null +++ b/src/lib/src/metadata_welcome.hpp @@ -0,0 +1,31 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +#include + +namespace idasql { +namespace metadata { + +struct WelcomeRow { + std::string summary; + + std::string processor; + int is_64bit = 0; + std::string min_ea; + std::string max_ea; + std::string start_ea; + std::string entry_name; + int funcs_count = 0; + int segments_count = 0; + int names_count = 0; +}; + +CachedTableDef define_welcome(); + +} // namespace metadata +} // namespace idasql diff --git a/src/lib/include/idasql/search_bytes.hpp b/src/lib/src/search_bytes.cpp similarity index 63% rename from src/lib/include/idasql/search_bytes.hpp rename to src/lib/src/search_bytes.cpp index c13c151..a8df7a7 100644 --- a/src/lib/include/idasql/search_bytes.hpp +++ b/src/lib/src/search_bytes.cpp @@ -1,97 +1,32 @@ -// Copyright (c) 2025 Elias Bachaalany +// Copyright (c) Elias Bachaalany // SPDX-License-Identifier: MIT -/** - * search_bytes.hpp - Binary pattern search functions for IDASQL - * - * Provides search_bytes() and search_first() functions for finding byte patterns. - * - * Pattern syntax (IDA native): - * - "48 8B 05" - Exact bytes (hex, space-separated) - * - "48 ? 05" - ? = any byte wildcard (whole byte only) - * - "48 ?? 05" - ?? = same as ? (any byte) - * - "(01 02 03)" - Alternatives (match any of these bytes) - * - * SQL usage: - * SELECT search_bytes('48 8B ? 00'); -- Returns JSON array - * SELECT search_bytes('48 8B ? 00', 0x401000, 0x402000); -- With range - * SELECT search_first('48 8B ? 00'); -- Returns first address - * - * Unlike Binary Ninja: - * - No nibble wildcards (? always means full byte) - * - No regex support - * - Supports alternatives like (01 02 03) - */ - -#pragma once - -#include - -#include -#include -#include -#include -#include +#include "search_bytes.hpp" + #include #include -#include - -// IDA SDK -#include -#include -#include -#include -#include - namespace idasql { namespace search { -// ============================================================================ -// Search Result -// ============================================================================ - -struct ByteSearchResult { - ea_t address; - std::vector matched_bytes; - std::string matched_hex; -}; - -// ============================================================================ -// Binary Pattern Search Implementation -// ============================================================================ - -/** - * Find all matches for a byte pattern in the given range. - * - * @param pattern Pattern string like "48 8B ? 00" or "48 ?? (01 02) 00" - * @param start_ea Start of search range (default: inf_get_min_ea()) - * @param end_ea End of search range (default: inf_get_max_ea()) - * @param results Vector to store results - * @param max_results Maximum results to return (0 = unlimited) - * @return Number of matches found - */ -inline size_t find_byte_pattern( +size_t find_byte_pattern( const char* pattern, ea_t start_ea, ea_t end_ea, std::vector& results, - size_t max_results = 0) + size_t max_results) { if (!pattern || !*pattern) return 0; - // Parse the pattern string compiled_binpat_vec_t binpat; qstring errbuf; if (!parse_binpat_str(&binpat, start_ea, pattern, 16, PBSENC_DEF1BPU, &errbuf)) { - // Pattern parse failed return 0; } if (binpat.empty()) return 0; - // Get pattern length for reading matched bytes size_t pattern_len = binpat[0].bytes.size(); ea_t ea = start_ea; @@ -104,13 +39,11 @@ inline size_t find_byte_pattern( ByteSearchResult result; result.address = found; - // Read matched bytes result.matched_bytes.resize(pattern_len); for (size_t i = 0; i < pattern_len; i++) { result.matched_bytes[i] = get_byte(found + i); } - // Build hex string std::ostringstream hex; hex << std::hex << std::setfill('0'); for (size_t i = 0; i < pattern_len; i++) { @@ -124,18 +57,13 @@ inline size_t find_byte_pattern( if (max_results > 0 && count >= max_results) break; - ea = found + 1; // Move past this match + ea = found + 1; } return count; } -/** - * Find first match for a byte pattern. - * - * @return Address of first match, or BADADDR if not found - */ -inline ea_t find_first_pattern(const char* pattern, ea_t start_ea, ea_t end_ea) { +ea_t find_first_pattern(const char* pattern, ea_t start_ea, ea_t end_ea) { if (!pattern || !*pattern) return BADADDR; compiled_binpat_vec_t binpat; @@ -151,10 +79,9 @@ inline ea_t find_first_pattern(const char* pattern, ea_t start_ea, ea_t end_ea) } // ============================================================================ -// SQL Function Registration +// SQL Function Implementations // ============================================================================ -// search_bytes(pattern) - Returns JSON array of all matches static void sql_search_bytes_1(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 1) { ctx.result_error("search_bytes requires pattern argument"); @@ -173,7 +100,6 @@ static void sql_search_bytes_1(xsql::FunctionContext& ctx, int argc, xsql::Funct std::vector results; find_byte_pattern(pattern, start_ea, end_ea, results); - // Build JSON array xsql::json arr = xsql::json::array(); for (const auto& r : results) { arr.push_back({ @@ -187,7 +113,6 @@ static void sql_search_bytes_1(xsql::FunctionContext& ctx, int argc, xsql::Funct ctx.result_text(result); } -// search_bytes(pattern, start, end) - Returns JSON array within range static void sql_search_bytes_3(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 3) { ctx.result_error("search_bytes requires (pattern, start, end) arguments"); @@ -206,7 +131,6 @@ static void sql_search_bytes_3(xsql::FunctionContext& ctx, int argc, xsql::Funct std::vector results; find_byte_pattern(pattern, start_ea, end_ea, results); - // Build JSON array xsql::json arr = xsql::json::array(); for (const auto& r : results) { arr.push_back({ @@ -220,7 +144,6 @@ static void sql_search_bytes_3(xsql::FunctionContext& ctx, int argc, xsql::Funct ctx.result_text(result); } -// search_first(pattern) - Returns first match address static void sql_search_first_1(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 1) { ctx.result_error("search_first requires pattern argument"); @@ -241,7 +164,6 @@ static void sql_search_first_1(xsql::FunctionContext& ctx, int argc, xsql::Funct } } -// search_first(pattern, start, end) - Returns first match in range static void sql_search_first_3(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* argv) { if (argc < 3) { ctx.result_error("search_first requires (pattern, start, end) arguments"); @@ -265,10 +187,7 @@ static void sql_search_first_3(xsql::FunctionContext& ctx, int argc, xsql::Funct } } -/** - * Register all search_bytes SQL functions. - */ -inline bool register_search_bytes(xsql::Database& db) { +bool register_search_bytes(xsql::Database& db) { db.register_function("search_bytes", 1, xsql::ScalarFn(sql_search_bytes_1)); db.register_function("search_bytes", 3, xsql::ScalarFn(sql_search_bytes_3)); db.register_function("search_first", 1, xsql::ScalarFn(sql_search_first_1)); diff --git a/src/lib/src/search_bytes.hpp b/src/lib/src/search_bytes.hpp new file mode 100644 index 0000000..15ede3e --- /dev/null +++ b/src/lib/src/search_bytes.hpp @@ -0,0 +1,41 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +/** + * search_bytes.hpp - Binary pattern search functions for IDASQL + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include "ida_headers.hpp" + +namespace idasql { +namespace search { + +struct ByteSearchResult { + ea_t address; + std::vector matched_bytes; + std::string matched_hex; +}; + +size_t find_byte_pattern( + const char* pattern, + ea_t start_ea, + ea_t end_ea, + std::vector& results, + size_t max_results = 0); + +ea_t find_first_pattern(const char* pattern, ea_t start_ea, ea_t end_ea); + +bool register_search_bytes(xsql::Database& db); + +} // namespace search +} // namespace idasql diff --git a/src/lib/src/ui_context_provider.cpp b/src/lib/src/ui_context_provider.cpp new file mode 100644 index 0000000..68f95c1 --- /dev/null +++ b/src/lib/src/ui_context_provider.cpp @@ -0,0 +1,917 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include +#include + +#include "ida_headers.hpp" + +#include +#include "idapython_exec.hpp" +#include + +namespace idasql { +namespace ui_context { +namespace { + +static const int k_selection_preview_lines = 10; +static const char* k_capture_action_name = "idasql:capture_ui_context"; +static const char* k_snapshot_marker = "__IDASQL_UI_CTX__"; + +struct UiActionContextSnapshot { + bool valid = false; + + int widget_type = -1; + std::string widget_title; + + bool has_cur_ea = false; + uint64_t cur_ea = 0; + + bool focus_known = false; + bool focus = false; + + uint64_t sequence = 0; + int64_t timestamp_ms = 0; + + xsql::json chooser_rows = xsql::json::array(); +}; + +struct CaptureMetadata { + std::string source = "viewer_fallback"; + bool fresh = false; + bool have_sequence = false; + uint64_t sequence = 0; + bool have_timestamp = false; + int64_t timestamp_ms = 0; + std::string error; +}; + +struct ContextSourceData { + bool has_widget = false; + twidget_type_t widget_type = static_cast(-1); + bool have_title = false; + std::string widget_title; + + bool focus_known = false; + bool focus = false; + + ea_t current_ea = BADADDR; + TWidget* viewer = nullptr; + + bool have_chooser_rows = false; + xsql::json chooser_rows = xsql::json::array(); +}; + +static const char* k_install_helper_code = R"PY( +import builtins +import json +import time +import idaapi +import ida_kernwin + +_ACTION_NAME = "idasql:capture_ui_context" +_SNAPSHOT_KEY = "__idasql_ui_ctx_snapshot_json__" +_SEQUENCE_KEY = "__idasql_ui_ctx_sequence__" +_HANDLER_KEY = "__idasql_ui_ctx_handlers__" + +def _idasql_unregister_action(): + try: + if ida_kernwin.get_action_state(_ACTION_NAME) is not None: + ida_kernwin.unregister_action(_ACTION_NAME) + except Exception: + try: + ida_kernwin.unregister_action(_ACTION_NAME) + except Exception: + pass + +class _IDASQLCaptureHandler(ida_kernwin.action_handler_t): + def __init__(self): + ida_kernwin.action_handler_t.__init__(self) + + def activate(self, ctx): + snap = {} + seq = int(getattr(builtins, _SEQUENCE_KEY, 0)) + 1 + setattr(builtins, _SEQUENCE_KEY, seq) + + snap["sequence"] = seq + snap["timestamp_ms"] = int(time.time() * 1000) + snap["widget_type"] = None + snap["widget_title"] = "" + snap["cur_ea"] = None + snap["focus"] = bool(getattr(ctx, "focus")) if hasattr(ctx, "focus") else None + + widget = getattr(ctx, "widget", None) + if widget is not None: + try: + wt = ida_kernwin.get_widget_type(widget) + snap["widget_type"] = int(wt) + except Exception: + pass + + try: + title = ida_kernwin.get_widget_title(widget) + if title is not None: + snap["widget_title"] = str(title) + except Exception: + pass + + if snap["widget_type"] is None and hasattr(ctx, "widget_type"): + try: + snap["widget_type"] = int(getattr(ctx, "widget_type")) + except Exception: + pass + + if not snap["widget_title"] and hasattr(ctx, "widget_title"): + try: + title = getattr(ctx, "widget_title") + if title is not None: + snap["widget_title"] = str(title) + except Exception: + pass + + try: + cur_ea = int(getattr(ctx, "cur_ea", idaapi.BADADDR)) + if cur_ea != idaapi.BADADDR: + snap["cur_ea"] = cur_ea + except Exception: + pass + + rows = [] + title = snap.get("widget_title", "") + if title: + selected = None + try: + selected = ida_kernwin.get_chooser_rows(title, ida_kernwin.GCRF_SELECTION) + except Exception: + selected = None + if not selected: + try: + selected = ida_kernwin.get_chooser_rows(title, ida_kernwin.GCRF_CURRENT) + except Exception: + selected = None + if selected: + for row in selected: + cols = [] + for text in getattr(row, "texts", []): + cols.append(str(text)) + icon = getattr(row, "icon", -1) + try: + icon = int(icon) + except Exception: + icon = -1 + rows.append({"columns": cols, "icon": icon}) + + snap["chooser_rows"] = rows + setattr(builtins, _SNAPSHOT_KEY, json.dumps(snap, ensure_ascii=True)) + return 1 + + def update(self, ctx): + return ida_kernwin.AST_ENABLE_ALWAYS + +_idasql_unregister_action() +handler = _IDASQLCaptureHandler() +desc = ida_kernwin.action_desc_t( + _ACTION_NAME, + "idasql: Capture UI context", + handler, + None, + "Capture UI context for SQL context queries", + -1 +) +if not ida_kernwin.register_action(desc): + raise RuntimeError("Failed to register idasql capture action") + +if not hasattr(builtins, _HANDLER_KEY): + setattr(builtins, _HANDLER_KEY, {}) +getattr(builtins, _HANDLER_KEY)[_ACTION_NAME] = handler +)PY"; + +static const char* k_uninstall_helper_code = R"PY( +import builtins +import ida_kernwin + +_ACTION_NAME = "idasql:capture_ui_context" +_HANDLER_KEY = "__idasql_ui_ctx_handlers__" + +try: + if ida_kernwin.get_action_state(_ACTION_NAME) is not None: + ida_kernwin.unregister_action(_ACTION_NAME) +except Exception: + try: + ida_kernwin.unregister_action(_ACTION_NAME) + except Exception: + pass + +if hasattr(builtins, _HANDLER_KEY): + handlers = getattr(builtins, _HANDLER_KEY) + if isinstance(handlers, dict) and _ACTION_NAME in handlers: + del handlers[_ACTION_NAME] +)PY"; + +static const char* k_read_snapshot_code = R"PY( +import builtins +snapshot = getattr(builtins, "__idasql_ui_ctx_snapshot_json__", "") +print("__IDASQL_UI_CTX__" + str(snapshot)) +)PY"; + +static std::mutex g_capture_mutex; +static bool g_helper_installed = false; + +static bool is_custom_view_widget_type(twidget_type_t widget_type) { + return widget_type == BWN_DISASM || + widget_type == BWN_HEXVIEW || + widget_type == BWN_PSEUDOCODE || + widget_type == BWN_TILIST; +} + +static bool is_address_widget_type(twidget_type_t widget_type) { + return widget_type == BWN_DISASM || + widget_type == BWN_HEXVIEW || + widget_type == BWN_PSEUDOCODE; +} + +static bool is_chooser_like_widget_type(twidget_type_t widget_type) { + return is_chooser_widget(widget_type) || + widget_type == BWN_FUNCS || + widget_type == BWN_NAMES || + widget_type == BWN_IMPORTS || + widget_type == BWN_BPTS || + widget_type == BWN_TITREE; +} + +static const char* widget_type_name(twidget_type_t widget_type) { + switch (widget_type) { + case BWN_DISASM: return "BWN_DISASM"; + case BWN_HEXVIEW: return "BWN_HEXVIEW"; + case BWN_PSEUDOCODE: return "BWN_PSEUDOCODE"; + case BWN_TILIST: return "BWN_TILIST"; + case BWN_OUTPUT: return "BWN_OUTPUT"; + case BWN_CLI: return "BWN_CLI"; + case BWN_FUNCS: return "BWN_FUNCS"; + case BWN_NAMES: return "BWN_NAMES"; + case BWN_IMPORTS: return "BWN_IMPORTS"; + case BWN_BPTS: return "BWN_BPTS"; + case BWN_TITREE: return "BWN_TITREE"; + default: break; + } + return nullptr; +} + +static std::string widget_type_name_or_unknown(twidget_type_t widget_type) { + const char* known = widget_type_name(widget_type); + if (known != nullptr) { + return known; + } + std::ostringstream out; + out << "UNKNOWN_" << static_cast(widget_type); + return out.str(); +} + +static std::string format_hex_ea(ea_t ea) { + std::ostringstream out; + out << "0x" << std::hex << static_cast(ea); + return out.str(); +} + +static xsql::json make_selection_object(const char* kind) { + return xsql::json{ + {"kind", kind}, + {"start", nullptr}, + {"end", nullptr}, + {"text_lines", xsql::json::array()}, + {"line_count", 0}, + {"rows", xsql::json::array()}, + {"row_count", 0} + }; +} + +static TWidget* get_authoritative_main_viewer() { + TWidget* current_viewer = get_current_viewer(); + if (current_viewer != nullptr) { + return current_viewer; + } + return get_current_widget(); +} + +static void merge_from_active_widget(ContextSourceData& source, TWidget* viewer) { + if (viewer == nullptr) { + return; + } + + source.viewer = viewer; + source.widget_type = get_widget_type(viewer); + source.has_widget = true; + + qstring title; + if (get_widget_title(&title, viewer)) { + source.widget_title = title.c_str(); + source.have_title = !source.widget_title.empty(); + } else { + source.widget_title.clear(); + source.have_title = false; + } + + if (!source.has_widget || !is_address_widget_type(source.widget_type)) { + // Do not keep stale action-context addresses for non-address widgets. + source.current_ea = BADADDR; + return; + } + + const ea_t candidate_ea = get_screen_ea(); + if (candidate_ea != BADADDR) { + source.current_ea = candidate_ea; + } +} + +static bool append_chooser_row( + const std::vector& columns, + int icon, + xsql::json& selection) { + if (columns.empty()) { + return false; + } + + xsql::json columns_json = xsql::json::array(); + std::string display; + for (const std::string& value : columns) { + columns_json.push_back(value); + if (!display.empty()) { + display += " | "; + } + display += value; + } + + xsql::json icon_value = nullptr; + if (icon >= 0) { + icon_value = icon; + } + + selection["rows"].push_back({ + {"columns", std::move(columns_json)}, + {"display", display}, + {"icon", std::move(icon_value)} + }); + selection["text_lines"].push_back(display); + return true; +} + +static bool extract_selection_preview_lines( + TWidget* viewer, + const twinpos_t& selection_start, + const twinpos_t& selection_end, + xsql::json& out_lines) { + if (viewer == nullptr || selection_start.at == nullptr || selection_end.at == nullptr) { + return false; + } + + void* user_data = get_viewer_user_data(viewer); + if (user_data == nullptr) { + return false; + } + + linearray_t line_array(user_data); + line_array.set_place(selection_start.at); + + int line_count = 0; + while (line_count < k_selection_preview_lines) { + const place_t* current_place = line_array.get_place(); + if (current_place == nullptr) { + break; + } + + const int first_line_ref = l_compare2(current_place, selection_start.at, user_data); + const int last_line_ref = l_compare2(current_place, selection_end.at, user_data); + if (last_line_ref > 0) { + break; + } + + const qstring* tagged_line = line_array.down(); + if (tagged_line == nullptr) { + break; + } + + qstring plain = *tagged_line; + tag_remove(&plain); + std::string line = plain.c_str(); + + if (last_line_ref == 0 && selection_end.x >= 0) { + const size_t end_column = static_cast(selection_end.x); + if (end_column < line.size()) { + line.resize(end_column); + } + } else if (first_line_ref == 0 && selection_start.x > 0) { + const size_t start_column = static_cast(selection_start.x); + if (start_column < line.size()) { + line = std::string(start_column, ' ') + line.substr(start_column); + } else { + line = std::string(start_column, ' '); + } + } + + out_lines.push_back(line); + ++line_count; + } + + return line_count > 0; +} + +static bool extract_selection_preview_lines_from_eas( + const twinpos_t& selection_start, + const twinpos_t& selection_end, + xsql::json& out_lines) { + if (selection_start.at == nullptr || selection_end.at == nullptr) { + return false; + } + + ea_t start_ea = selection_start.at->toea(); + ea_t end_ea = selection_end.at->toea(); + if (start_ea == BADADDR || end_ea == BADADDR) { + return false; + } + + if (end_ea < start_ea) { + std::swap(start_ea, end_ea); + } + + const size_t original_size = out_lines.size(); + const size_t max_lines = static_cast(k_selection_preview_lines); + ea_t ea = start_ea; + while (ea != BADADDR && ea <= end_ea && out_lines.size() < max_lines) { + qstring plain_line; + if (generate_disasm_line(&plain_line, ea, GENDSM_REMOVE_TAGS | GENDSM_UNHIDE)) { + out_lines.push_back(std::string(plain_line.c_str())); + } + + const ea_t next = next_head(ea, end_ea + 1); + if (next == BADADDR || next <= ea) { + break; + } + ea = next; + } + + return out_lines.size() > original_size; +} + +static bool extract_listing_selection(TWidget* viewer, xsql::json& out_selection) { + twinpos_t selection_start; + twinpos_t selection_end; + if (!read_selection(viewer, &selection_start, &selection_end)) { + return false; + } + + xsql::json selection = make_selection_object("listing"); + if (selection_start.at != nullptr) { + const ea_t start_ea = selection_start.at->toea(); + if (start_ea != BADADDR) { + selection["start"] = format_hex_ea(start_ea); + } + } + + if (selection_end.at != nullptr) { + const ea_t end_ea = selection_end.at->toea(); + if (end_ea != BADADDR) { + selection["end"] = format_hex_ea(end_ea); + } + } + + (void) extract_selection_preview_lines(viewer, selection_start, selection_end, selection["text_lines"]); + if (selection["text_lines"].empty()) { + (void) extract_selection_preview_lines_from_eas(selection_start, selection_end, selection["text_lines"]); + } + selection["line_count"] = selection["text_lines"].size(); + + const bool has_address_range = !selection["start"].is_null() || !selection["end"].is_null(); + const bool has_text_lines = !selection["text_lines"].empty(); + if (!has_address_range && !has_text_lines) { + return false; + } + + out_selection = std::move(selection); + return true; +} + +static bool extract_chooser_selection_from_widget_title( + const std::string& widget_title, + xsql::json& out_selection) { + if (widget_title.empty()) { + return false; + } + + chooser_row_info_vec_t rows; + bool have_rows = get_chooser_rows(&rows, widget_title.c_str(), GCRF_SELECTION); + if (!have_rows || rows.empty()) { + have_rows = get_chooser_rows(&rows, widget_title.c_str(), GCRF_CURRENT); + } + if (!have_rows || rows.empty()) { + return false; + } + + xsql::json selection = make_selection_object("chooser"); + for (const chooser_row_info_t& row : rows) { + std::vector columns; + columns.reserve(row.texts.size()); + for (const qstring& text : row.texts) { + columns.push_back(text.c_str()); + } + (void) append_chooser_row(columns, row.icon, selection); + } + + selection["line_count"] = selection["text_lines"].size(); + selection["row_count"] = selection["rows"].size(); + if (selection["row_count"].get() == 0) { + return false; + } + + out_selection = std::move(selection); + return true; +} + +static bool extract_chooser_selection_from_snapshot_rows( + const xsql::json& chooser_rows, + xsql::json& out_selection) { + if (!chooser_rows.is_array() || chooser_rows.empty()) { + return false; + } + + xsql::json selection = make_selection_object("chooser"); + for (const xsql::json& row : chooser_rows) { + if (!row.is_object()) { + continue; + } + + std::vector columns; + if (row.contains("columns") && row["columns"].is_array()) { + for (const xsql::json& col : row["columns"]) { + if (col.is_string()) { + columns.push_back(col.get()); + } else if (!col.is_null()) { + columns.push_back(col.dump()); + } + } + } + + int icon = -1; + if (row.contains("icon") && row["icon"].is_number_integer()) { + icon = row["icon"].get(); + } + + (void) append_chooser_row(columns, icon, selection); + } + + selection["line_count"] = selection["text_lines"].size(); + selection["row_count"] = selection["rows"].size(); + if (selection["row_count"].get() == 0) { + return false; + } + + out_selection = std::move(selection); + return true; +} + +static bool execute_python_helper( + const char* snippet, + std::string* error, + std::string* output) { + idasql::idapython::ExecutionResult result = + idasql::idapython::execute_snippet(snippet, "idasql.ui_context_capture"); + if (output != nullptr) { + *output = result.output; + } + if (!result.success) { + if (error != nullptr) { + if (!result.error.empty()) { + *error = result.error; + } else { + *error = "Python helper execution failed"; + } + } + return false; + } + return true; +} + +static bool install_helper_unlocked(std::string* error) { + if (g_helper_installed) { + return true; + } + + if (!execute_python_helper(k_install_helper_code, error, nullptr)) { + return false; + } + + g_helper_installed = true; + return true; +} + +static bool extract_marker_payload( + const std::string& output, + const char* marker, + std::string* payload) { + const std::string marker_text = marker; + const size_t marker_pos = output.rfind(marker_text); + if (marker_pos == std::string::npos) { + return false; + } + + const size_t payload_start = marker_pos + marker_text.size(); + size_t payload_end = output.find('\n', payload_start); + if (payload_end == std::string::npos) { + payload_end = output.size(); + } + + *payload = output.substr(payload_start, payload_end - payload_start); + return true; +} + +static bool parse_snapshot_json( + const std::string& output, + UiActionContextSnapshot& out_snapshot, + std::string* error) { + std::string payload; + if (!extract_marker_payload(output, k_snapshot_marker, &payload)) { + if (error != nullptr) { + *error = "Context snapshot marker not found in Python output"; + } + return false; + } + + if (payload.empty()) { + if (error != nullptr) { + *error = "No action context snapshot available"; + } + return false; + } + + xsql::json snapshot_json; + try { + snapshot_json = xsql::json::parse(payload); + } catch (const std::exception& ex) { + if (error != nullptr) { + *error = std::string("Failed to parse action snapshot JSON: ") + ex.what(); + } + return false; + } + + UiActionContextSnapshot parsed; + parsed.valid = true; + + if (snapshot_json.contains("widget_type") && snapshot_json["widget_type"].is_number_integer()) { + parsed.widget_type = snapshot_json["widget_type"].get(); + } + if (snapshot_json.contains("widget_title") && snapshot_json["widget_title"].is_string()) { + parsed.widget_title = snapshot_json["widget_title"].get(); + } + if (snapshot_json.contains("cur_ea") && snapshot_json["cur_ea"].is_number_integer()) { + parsed.has_cur_ea = true; + parsed.cur_ea = snapshot_json["cur_ea"].get(); + } + if (snapshot_json.contains("focus") && snapshot_json["focus"].is_boolean()) { + parsed.focus_known = true; + parsed.focus = snapshot_json["focus"].get(); + } + if (snapshot_json.contains("sequence") && snapshot_json["sequence"].is_number_integer()) { + const int64_t seq = snapshot_json["sequence"].get(); + if (seq > 0) { + parsed.sequence = static_cast(seq); + } + } + if (snapshot_json.contains("timestamp_ms") && snapshot_json["timestamp_ms"].is_number_integer()) { + parsed.timestamp_ms = snapshot_json["timestamp_ms"].get(); + } + if (snapshot_json.contains("chooser_rows") && snapshot_json["chooser_rows"].is_array()) { + parsed.chooser_rows = snapshot_json["chooser_rows"]; + } + + out_snapshot = std::move(parsed); + return true; +} + +static bool capture_ui_action_context(UiActionContextSnapshot& out_snapshot, std::string* error) { + std::lock_guard lock(g_capture_mutex); + + if (!install_helper_unlocked(error)) { + return false; + } + + if (!process_ui_action(k_capture_action_name)) { + if (error != nullptr) { + *error = "Failed to dispatch UI capture action"; + } + return false; + } + + std::string output; + if (!execute_python_helper(k_read_snapshot_code, error, &output)) { + return false; + } + + return parse_snapshot_json(output, out_snapshot, error); +} + +static bool resolve_action_context_source(ContextSourceData& source, CaptureMetadata& capture) { + UiActionContextSnapshot snapshot; + std::string capture_error; + if (!capture_ui_action_context(snapshot, &capture_error)) { + capture.error = capture_error; + return false; + } + + capture.source = "action"; + capture.fresh = true; + if (snapshot.sequence > 0) { + capture.have_sequence = true; + capture.sequence = snapshot.sequence; + } + if (snapshot.timestamp_ms > 0) { + capture.have_timestamp = true; + capture.timestamp_ms = snapshot.timestamp_ms; + } + + if (snapshot.widget_type >= 0) { + source.has_widget = true; + source.widget_type = static_cast(snapshot.widget_type); + } + if (!snapshot.widget_title.empty()) { + source.have_title = true; + source.widget_title = snapshot.widget_title; + } + if (snapshot.has_cur_ea) { + source.current_ea = static_cast(snapshot.cur_ea); + } + if (snapshot.focus_known) { + source.focus_known = true; + source.focus = snapshot.focus; + } + if (snapshot.chooser_rows.is_array() && !snapshot.chooser_rows.empty()) { + source.have_chooser_rows = true; + source.chooser_rows = snapshot.chooser_rows; + } + + merge_from_active_widget(source, get_authoritative_main_viewer()); + return true; +} + +static void resolve_viewer_fallback_source(ContextSourceData& source) { + merge_from_active_widget(source, get_authoritative_main_viewer()); +} + +static void populate_code_context_json(const ContextSourceData& source, xsql::json& out_code_context) { + if (source.current_ea != BADADDR) { + xsql::json code_context = { + {"has_address", true}, + {"address", format_hex_ea(source.current_ea)} + }; + + if (func_t* current_func = get_func(source.current_ea); current_func != nullptr) { + qstring func_name; + const bool have_func_name = get_func_name(&func_name, current_func->start_ea) > 0; + code_context["function"] = { + {"name", have_func_name ? func_name.c_str() : ""}, + {"start", format_hex_ea(current_func->start_ea)}, + {"end", format_hex_ea(current_func->end_ea)}, + {"size", static_cast(current_func->end_ea - current_func->start_ea)} + }; + } + + if (segment_t* current_seg = getseg(source.current_ea); current_seg != nullptr) { + qstring seg_name; + const bool have_seg_name = get_segm_name(&seg_name, current_seg) > 0; + code_context["segment"] = { + {"name", have_seg_name ? seg_name.c_str() : ""}, + {"start", format_hex_ea(current_seg->start_ea)}, + {"end", format_hex_ea(current_seg->end_ea)} + }; + } + + out_code_context = std::move(code_context); + return; + } + + if (!source.has_widget) { + out_code_context = { + {"has_address", false}, + {"reason", "No active widget"} + }; + return; + } + + std::ostringstream reason; + reason << "Not in address view (type: " << widget_type_name_or_unknown(source.widget_type) << ")"; + out_code_context = { + {"has_address", false}, + {"reason", reason.str()} + }; +} + +static xsql::json build_ui_context_json(const ContextSourceData& source, const CaptureMetadata& capture) { + const bool is_custom_view = source.has_widget && is_custom_view_widget_type(source.widget_type); + const bool is_chooser_like = source.has_widget && is_chooser_like_widget_type(source.widget_type); + + xsql::json type_id = nullptr; + std::string type_name = "unknown"; + if (source.has_widget) { + type_id = static_cast(source.widget_type); + type_name = widget_type_name_or_unknown(source.widget_type); + } + + xsql::json result = { + {"capture", { + {"source", capture.source}, + {"fresh", capture.fresh}, + {"sequence", capture.have_sequence ? xsql::json(capture.sequence) : xsql::json(nullptr)}, + {"timestamp_ms", capture.have_timestamp ? xsql::json(capture.timestamp_ms) : xsql::json(nullptr)}, + {"error", capture.error.empty() ? xsql::json(nullptr) : xsql::json(capture.error)} + }}, + {"focused_widget", { + {"type_id", type_id}, + {"type_name", type_name}, + {"title", source.have_title ? source.widget_title : std::string()}, + {"is_custom_view", is_custom_view}, + {"is_chooser_like", is_chooser_like} + }}, + {"main_viewer", { + {"type_id", type_id}, + {"type_name", type_name}, + {"title", source.have_title ? source.widget_title : std::string()}, + {"is_custom_view", is_custom_view} + }}, + {"code_context", xsql::json::object()}, + {"selection", nullptr} + }; + + if (source.focus_known) { + result["focused_widget"]["focus"] = source.focus; + } + + populate_code_context_json(source, result["code_context"]); + + xsql::json selection; + bool have_selection = false; + if (source.viewer != nullptr) { + have_selection = extract_listing_selection(source.viewer, selection); + } + if (!have_selection && source.have_chooser_rows && + source.has_widget && is_chooser_like_widget_type(source.widget_type)) { + have_selection = extract_chooser_selection_from_snapshot_rows(source.chooser_rows, selection); + } + if (!have_selection && source.have_title && + (!source.has_widget || is_chooser_like_widget_type(source.widget_type))) { + have_selection = extract_chooser_selection_from_widget_title(source.widget_title, selection); + } + if (have_selection) { + result["selection"] = std::move(selection); + } + + return result; +} + +} // namespace + +bool initialize_capture_helper(std::string* error) { + std::lock_guard lock(g_capture_mutex); + return install_helper_unlocked(error); +} + +void shutdown_capture_helper() { + std::lock_guard lock(g_capture_mutex); + if (!g_helper_installed) { + return; + } + std::string ignored; + (void) execute_python_helper(k_uninstall_helper_code, &ignored, nullptr); + g_helper_installed = false; +} + +xsql::json get_ui_context_json() { + ContextSourceData source; + CaptureMetadata capture; + + if (!resolve_action_context_source(source, capture)) { + capture.source = "viewer_fallback"; + capture.fresh = false; + if (capture.error.empty()) { + capture.error = "Action capture unavailable"; + } + resolve_viewer_fallback_source(source); + } + + if (!capture.fresh && !source.has_widget) { + const std::string unavailable_hint = "UI context unavailable in current runtime (no active widget)"; + if (capture.error.empty()) { + capture.error = unavailable_hint; + } else if (capture.error.find(unavailable_hint) == std::string::npos) { + capture.error += "; " + unavailable_hint; + } + } + + return build_ui_context_json(source, capture); +} + +} // namespace ui_context +} // namespace idasql diff --git a/src/plugin/CMakeLists.txt b/src/plugin/CMakeLists.txt index ecc34fd..4a3fb71 100644 --- a/src/plugin/CMakeLists.txt +++ b/src/plugin/CMakeLists.txt @@ -3,7 +3,9 @@ # Plugin sources ida_add_plugin(idasql_plugin - SOURCES main.cpp + SOURCES + main.cpp + ui_context_function.cpp ) # Output as idasql.dll (not idasql_plugin.dll) @@ -13,11 +15,11 @@ set_target_properties(idasql_plugin PROPERTIES OUTPUT_NAME "idasql") target_include_directories(idasql_plugin PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../lib/include ) -target_link_libraries(idasql_plugin PRIVATE xsql::xsql) +target_link_libraries(idasql_plugin PRIVATE idasql xsql::xsql) # USE_DANGEROUS_FUNCTIONS: Disable IDA's safe function macros (pro.h: strcpy, sprintf, getenv, etc.) # USE_STANDARD_FILE_FUNCTIONS: Disable IDA's file function macros (fpro.h: fgetc, fputc, etc.) -# Required for std::getenv and file I/O in agent_settings.hpp +# Required for std::lib/file APIs used by plugin and shared libraries. target_compile_definitions(idasql_plugin PRIVATE USE_DANGEROUS_FUNCTIONS USE_STANDARD_FILE_FUNCTIONS @@ -39,33 +41,25 @@ endif() target_compile_definitions(idasql_plugin PRIVATE USE_HEXRAYS) # ============================================================================ -# AI Agent support (from parent project) +# MCP support (from parent project) # ============================================================================ -if(IDASQL_WITH_AI_AGENT) - # Add ai_agent and mcp_server source files +if(IDASQL_WITH_MCP) + # Add MCP server source files target_sources(idasql_plugin PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../common/ai_agent.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../common/mcp_server.cpp ) target_include_directories(idasql_plugin PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${CMAKE_CURRENT_SOURCE_DIR}/../../external/libagents/external/fastmcpp/include - ${IDASQL_GENERATED_INCLUDE_DIR} ) - # Link against libagents and fastmcpp - target_link_libraries(idasql_plugin PRIVATE libagents fastmcpp_core) + # Link against fastmcpp + target_link_libraries(idasql_plugin PRIVATE fastmcpp_core) # Define the feature flag - target_compile_definitions(idasql_plugin PRIVATE IDASQL_HAS_AI_AGENT) + target_compile_definitions(idasql_plugin PRIVATE IDASQL_HAS_MCP) - # Add dependency on prompt generation - if(TARGET generate_prompt) - add_dependencies(idasql_plugin generate_prompt) - endif() - - message(STATUS "idasql_plugin: AI agent support enabled") + message(STATUS "idasql_plugin: MCP support enabled") endif() # ============================================================================ @@ -74,6 +68,7 @@ endif() target_sources(idasql_plugin PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common/http_server.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../common/welcome_query.cpp ) target_include_directories(idasql_plugin PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common diff --git a/src/plugin/main.cpp b/src/plugin/main.cpp index 5f501ba..72d4ae1 100644 --- a/src/plugin/main.cpp +++ b/src/plugin/main.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + /** * idasql_plugin - IDA plugin providing SQL interface to IDA databases * @@ -34,23 +37,22 @@ #include #ifdef _WIN32 -// Include shlobj.h BEFORE IDA headers: agent_settings.hpp pulls in -// which defines CM_MASK/CM_STATE enums in shobjidl_core.h. IDA's typeinf.hpp -// also defines CM_MASK (const uchar). Including Windows headers first lets -// IDA's definition shadow the Windows enum without conflict. -#include #include #include #include #include #include #include +#include +#include #else #include #include #include #include #include +#include +#include #include #endif @@ -59,18 +61,19 @@ // Plugin control codes #include "../common/plugin_control.hpp" +#include "ui_context_function.hpp" // Version info #include "../common/idasql_version.hpp" -// MCP server (when AI agent is enabled) -#ifdef IDASQL_HAS_AI_AGENT +// MCP server (optional) +#ifdef IDASQL_HAS_MCP #include "../common/mcp_server.hpp" -#include "../common/ai_agent.hpp" #endif // HTTP server for .http REPL command #include "../common/http_server.hpp" +#include "../common/json_utils.hpp" //============================================================================= // IDA execute_sync wrapper @@ -118,10 +121,10 @@ struct idasql_plugmod_t : public plugmod_t std::mutex query_meta_mutex_; std::string active_query_; std::chrono::steady_clock::time_point active_query_started_{}; + bool idapython_runtime_acquired_ = false; -#ifdef IDASQL_HAS_AI_AGENT +#ifdef IDASQL_HAS_MCP idasql::IDAMCPServer mcp_server_; - std::unique_ptr mcp_agent_; // AI agent for MCP #endif idasql::IDAHTTPServer http_server_; @@ -152,8 +155,22 @@ struct idasql_plugmod_t : public plugmod_t { engine_ = std::make_unique(); if (engine_->is_valid()) { + if (!idasql::plugin_functions::register_ui_context_sql_functions(engine_->database())) { + msg("IDASQL: Failed to register get_ui_context_json()\n"); + } msg("IDASQL v" IDASQL_VERSION_STRING ": Query engine initialized\n"); + std::string py_capture_error; + idapython_runtime_acquired_ = idasql::idapython::runtime_acquire(&py_capture_error); + if (!idapython_runtime_acquired_) { + msg("IDASQL: IDAPython capture runtime init failed: %s\n", py_capture_error.c_str()); + } + + std::string ui_context_capture_error; + if (!idasql::ui_context::initialize_capture_helper(&ui_context_capture_error)) { + msg("IDASQL: UI context capture helper init failed: %s\n", ui_context_capture_error.c_str()); + } + // SQL executor that uses execute_sync for thread safety auto sql_executor = [this](const std::string& sql) -> std::string { idasql::QueryResult result = run_query_sync(sql); @@ -167,11 +184,11 @@ struct idasql_plugmod_t : public plugmod_t // Create CLI with execute_sync wrapper for thread safety cli_ = std::make_unique(sql_executor); -#ifdef IDASQL_HAS_AI_AGENT +#ifdef IDASQL_HAS_MCP // Setup MCP callbacks cli_->session().callbacks().mcp_status = [this]() -> std::string { if (mcp_server_.is_running()) { - return idasql::format_mcp_status(mcp_server_.port(), true); + return idasql::format_mcp_status(mcp_server_.port(), true, mcp_server_.bind_addr()); } else { // Auto-start if not running return start_mcp_server(); @@ -185,7 +202,6 @@ struct idasql_plugmod_t : public plugmod_t cli_->session().callbacks().mcp_stop = [this]() -> std::string { if (mcp_server_.is_running()) { mcp_server_.stop(); - mcp_agent_.reset(); return "MCP server stopped"; } else { return "MCP server not running"; @@ -196,7 +212,7 @@ struct idasql_plugmod_t : public plugmod_t // Setup HTTP server callbacks cli_->session().callbacks().http_status = [this]() -> std::string { if (http_server_.is_running()) { - return idasql::format_http_status(http_server_.port(), true); + return idasql::format_http_status(http_server_.port(), true, http_server_.bind_addr()); } else { return "HTTP server not running\nUse '.http start' to start\n"; } @@ -223,11 +239,11 @@ struct idasql_plugmod_t : public plugmod_t } } -#ifdef IDASQL_HAS_AI_AGENT +#ifdef IDASQL_HAS_MCP std::string start_mcp_server(int req_port = 0, const std::string& bind_addr = "127.0.0.1") { if (mcp_server_.is_running()) { - return idasql::format_mcp_status(mcp_server_.port(), true); + return idasql::format_mcp_status(mcp_server_.port(), true, mcp_server_.bind_addr()); } // SQL executor that uses execute_sync for thread safety @@ -240,62 +256,26 @@ struct idasql_plugmod_t : public plugmod_t } }; - // Create AI agent for MCP (runs on MCP thread, SQL via execute_sync) - mcp_agent_ = std::make_unique(sql_executor); - mcp_agent_->start(); - - // MCP ask callback - agent runs on MCP thread - idasql::AskCallback ask_cb = [this](const std::string& question) -> std::string { - if (!mcp_agent_) return "Error: AI agent not available"; - return mcp_agent_->query(question); - }; - // Start MCP server - int port = mcp_server_.start(req_port, sql_executor, ask_cb, bind_addr); + int port = mcp_server_.start(req_port, sql_executor, bind_addr); if (port <= 0) { - mcp_agent_.reset(); return "Error: Failed to start MCP server"; } - return idasql::format_mcp_info(port, true); + return idasql::format_mcp_info(port, mcp_server_.bind_addr()); } #endif std::string start_http_server(int req_port = 0, const std::string& bind_addr = "127.0.0.1") { if (http_server_.is_running()) { - return idasql::format_http_status(http_server_.port(), true); + return idasql::format_http_status(http_server_.port(), true, http_server_.bind_addr()); } // SQL executor that uses execute_sync for thread safety and returns JSON idasql::HTTPQueryCallback sql_cb = [this](const std::string& sql) -> std::string { idasql::QueryResult result = run_query_sync(sql); - - xsql::json j = {{"success", result.success}}; - if (result.success) { - j["columns"] = result.columns; - xsql::json rows = xsql::json::array(); - for (const auto& row : result.rows) { - rows.push_back(row.values); - } - j["rows"] = rows; - j["row_count"] = result.rows.size(); - if (!result.warnings.empty()) { - j["warnings"] = result.warnings; - } - if (result.timed_out) { - j["timed_out"] = true; - } - if (result.partial) { - j["partial"] = true; - } - if (result.elapsed_ms > 0) { - j["elapsed_ms"] = result.elapsed_ms; - } - } else { - j["error"] = result.error; - } - return j.dump(); + return idasql::query_result_to_json_safe(result); }; // Start HTTP server, no queue (plugin mode) @@ -304,23 +284,28 @@ struct idasql_plugmod_t : public plugmod_t return "Error: Failed to start HTTP server"; } - return idasql::format_http_info(port, "Type '.http stop' to stop the server."); + return idasql::format_http_info( + port, http_server_.bind_addr(), "Type '.http stop' to stop the server."); } ~idasql_plugmod_t() { -#ifdef IDASQL_HAS_AI_AGENT +#ifdef IDASQL_HAS_MCP // Stop MCP server before destroying engine if (mcp_server_.is_running()) { mcp_server_.stop(); } - mcp_agent_.reset(); #endif // Stop HTTP server before destroying engine if (http_server_.is_running()) { http_server_.stop(); } if (cli_) cli_->uninstall(); + idasql::ui_context::shutdown_capture_helper(); + if (idapython_runtime_acquired_) { + idasql::idapython::runtime_release(); + idapython_runtime_acquired_ = false; + } engine_.reset(); msg("IDASQL: Plugin terminated\n"); } @@ -332,7 +317,11 @@ struct idasql_plugmod_t : public plugmod_t switch (arg) { case 0: msg("IDASQL v" IDASQL_VERSION_STRING " - SQL interface for IDA database\n"); - msg("Use dot commands: .http, .mcp, .help\n"); + msg("Use dot commands: .http"); +#ifdef IDASQL_HAS_MCP + msg(", .mcp"); +#endif + msg(", .help\n"); return true; case PLUGIN_ARG_TOGGLE_CLI: @@ -378,7 +367,9 @@ plugin_t PLUGIN = "\n" "Auto-installs CLI on load. Use dot commands:\n" " .http start/stop - HTTP REST server\n" +#ifdef IDASQL_HAS_MCP " .mcp start/stop - MCP server\n" +#endif " .help - Show all commands\n" "\n" "run(23): Toggle CLI (command line interface)", diff --git a/src/plugin/ui_context_function.cpp b/src/plugin/ui_context_function.cpp new file mode 100644 index 0000000..0a74e28 --- /dev/null +++ b/src/plugin/ui_context_function.cpp @@ -0,0 +1,33 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#include + +#include +#include + +#include "ui_context_function.hpp" + +namespace idasql { +namespace plugin_functions { +namespace { + +static void sql_get_ui_context_json(xsql::FunctionContext& ctx, int argc, xsql::FunctionArg* /*argv*/) { + if (argc != 0) { + ctx.result_error("get_ui_context_json requires 0 arguments"); + return; + } + ctx.result_text(idasql::ui_context::get_ui_context_json().dump()); +} + +} // namespace + +bool register_ui_context_sql_functions(xsql::Database& db) { + return xsql::is_ok(db.register_function( + "get_ui_context_json", + 0, + xsql::ScalarFn(sql_get_ui_context_json))); +} + +} // namespace plugin_functions +} // namespace idasql diff --git a/src/plugin/ui_context_function.hpp b/src/plugin/ui_context_function.hpp new file mode 100644 index 0000000..fc99c19 --- /dev/null +++ b/src/plugin/ui_context_function.hpp @@ -0,0 +1,14 @@ +// Copyright (c) Elias Bachaalany +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +namespace idasql { +namespace plugin_functions { + +bool register_ui_context_sql_functions(xsql::Database& db); + +} // namespace plugin_functions +} // namespace idasql