# Function-symbol line format emitted by the function writer in TSym.py:
# address;returnVal;callconv;nmsp_size;[namespace];name;args_size;[args];hasVarArgs;

#region imports
import ida_idaapi
#endregion imports


class TSymPluginMod(ida_idaapi.plugmod_t):
    """Plugin module object handed back by ``TSymPlugin.init``."""

    def run(self, arg):
        """Print the argument the plugin was invoked with."""
        print("Running TSym plugin with argument:", arg)


class TSymPlugin(ida_idaapi.plugin_t):
    """Plugin descriptor: metadata attributes plus the ``init`` factory."""

    wanted_name = "TSym"
    wanted_hotkey = "Ctrl-Shift-T"
    comment = "A plugin to export and import TSym symbols"
    help = "Export and import TSym symbols"
    flags = ida_idaapi.PLUGIN_MULTI

    def init(self):
        """Create the module object that implements ``run``."""
        return TSymPluginMod()


def PLUGIN_ENTRY():
    """Entry point: return a fresh ``TSymPlugin`` descriptor."""
    return TSymPlugin()
# these can probably be put in a common file
#region utils
class CallingConvention(Enum):
    """Strings accepted in the ``callconv`` field of a symbol line."""

    CDECL = "__cdecl"
    STDCALL = "__stdcall"
    FASTCALL = "__fastcall"
    THISCALL = "__thiscall"
    VECTORCALL = "__vectorcall"
    UNKNOWN = "unknown"


@dataclass
class Argument:
    """One ``name;type`` pair read from the ``[args]`` section."""

    name: str
    type: str


@dataclass
class Symbol:
    """A fully parsed function-symbol line."""

    address: int
    return_type: str
    call_convention: CallingConvention
    namespaces: List[str]
    name: str
    args: List[Argument]
    has_var_args: bool


def _parse_symbol_fields(parts: List[str]) -> Symbol:
    """Build a ``Symbol`` from the ``;``-split fields of one line.

    Raises:
        ValueError: if a field is missing or cannot be converted. The
            message names the offending field.
    """
    cursor = 0

    def take(missing_message: str) -> str:
        # Every read goes through this bounds check so a truncated line can
        # never raise IndexError (including while building an error message).
        nonlocal cursor
        if cursor >= len(parts):
            raise ValueError(missing_message)
        value = parts[cursor]
        cursor += 1
        return value

    def take_int(label: str, base: int = 10) -> int:
        raw = take(f"Missing {label}")
        try:
            return int(raw, base)
        except ValueError:
            raise ValueError(f"Invalid {label}: {raw}") from None

    address = take_int("address", 16)
    return_type = take("Missing return type")

    raw_cc = take("Missing calling convention")
    try:
        call_convention = CallingConvention(raw_cc)
    except ValueError:
        raise ValueError(f"Invalid calling convention: {raw_cc}") from None

    nmsp_size = take_int("namespace size")
    namespaces = [take("Namespace entry missing") for _ in range(nmsp_size)]

    name = take("Missing symbol name")

    args_size = take_int("args size")
    args: List[Argument] = []
    for _ in range(args_size):
        arg_name = take("Incomplete argument entry")
        arg_type = take("Incomplete argument entry")
        args.append(Argument(name=arg_name, type=arg_type))

    has_var_args = take("Missing hasVarArgs flag").strip().lower() == "true"

    return Symbol(
        address=address,
        return_type=return_type,
        call_convention=call_convention,
        namespaces=namespaces,
        name=name,
        args=args,
        has_var_args=has_var_args,
    )


# address;retType;callconv;nmsp_size;[namespace];name;args_size;[args];hasVarArgs;
def parse_symbols(data: str) -> List[Symbol]:
    """Parse the text of a TSym symbols file into ``Symbol`` objects.

    The first line (version info) is skipped, as are blank lines. A malformed
    line is reported with ``print`` and skipped as a whole; it never produces
    a partially filled ``Symbol`` and never aborts the remaining lines.

    Args:
        data: Full text of the symbols file.

    Returns:
        The symbols that parsed cleanly, in file order.
    """
    symbols: List[Symbol] = []
    # start=2: the header is line 1, so reported numbers match the file.
    for line_num, line in enumerate(data.splitlines()[1:], start=2):
        if not line.strip():
            continue
        try:
            symbols.append(_parse_symbol_fields(line.split(";")))
        except ValueError as err:
            print(f"Line {line_num}: {err}")

    return symbols


@dataclass
class Comment:
    """A parsed comment line (``address;string;type``)."""

    address: int
    comment: str
    type: int


#address;string;type
def parse_comments(data: str) -> List[Comment]:
    """Parse the text of a TSym comments file into ``Comment`` objects.

    The first line (version info) and blank lines are skipped. Malformed
    lines are reported with ``print`` and skipped.

    Args:
        data: Full text of the comments file.

    Returns:
        The comments that parsed cleanly, in file order (an empty list when
        there are none).
    """
    comments: List[Comment] = []
    for line_num, line in enumerate(data.splitlines()[1:], start=2):
        if not line.strip():
            continue

        parts = line.split(";")
        if len(parts) < 3:
            print(f"Line {line_num}: Expected address;string;type, got: {line}")
            continue

        try:
            address = int(parts[0], 16)
        except ValueError:
            print(f"Line {line_num}: Invalid address: {parts[0]}")
            continue

        try:
            comment_type = int(parts[2])
        except ValueError:
            print(f"Line {line_num}: Invalid type: {parts[2]}")
            continue

        comments.append(
            Comment(address=address, comment=parts[1], type=comment_type)
        )

    return comments

# TODO: add parsing for labels and types
#endregion utils
#region imports
import idc
import idaapi
import ida_idaapi
import ida_hexrays
import ida_name
import ida_kernwin
import os
import re
from enum import Enum
from dataclasses import dataclass
from typing import List
from tkinter import Tk
from tkinter.filedialog import askdirectory
#endregion imports


def rename_func_var(func: idaapi.cfuncptr_t, offset: int, name: str):
    """Rename the ``offset``-th argument of a decompiled function.

    Args:
        func: Decompilation result for the function.
        offset: Zero-based position of the argument in the prototype.
        name: New name for that argument.
    """
    lvars = func.get_lvars()
    # get_lvars() lists every local variable, not only arguments; argidx maps
    # an argument position to its index in that list.
    arg_indexes = func.argidx
    if offset >= len(arg_indexes):
        print(f"Offset {offset} is out of range for function {func.entry_ea}")
        return

    old_name = lvars[arg_indexes[offset]].name
    if not ida_hexrays.rename_lvar(func.entry_ea, old_name, name):
        print(f"Failed to rename {old_name} to {name} in function {func.entry_ea}")


class TSymPluginMod(ida_idaapi.plugmod_t):
    """Plugin module: asks whether to export or import and dispatches."""

    def run(self, arg):
        """Ask the user for export/import and run the chosen action."""
        option = ida_kernwin.ask_buttons("Export symbols", "Import symbols", "Cancel", 1, "Do you want to export or import TSym symbols?")
        if option == 1:
            self.export_symbols()
        elif option == 0:
            self.import_symbols()

    def export_symbols(self):
        """Ask for a target folder; the export itself is not implemented yet."""
        print("Exporting TSym symbols...")
        directory = self.ask_directory("Select folder to export symbols")
        if directory:
            print(f"Exporting symbols to {directory}...")
            # TODO: implement export logic

    def import_symbols(self):
        """Ask for a symbols file and apply its names to the database."""
        print("Importing TSym symbols...")
        # TODO: add comments, labels and types support, should we select each file individually? or just the directory?
        file = ida_kernwin.ask_file(0, "*.txt", "Select TSym symbols.txt file")
        if not file:
            return

        print(f"Importing symbols from {file}...")
        with open(file, "r") as f:
            data = f.read()

        #region parse symbols
        auto_name_prefixes = ("FUN_", "thunk_FUN_", "sub_")
        default_arg_prefixes = ("arg", "var", "unk", "dword", "byte")
        bad_chars = str.maketrans({ch: "_" for ch in "~`,<>'\"*=!^"})

        for symbol in parse_symbols(data):
            # auto-generated names carry no information worth importing
            if symbol.name.startswith(auto_name_prefixes):
                continue

            name = symbol.name.translate(bad_chars)
            namespaces = "::".join(
                ns for ns in symbol.namespaces if ns != "Global"
            ).translate(bad_chars)
            full_name = f"{namespaces}::{name}" if namespaces else name

            print(f"Importing symbol: {full_name} at address {hex(symbol.address)}")
            idc.set_name(symbol.address, full_name, ida_name.SN_FORCE)

            # Keep only arguments with a meaningful (non-default) name.
            wanted = [
                (i, arg.name)
                for i, arg in enumerate(symbol.args)
                if not arg.name.startswith(default_arg_prefixes)
            ]
            if not wanted:
                continue

            # Decompile once per function rather than once per argument.
            try:
                cfunc = idaapi.decompile(symbol.address)
            except ida_hexrays.DecompilationFailure:
                cfunc = None
            if cfunc is None:
                print(f"Failed to decompile function at {hex(symbol.address)}")
                continue

            for i, arg_name in wanted:
                rename_func_var(cfunc, i, arg_name)
        #endregion parse symbols

    # ida has a method for asking for a file, but not for a directory ??
    def ask_directory(self, title):
        """Show a Tk folder picker and return the chosen path ('' if none)."""
        root = Tk()
        try:
            root.withdraw()
            root.attributes('-topmost', True)
            return askdirectory(title=title)
        finally:
            # always tear the hidden root window down, even if the dialog raises
            root.destroy()
# Ordered rewrite rules for parse_helper. Order matters: later rules see the
# output of earlier ones. Patterns are compiled once at import time.
_HELPER_RULES_BEFORE_NAME = [
    # remove comments
    (re.compile(r'//.*'), ''),
    (re.compile(r'/\*.*?\*/', re.DOTALL), ''),
    # access modifiers
    (re.compile(r'public'), ''),
    (re.compile(r'private'), ''),
    (re.compile(r'protected'), ''),
    # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/FUI/RenderNode/fuiRenderNodeEditText.h#L2
    (re.compile(r':\s+(public)?uint8_t (\*(\s+))+'), ""),
    # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Enums/C4JStorage/ESaveIncompleteType.h#L1
    (re.compile(r':\s+(public)?uint63_t '), ""),
    (re.compile(r'::'), '__'),
    # "int *64 entry" to "int entry" to adhere with c style syntax
    (re.compile(r' \*[0-9]+ '), ' '),
    # adhere to c syntax
    (re.compile(r'ulonglong'), "unsigned long long"),
    (re.compile(r'longlong'), "long long"),
    # not 100% sure what the "pointer" type is in ghidra
    (re.compile(r'pointer[0-9]*'), 'uint64_t'),
    # ida doesnt like <>. any other ideas for this?
    (re.compile(r'[<>]'), '__'),
    # "wchar_t[8] name" to "wchar_t name[8]" to adhere with c style syntax
    (
        re.compile(r'\b(?P<type>[\w:<>]+)\s*\[(?P<size>\d+)\]\s*(?P<name>\w+)\b'),
        r'\g<type> \g<name>[\g<size>]',
    ),
]

# Applied only when the input is a bare type name (isName=True).
_HELPER_RULES_NAME_ONLY = [
    (re.compile(r'struct'), '_struct'),
    (re.compile(r'union'), '_union'),
    (re.compile(r'enum'), '_enum'),
    # remove inheritance, will be added when we properly parse the types
    (re.compile(r':.+'), ''),
    (re.compile(r'\*'), ''),
    (re.compile(r','), '_'),
]

_HELPER_RULES_AFTER_NAME = [
    (re.compile(r'\('), '_'),
    (re.compile(r'\)'), '_'),
    # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/IdMapper%253Cclass_Item%252A___ptr64%253E.h#L1
    (re.compile(r'Item\*'), "Item_"),
    # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/TypedBoxed/TypedBoxed%253Cclass_PlanksBlock/Variant%252A___ptr64%253E.h#L2
    (re.compile(r'Variant\*'), "Variant_"),
    # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/struct.h#L1
    (re.compile(r'struct struct'), "struct _struct"),
    # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Enums/enum.h#L1
    (re.compile(r'enum enum'), "enum _enum"),
]

# Characters that are simply deleted from the text.
_HELPER_STRIP_CHARS = str.maketrans("", "", "~`!^")


def parse_helper(data: str, isName: bool = False) -> str:
    """Rewrite exported header text (or a bare type name) into plainer C.

    The text is passed through a fixed, ordered list of regex substitutions:
    comments and access modifiers are removed, ``::`` and ``<``/``>`` become
    ``__``, ``type[N] name`` becomes ``type name[N]``, a few type spellings
    are replaced, and every ``}`` gets a trailing ``;``. If the text contains
    ``enum`` anywhere, every ``;`` is turned into ``,``.

    Args:
        data: Header text, or a single type name when ``isName`` is true.
        isName: Apply the extra name-only rules (prefix ``struct``/``union``/
            ``enum`` with ``_``, cut everything from the first ``:`` on each
            line, drop ``*``, turn ``,`` into ``_``).

    Returns:
        The rewritten text.
    """
    rules = list(_HELPER_RULES_BEFORE_NAME)
    if isName:
        rules += _HELPER_RULES_NAME_ONLY
    rules += _HELPER_RULES_AFTER_NAME

    for pattern, replacement in rules:
        data = pattern.sub(replacement, data)

    data = data.translate(_HELPER_STRIP_CHARS)

    if "enum" in data:
        data = data.replace(';', ',')

    # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/ResourceLocation.h#L12
    data = data.replace('namespace', '_namespace')
    # semicolons at the end of defs
    data = data.replace('}', '};')

    return data
def readDirRecusrive(dir: str):
    """Return every ``.h`` file under ``dir``, searching sub-folders too.

    Paths are built with ``os.path.join`` and have backslashes replaced by
    ``/``. Entries are visited in sorted order so the result is stable
    between runs.
    """
    out = []
    for current_dir, subdirs, names in os.walk(dir):
        subdirs.sort()  # in-place sort fixes the order os.walk descends in
        for name in sorted(names):
            if name.endswith(".h"):
                out.append(os.path.join(current_dir, name).replace("\\", "/"))
    return out


@dataclass
class Struct:
    """A declared type name found in a header."""

    type: str  # "struct", "union", "enum" or "typedef"
    data: str  # the captured name after parse_helper(..., True)


# (kind, pattern) pairs in the order results are reported.
_TYPE_NAME_PATTERNS = (
    ("struct", re.compile(r'struct (.+) {')),
    ("union", re.compile(r'union (.+) {')),
    ("enum", re.compile(r'enum (.+) {')),
    ("typedef", re.compile(r'typedef (\w+) ')),
)


def getStructNames(data: str) -> List[Struct]:
    """Collect the struct/union/enum/typedef names declared in header text.

    Each captured name is normalised with ``parse_helper(name, True)``.
    All structs come first, then unions, enums and typedefs.
    """
    return [
        Struct(type=kind, data=parse_helper(name, True))
        for kind, pattern in _TYPE_NAME_PATTERNS
        for name in pattern.findall(data)
    ]
#endregion utils

#region import/export functions
_AUTO_FUNCTION_PREFIXES = ("FUN_", "thunk_FUN_", "sub_")
_DEFAULT_ARG_PREFIXES = ("arg", "var", "unk", "dword", "byte")
_BAD_NAME_CHARS = str.maketrans({ch: "_" for ch in "~`,<>'\"*=!^"})


def import_symbols(file: str):
    """Apply the function names and argument names from a symbols file.

    Symbols whose name starts with ``FUN_``, ``thunk_FUN_`` or ``sub_`` are
    skipped. Namespaces other than ``Global`` are joined with ``::`` in front
    of the name. Arguments whose name starts with a default prefix are left
    untouched.

    Args:
        file: Path of the symbols text file.
    """
    print(f"Importing symbols from {file}...")
    with open(file, "r") as f:
        data = f.read()

    for symbol in parse_symbols(data):
        if symbol.name.startswith(_AUTO_FUNCTION_PREFIXES):
            continue

        name = symbol.name.translate(_BAD_NAME_CHARS)
        namespaces = "::".join(
            ns for ns in symbol.namespaces if ns != "Global"
        ).translate(_BAD_NAME_CHARS)
        full_name = f"{namespaces}::{name}" if namespaces else name

        print(f"Importing symbol: {full_name} at address {hex(symbol.address)}")
        idc.set_name(symbol.address, full_name, ida_name.SN_FORCE)

        wanted = [
            (i, arg.name)
            for i, arg in enumerate(symbol.args)
            if not arg.name.startswith(_DEFAULT_ARG_PREFIXES)
        ]
        if not wanted:
            continue

        # Decompile once per function instead of once per argument.
        try:
            cfunc = idaapi.decompile(symbol.address)
        except ida_hexrays.DecompilationFailure:
            cfunc = None
        if cfunc is None:
            print(f"Failed to decompile function at {hex(symbol.address)}")
            continue

        for i, arg_name in wanted:
            rename_func_var(cfunc, i, arg_name)


# caused by https://github.com/DexrnZacAttack/TSym/issues/1
_IGNORED_HEADERS = frozenset([
    "char[0].h", "char[1].h", "char[2].h", "uchar[8].h", "uchar[1].h",
    "wchar_t[8].h", "wchar_t[0].h", "ulonglong[2].h", "ulonglong[1].h",
    "undefined[1].h", "undefined[2].h", "undefined[4].h", "undefined[8].h",
    "undefined[16].h", "undefined.h", "wchar_t.h", "char.h", "uchar.h",
    "byte.h", "word.h", "dword.h", "qword.h", "uint8_t.h", "uint16_t.h",
    "uint32_t.h", "uint64_t.h", "int8_t.h", "int16_t.h", "int32_t.h",
    "int64_t.h", "ulong.h", "long.h", "ushort.h", "short.h", "uint.h",
    "int.h", "bool.h", "float.h", "double.h", "uint32.h",
])

# ghidra uses this when it doesn't know the type
_UNDEFINED_TO_UINT = {
    "undefined": "uint8_t",
    "undefined1": "uint8_t",
    "undefined2": "uint16_t",
    "undefined3": "uint32_t",
    "undefined4": "uint32_t",
    "undefined5": "uint64_t",
    "undefined6": "uint64_t",
    "undefined7": "uint64_t",
    "undefined8": "uint64_t",
}

_QUOTED_INCLUDE_RE = re.compile(r'\"(.+)\"')


def _is_ignored_header(path: str) -> bool:
    """True when the file *name* (not merely the path suffix) is ignored.

    Comparing the basename keeps e.g. ``Point.h`` from matching ``int.h``.
    """
    return os.path.basename(path.replace("\\", "/")).lower() in _IGNORED_HEADERS


def import_structs(mainDir: str):
    """Feed every ``.h`` file under ``mainDir`` to ``idaapi.parse_decls``.

    All type names are first declared as empty types so headers can refer to
    each other in any order; then each header is parsed, with the files it
    ``#include``s parsed before it.

    Args:
        mainDir: Root folder of the exported headers; quoted ``#include``
            paths are resolved relative to it.
    """
    print(f"Importing structs from {mainDir}...")
    parsed = set()

    def parse_type(dir: str):
        if _is_ignored_header(dir) or "Other" in dir:
            return
        dir = re.sub(r'\[.*?\]', '', dir) # "Other/std/shared_ptr%3CMultiplayerLocalPlayer%3E[4].h" -> "Other/std/shared_ptr%3CMultiplayerLocalPlayer%3E.h"
        dir = re.sub(r'%3A%3A', "/", dir) # fixes a few import strings

        if dir in parsed or "/functions/" in dir or _is_ignored_header(dir):
            return

        # mark before recursing so include cycles terminate
        parsed.add(dir)

        try:
            with open(dir, "r") as f:
                data = f.read()
        except OSError as err:
            # an include may point at a file that was never exported
            print(f"Skipping {dir}: {err}")
            return

        for undefined_type, uint_type in _UNDEFINED_TO_UINT.items():
            data = re.sub(re.escape(undefined_type + " "), uint_type + " ", data)
        data = re.sub(r'undefined[0-9]*', "uint64_t", data)

        data = re.sub(r'dword', "uint32_t", data) # ida doesnt support dword in structs

        data = parse_helper(data)

        imports = []
        for line in data.splitlines():
            if line.startswith("#include"):
                # match inside quotes aka imported file
                match = _QUOTED_INCLUDE_RE.search(line)
                if match is None:
                    # no quoted path on this line; nothing to resolve
                    continue
                imports.append(mainDir + "/" + match.group(1))
        data = re.sub(r'#include \"(.+)\"', "", data)

        for imported in imports:
            parse_type(imported)

        errors = idaapi.parse_decls(None, data, None, idaapi.PT_SIL)
        if errors:
            print(f"Errors: {errors} in {dir}")

    files = readDirRecusrive(mainDir)

    # avoid circular dependencies by pre-defining all structs and unions as empty
    deps = []
    for file in files:
        if "Other" in file and "std" not in file: # a lot of wack unneeded stuff in Other that would have required a lot of work to parse, all of the actual important types are included
            continue

        with open(file, "r") as f:
            deps += getStructNames(f.read())

    decls = []
    for dep in deps:
        if dep.type == "typedef":
            # NOTE(review): the typedef pattern captures the word right after
            # "typedef"; confirm this is the name that should be pre-declared.
            decls.append(f"typedef {dep.data};\n")
        else:
            decls.append(f"{dep.type} {dep.data} {{}};\n")
    depStr = "".join(decls)

    idaapi.parse_decls(None, depStr, None, idaapi.PT_SIL)

    for file in files:
        parse_type(file)

#endregion import functions
class TSymPluginMod(ida_idaapi.plugmod_t):
    """Plugin module: asks whether to export or import and dispatches."""

    def run(self, arg):
        """Ask the user for export/import and run the chosen action."""
        option = ida_kernwin.ask_buttons("Export symbols", "Import symbols", "Cancel", 1, "Do you want to export or import TSym symbols?")
        if option == 1:
            self.export_symbols()
        elif option == 0:
            self.import_symbols()

    def export_symbols(self):
        """Ask for a target folder; the export itself is not implemented yet."""
        print("Exporting TSym symbols...")
        directory = self.ask_directory("Select folder to export symbols")
        if directory:
            print(f"Exporting symbols to {directory}...")
            # TODO: implement export logic

    # TODO: add comments and labels, should we select each file individually? or just the directory?
    def import_symbols(self):
        """Import symbols from a chosen file, then types from a chosen folder.

        The two steps are independent: declining the file prompt still leads
        to the folder prompt.
        """
        print("Importing TSym symbols...")
        ida_kernwin.info("Select the symbols.txt file")
        file = ida_kernwin.ask_file(0, "*.txt", "Select TSym symbols.txt file")
        if file:
            import_symbols(file)
        else:
            # msg() does not append a newline itself
            ida_kernwin.msg("No file selected\n")

        ida_kernwin.info("Select the folder containing the types (.h files)")
        mainDir = self.ask_directory("Select folder to import types")
        if mainDir:
            import_structs(mainDir)
        else:
            ida_kernwin.msg("No folder selected\n")

    # ida has a method for asking for a file, but not for a directory ??
    def ask_directory(self, title: str):
        """Show a Tk folder picker and return the chosen path ('' if none)."""
        root = Tk()
        try:
            root.withdraw()
            root.attributes('-topmost', True)
            return askdirectory(title=title)
        finally:
            # always tear the hidden root window down, even if the dialog raises
            root.destroy()
def ask_directory(self, title: str): root = Tk() @@ -470,4 +466,4 @@ def init(self): return TSymPluginMod() def PLUGIN_ENTRY(): - return TSymPlugin() \ No newline at end of file + return TSymPlugin() From 8d655225d2e1169b345cd6313f821fae7901dca0 Mon Sep 17 00:00:00 2001 From: zastlx Date: Wed, 30 Apr 2025 16:57:51 -0500 Subject: [PATCH 09/10] refactor parse_helper to be more modular with regex rules --- idaPlugin.py | 101 ++++++++++++++++++++++----------------------------- 1 file changed, 43 insertions(+), 58 deletions(-) diff --git a/idaPlugin.py b/idaPlugin.py index 2c485d3..3db2376 100644 --- a/idaPlugin.py +++ b/idaPlugin.py @@ -183,72 +183,57 @@ def parse_comments(data: str): #endregion #region utils -# i'm so sorry for this function def parse_helper(data: str, isName: bool = False) -> str: - # remove comments - data = re.sub(r'//.*', '', data) - data = re.sub(r'/\*.*?\*/', '', data, flags=re.DOTALL) - - # access modifiers - data = re.sub(r'public', '', data) - data = re.sub(r'private', '', data) - data = re.sub(r'protected', '', data) - - # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/FUI/RenderNode/fuiRenderNodeEditText.h#L2 - data = re.sub(r':\s+(public)?uint8_t (\*(\s+))+', "", data) - # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Enums/C4JStorage/ESaveIncompleteType.h#L1 - data = re.sub(r':\s+(public)?uint63_t ', "", data) - - data = re.sub(r'::', '__', data) - - # "int *64 entry" to "int entry" to adhere with c style syntax - data = re.sub(r' \*[0-9]+ ', ' ', data) - - # adhere to c syntax - data = re.sub(r'ulonglong', "unsigned long long", data) - data = re.sub(r'longlong', "long long", data) - - # not 100% sure what the "pointer" type is in ghidra - data = re.sub(r'pointer[0-9]*', 'uint64_t', data) - data = re.sub(r'pointer', 'uint64_t', data) - - # ida doesnt like <>. any other ideas for this? 
- data = re.sub(r'[<>]', '__', data) - - # "wchar_t[8] name" to "wchar_t name[8]" to adhere with c style syntax - data = re.sub(r'\b(?P[\w:<>]+)\s*\[(?P\d+)\]\s*(?P\w+)\b', r'\g \g[\g]', data) + regex_rules = [ + (r'//.*', ''), # single line comments + (r'/\*.*?\*/', '', re.DOTALL), # multi line comments + (r'public', ''), # access modifiers + (r'protected', ''), + (r'private', ''), + (r':\s+(public)?uint8_t (\*(\s+))+', ""), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/FUI/RenderNode/fuiRenderNodeEditText.h#L2 + (r':\s+(public)?uint63_t ', ""), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Enums/C4JStorage/ESaveIncompleteType.h#L1 + (r'::', '__'), # replace :: with __ + (r' \*[0-9]+ ', ' '), # "int *64 entry" to "int entry" to adhere with c style syntax + (r'ulonglong', "unsigned long long"), # adhere to c syntax + (r'longlong', "long long"), + (r'pointer[0-9]*', 'uint64_t'), # not 100% sure what the "pointer" type is in ghidra + (r'pointer', 'uint64_t'), + (r'[<>]', '__'), # ida doesnt like <>. any other ideas for this? 
+ (r'\b(?P[\w:<>]+)\s*\[(?P\d+)\]\s*(?P\w+)\b', r'\g \g[\g]'), # "wchar_t[8] name" to "wchar_t name[8]" to adhere with c style syntax + (r'\(', '_'), + (r'\)', '_'), + (r'Item\*', "Item_"), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/IdMapper%253Cclass_Item%252A___ptr64%253E.h#L1 + (r'Variant\*', "Variant_"), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/TypedBoxed/TypedBoxed%253Cclass_PlanksBlock/Variant%252A___ptr64%253E.h#L2 + (r'struct struct', "struct _struct"), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/struct.h#L1 + (r'enum enum', "enum _enum"), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Enums/enum.h#L1 + (r'namespace', '_namespace'), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/ResourceLocation.h#L12 + (r'}', '};'), # colons at the end of defs + ] + name_only_rules = [ + (r'struct', '_struct'), + (r'union', '_union'), + (r'enum', '_enum'), + (r':.+', ''), # remove inheritance, will be added when we properly parse the types + (r'\*', ''), + (r',', '_'), + ] + badChars = ["~", "`", "!", "^"] + for pattern, repl, *f in regex_rules: + flags = f[0] if f else 0 + data = re.sub(pattern, repl, data, flags=flags) + if isName: - data = re.sub(r'struct', '_struct', data) - data = re.sub(r'union', '_union', data) - data = re.sub(r'enum', '_enum', data) - data = re.sub(r':.+', '', data) # remove inheritance, will be added when we properly parse the types - data = re.sub(r'\*', '', data) - data = re.sub(r',', '_', data) + for pattern, repl, *f in name_only_rules: + flags = f[0] if f else 0 + data = re.sub(pattern, repl, data, flags=flags) - data = re.sub(r'\(', '_', data) - data = re.sub(r'\)', '_', data) - # 
https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/IdMapper%253Cclass_Item%252A___ptr64%253E.h#L1 - data = re.sub(r'Item\*', "Item_", data) - # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/TypedBoxed/TypedBoxed%253Cclass_PlanksBlock/Variant%252A___ptr64%253E.h#L2 - data = re.sub(r'Variant\*', "Variant_", data) - # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/struct.h#L1 - data = re.sub(r'struct struct', "struct _struct", data) - # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Enums/enum.h#L1 - data = re.sub(r'enum enum', "enum _enum", data) - - badChars = ["~", "`", "!", "^"] for badChar in badChars: data = data.replace(badChar, "") if "enum" in data: - data = re.sub(r';', ',', data) - - # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/ResourceLocation.h#L12 - data = re.sub(r'namespace', '_namespace', data) - # colons at the end of defs - data = re.sub(r'}', '};', data) - + data = re.sub(r'(? 
Date: Wed, 30 Apr 2025 16:59:32 -0500 Subject: [PATCH 10/10] minor clean up --- idaPlugin.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/idaPlugin.py b/idaPlugin.py index 3db2376..d80d27f 100644 --- a/idaPlugin.py +++ b/idaPlugin.py @@ -355,7 +355,7 @@ def parse_type(dir: str): for undefined_type, uint_type in undefined_to_uint.items(): data = re.sub(re.escape(undefined_type + " "), uint_type + " ", data) data = re.sub(r'undefined[0-9]*', "uint64_t", data) - + data = re.sub(r'dword', "uint32_t", data) # ida doesnt support dword in structs data = parse_helper(data) @@ -373,7 +373,6 @@ def parse_type(dir: str): idaapi.parse_decls(None, data, None, idaapi.PT_SIL) - files = readDirRecusrive(mainDir) # avoid circular dependencies by pre-defining all structs and unions as empty @@ -400,7 +399,7 @@ def parse_type(dir: str): #endregion class TSymPluginMod(ida_idaapi.plugmod_t): - def run(self, arg): + def run(self, _): option = ida_kernwin.ask_buttons("Export symbols", "Import symbols", "Cancel", 1, "Do you want to export or import TSym symbols?") if option == 1: self.export_symbols() @@ -416,7 +415,6 @@ def export_symbols(self): # TODO: add comments and labels, should we select each file individually? or just the directory? def import_symbols(self): - print("Importing TSym symbols...") ida_kernwin.info("Select the symbols.txt file") file = ida_kernwin.ask_file(0, "*.txt", "Select TSym symbols.txt file") if file: