diff --git a/TSym.py b/TSym.py index db97c9b..493cc59 100644 --- a/TSym.py +++ b/TSym.py @@ -121,7 +121,7 @@ def url_encode(string): #region function writer # function symbols print("Writing functions") -# address;callconv;nmsp_size;[namespace];name;args_size;[args];hasVarArgs; +# address;retType;callconv;nmsp_size;[namespace];name;args_size;[args];hasVarArgs; for func in fun_manager.getFunctions(False): # type: ghidra.program.model.listing.Function # closest thing to StringBuilder builder = [] diff --git a/idaPlugin.py b/idaPlugin.py new file mode 100644 index 0000000..d80d27f --- /dev/null +++ b/idaPlugin.py @@ -0,0 +1,452 @@ +#region imports +import idc +import idaapi +import ida_idaapi +import ida_hexrays +import ida_name +import ida_kernwin +import os +import re +from enum import Enum +from dataclasses import dataclass +from typing import List +from tkinter import Tk +from tkinter.filedialog import askdirectory +#endregion + +# these can probably be put in a common file +#region TSym parsing utils +class CallingConvention(Enum): + CDECL = "__cdecl" + STDCALL = "__stdcall" + FASTCALL = "__fastcall" + THISCALL = "__thiscall" + VECTORCALL= "__vectorcall" + UNKNOWN = "unknown" + +@dataclass +class Argument: + name: str + type: str + +@dataclass +class Symbol: + address: int + return_type: str + call_convention: CallingConvention + namespaces: List[str] + name: str + args: List[Argument] + has_var_args: bool + +def parse_symbols(data: str) -> List[Symbol]: + lines = data.splitlines() + if not lines: + return [] + # skip first line with version info + raw_symbols = "\n".join(lines[1:]).strip().splitlines() + + symbols: List[Symbol] = [] + for line_num, line in enumerate(raw_symbols, start=1): + parts = line.split(";") + lenParts = len(parts) + cur = 0 + + try: + address = int(parts[cur], 16) + except: + print(f"Line {line_num}: Invalid address: {parts[cur]}") + continue + cur += 1 + + try: + return_type = parts[cur] + except: + print(f"Line {line_num}: Missing 
return type: {parts[cur]}") + continue + cur += 1 + + try: + cc_raw = parts[cur] + call_convention = CallingConvention(cc_raw) + except: + print(f"Line {line_num}: Invalid calling convention: {parts[cur]}") + continue + cur += 1 + + try: + nmsp_size = int(parts[cur]) + except: + print(f"Line {line_num}: Invalid namespace size: {parts[cur]}") + continue + cur += 1 + + namespaces: List[str] = [] + for _ in range(nmsp_size): + if cur < lenParts: + namespaces.append(parts[cur]) + cur += 1 + else: + print(f"Line {line_num}: Namespace entry missing: {parts[cur]}") + break + + if cur < lenParts: + name = parts[cur] + else: + print(f"Line {line_num}: Missing symbol name: {parts[cur]}") + continue + cur += 1 + + try: + args_size = int(parts[cur]) + except: + print(f"Line {line_num}: Invalid args size: {parts[cur]}") + continue + cur += 1 + + args: List[Argument] = [] + for _ in range(args_size): + if cur + 1 < lenParts: + arg_name = parts[cur] + arg_type = parts[cur + 1] + args.append(Argument(name=arg_name, type=arg_type)) + cur += 2 + else: + print(f"Line {line_num}: Incomplete argument entry: {parts[cur]}") + continue + + has_var_args = parts[cur].strip().lower() == "true" + + symbols.append( + Symbol( + address=address, + return_type=return_type, + call_convention=call_convention, + namespaces=namespaces, + name=name, + args=args, + has_var_args=has_var_args + ) + ) + + return symbols + +@dataclass +class Comment: + address: int + comment: str + type: int + +#address;string;type +def parse_comments(data: str): + lines = data.splitlines() + if not lines: + return [] + # skip first line with version info + raw_symbols = "\n".join(lines[1:]).strip().splitlines() + + comments: List[Comment] = [] + for line_num, line in enumerate(raw_symbols, start=1): + parts = line.split(";") + cur = 0 + + try: + address = int(parts[cur], 16) + except: + print(f"Line {line_num}: Invalid address: {parts[cur]}") + continue + cur += 1 + + try: + comment = parts[cur] + except: + print(f"Line 
{line_num}: Missing comment: {parts[cur]}") + continue + cur += 1 + + try: + type = int(parts[cur]) + except: + print(f"Line {line_num}: Invalid type: {parts[cur]}") + continue + + comments.append( + Comment( + address=address, + comment=comment, + type=type + ) + ) + return comments + + # TODO: add parsing for labels +#endregion + +#region utils +def parse_helper(data: str, isName: bool = False) -> str: + regex_rules = [ + (r'//.*', ''), # single line comments + (r'/\*.*?\*/', '', re.DOTALL), # multi line comments + (r'public', ''), # access modifiers + (r'protected', ''), + (r'private', ''), + (r':\s+(public)?uint8_t (\*(\s+))+', ""), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/FUI/RenderNode/fuiRenderNodeEditText.h#L2 + (r':\s+(public)?uint63_t ', ""), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Enums/C4JStorage/ESaveIncompleteType.h#L1 + (r'::', '__'), # replace :: with __ + (r' \*[0-9]+ ', ' '), # "int *64 entry" to "int entry" to adhere with c style syntax + (r'ulonglong', "unsigned long long"), # adhere to c syntax + (r'longlong', "long long"), + (r'pointer[0-9]*', 'uint64_t'), # not 100% sure what the "pointer" type is in ghidra + (r'pointer', 'uint64_t'), + (r'[<>]', '__'), # ida doesnt like <>. any other ideas for this? 
+ (r'\b(?P[\w:<>]+)\s*\[(?P\d+)\]\s*(?P\w+)\b', r'\g \g[\g]'), # "wchar_t[8] name" to "wchar_t name[8]" to adhere with c style syntax + (r'\(', '_'), + (r'\)', '_'), + (r'Item\*', "Item_"), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/IdMapper%253Cclass_Item%252A___ptr64%253E.h#L1 + (r'Variant\*', "Variant_"), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/TypedBoxed/TypedBoxed%253Cclass_PlanksBlock/Variant%252A___ptr64%253E.h#L2 + (r'struct struct', "struct _struct"), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/struct.h#L1 + (r'enum enum', "enum _enum"), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Enums/enum.h#L1 + (r'namespace', '_namespace'), # https://github.com/DexrnZacAttack/MCXB1-Syms/blob/a4d1aa8a9d8b0a062cda36ef95c0424bec1360c5/types/Minecraft/Classes/ResourceLocation.h#L12 + (r'}', '};'), # colons at the end of defs + ] + name_only_rules = [ + (r'struct', '_struct'), + (r'union', '_union'), + (r'enum', '_enum'), + (r':.+', ''), # remove inheritance, will be added when we properly parse the types + (r'\*', ''), + (r',', '_'), + ] + badChars = ["~", "`", "!", "^"] + + for pattern, repl, *f in regex_rules: + flags = f[0] if f else 0 + data = re.sub(pattern, repl, data, flags=flags) + + if isName: + for pattern, repl, *f in name_only_rules: + flags = f[0] if f else 0 + data = re.sub(pattern, repl, data, flags=flags) + + for badChar in badChars: + data = data.replace(badChar, "") + + if "enum" in data: + data = re.sub(r'(?= len(args): + print(f"Offset {offset} is out of range for function {func.entry_ea}") + return + + ida_hexrays.rename_lvar(func.entry_ea, args[offset].name, name) + +def readDirRecusrive(dir: str): + out = [] + for files in os.listdir(dir): + if 
def readDirRecusrive(dir: str):
    """Recursively collect every ``.h`` file below *dir*.

    Entries are visited in sorted order so the result does not depend on the
    order the filesystem happens to list them in.

    Returns:
        A list of paths (backslashes replaced by ``/``).
    """
    out = []
    for entry in sorted(os.listdir(dir)):
        path = os.path.join(dir, entry)
        if os.path.isdir(path):
            out += readDirRecusrive(path)
        elif entry.endswith(".h"):
            out.append(path.replace("\\", "/"))
    return out


@dataclass
class Struct:
    """A declared type name: its kind (struct/union/enum/typedef) and name."""

    type: str
    data: str


def getStructNames(data: str) -> List[Struct]:
    """Extract the names declared in a header's text.

    Each name found after ``struct``/``union``/``enum`` (before `` {``) or
    after ``typedef`` is passed through ``parse_helper(..., True)``.

    Returns:
        All structs first, then unions, enums and typedefs.
    """
    patterns = (
        ("struct", r'struct (.+) {'),
        ("union", r'union (.+) {'),
        ("enum", r'enum (.+) {'),
        ("typedef", r'typedef (\w+) '),
    )
    out: List[Struct] = []
    for kind, pattern in patterns:
        for found in re.findall(pattern, data):
            out.append(Struct(type=kind, data=parse_helper(found, True)))
    return out
#endregion

#region import/export functions
def _try_decompile(address: int):
    """Decompile the function at *address*; return ``None`` on failure.

    ``idaapi.decompile`` can signal failure either by returning ``None`` or
    by raising ``DecompilationFailure``; both are reported the same way.
    """
    try:
        cfunc = idaapi.decompile(address)
    except ida_hexrays.DecompilationFailure:
        cfunc = None
    if cfunc is None:
        print(f"Failed to decompile function at {hex(address)}")
    return cfunc


def import_symbols(file: str):
    """Name functions and their arguments from a TSym symbols file.

    Auto-generated names (``FUN_``, ``thunk_FUN_``, ``sub_``) are skipped, as
    are arguments whose name starts with a default prefix.  Characters that
    are not accepted in names are replaced by ``_``.

    Args:
        file: path of the symbols file.
    """
    print(f"Importing symbols from {file}...")
    with open(file, "r", encoding="utf-8", errors="replace") as f:
        symbols = parse_symbols(f.read())

    auto_name_prefixes = ("FUN_", "thunk_FUN_", "sub_")
    default_arg_prefixes = ("arg", "var", "unk", "dword", "byte")
    sanitize = str.maketrans({ch: "_" for ch in "~`,<>'\"*=!^"})

    for symbol in symbols:
        if symbol.name.startswith(auto_name_prefixes):
            continue

        name = symbol.name.translate(sanitize)
        namespaces = "::".join(
            ns for ns in symbol.namespaces if ns != "Global"
        ).translate(sanitize)
        full_name = f"{namespaces}::{name}" if namespaces else name

        print(f"Importing symbol: {full_name} at address {hex(symbol.address)}")
        idc.set_name(symbol.address, full_name, ida_name.SN_FORCE)

        for i, arg in enumerate(symbol.args):
            if arg.name.startswith(default_arg_prefixes):
                continue

            # decompile again for every rename so the lookup sees the
            # function as it is after the previous rename
            cfunc = _try_decompile(symbol.address)
            if cfunc is None:
                # a failure will not go away for the remaining arguments
                break

            rename_func_var(cfunc, i, arg.name)


def import_structs(mainDir: str):
    """Import every type header below *mainDir* into IDA.

    All struct/union/enum/typedef names are first declared empty so that
    circular references resolve, then each header is parsed after the
    headers it includes.

    Args:
        mainDir: root folder of the exported ``.h`` files; include paths are
            resolved relative to it.
    """
    print(f"Importing structs from {mainDir}...")
    parsed = set()
    # caused by https://github.com/DexrnZacAttack/TSym/issues/1
    ignore = (
        "char[0].h", "char[1].h", "char[2].h", "uchar[8].h", "uchar[1].h",
        "wchar_t[8].h", "wchar_t[0].h", "ulonglong[2].h", "ulonglong[1].h",
        "undefined[1].h", "undefined[2].h", "undefined[4].h", "undefined[8].h",
        "undefined[16].h", "undefined.h", "wchar_t.h", "char.h", "uchar.h",
        "byte.h", "word.h", "dword.h", "qword.h", "uint8_t.h", "uint16_t.h",
        "uint32_t.h", "uint64_t.h", "int8_t.h", "int16_t.h", "int32_t.h",
        "int64_t.h", "ulong.h", "long.h", "ushort.h", "short.h", "uint.h",
        "int.h", "bool.h", "float.h", "double.h", "uint32.h",
    )
    # ghidra uses this when it doesn't know the type
    undefined_to_uint = {
        "undefined": "uint8_t",
        "undefined1": "uint8_t",
        "undefined2": "uint16_t",
        "undefined3": "uint32_t",
        "undefined4": "uint32_t",
        "undefined5": "uint64_t",
        "undefined6": "uint64_t",
        "undefined7": "uint64_t",
        "undefined8": "uint64_t",
    }

    def read_text(path: str) -> str:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            return f.read()

    def parse_type(path: str):
        # NOTE(review): every path containing "Other" is skipped here, while
        # the pre-declaration pass below keeps "Other" paths that contain
        # "std" - confirm this asymmetry is intended.
        if path.lower().endswith(ignore) or "Other" in path:
            return
        path = re.sub(r'\[.*?\]', '', path) # "Other/std/shared_ptr%3CMultiplayerLocalPlayer%3E[4].h" -> "Other/std/shared_ptr%3CMultiplayerLocalPlayer%3E.h"
        path = re.sub(r'%3A%3A', "/", path) # fixes a few import strings

        if path in parsed or "/functions/" in path or path.endswith(ignore):
            return

        parsed.add(path)

        # an include may point at a header that was never exported
        if not os.path.isfile(path):
            print(f"Skipping missing type file: {path}")
            return

        data = read_text(path)

        for undefined_type, uint_type in undefined_to_uint.items():
            data = data.replace(undefined_type + " ", uint_type + " ")
        data = re.sub(r'undefined[0-9]*', "uint64_t", data)

        data = data.replace("dword", "uint32_t") # ida doesnt support dword in structs

        data = parse_helper(data)

        imports = []
        for line in data.splitlines():
            if not line.startswith("#include"):
                continue
            # match inside quotes aka imported file
            quoted = re.search(r'\"(.+)\"', line)
            if quoted is None:
                # e.g. an include without a quoted path: nothing to resolve
                continue
            imports.append(mainDir + "/" + quoted.group(1))
        data = re.sub(r'#include \"(.+)\"', "", data)

        # dependencies first, then the declarations that use them
        for imported in imports:
            parse_type(imported)

        idaapi.parse_decls(None, data, None, idaapi.PT_SIL)

    files = readDirRecusrive(mainDir)

    # avoid circular dependencies by pre-defining all structs and unions as empty
    deps = []
    for file in files:
        if "Other" in file and "std" not in file: # a lot of wack unneeded stuff in Other that would have required a lot of work to parse, all of the actual important types are included
            continue
        deps += getStructNames(read_text(file))

    declarations = [
        f"typedef {dep.data};\n" if dep.type == "typedef"
        else f"{dep.type} {dep.data} {{}};\n"
        for dep in deps
    ]
    idaapi.parse_decls(None, "".join(declarations), None, idaapi.PT_SIL)

    for file in files:
        parse_type(file)
#endregion
class TSymPluginMod(ida_idaapi.plugmod_t):
    """Plugin module: asks the user what to do and runs the import/export."""

    def run(self, _):
        """Ask whether to export or import and dispatch accordingly.

        A result of 1 runs the export, 0 runs the import; any other result
        does nothing.
        """
        option = ida_kernwin.ask_buttons("Export symbols", "Import symbols", "Cancel", 1, "Do you want to export or import TSym symbols?")
        if option == 1:
            self.export_symbols()
        elif option == 0:
            self.import_symbols()

    def export_symbols(self):
        """Ask for a target folder; the export itself is not implemented yet."""
        print("Exporting TSym symbols...")
        directory = self.ask_directory("Select folder to export symbols")
        if directory:
            print(f"Exporting symbols to {directory}...")
            # TODO: implement export logic

    # TODO: add comments and labels, should we select each file individually? or just the directory?
    def import_symbols(self):
        """Ask for a symbols file and a types folder and import both.

        Either step is skipped (with a message) when nothing is selected.
        """
        ida_kernwin.info("Select the symbols.txt file")
        file = ida_kernwin.ask_file(0, "*.txt", "Select TSym symbols.txt file")
        if file:
            import_symbols(file)
        else:
            # msg() does not append a newline itself
            ida_kernwin.msg("No file selected\n")

        ida_kernwin.info("Select the folder containing the types (.h files)")
        mainDir = self.ask_directory("Select folder to import types")
        if mainDir:
            import_structs(mainDir)
        else:
            ida_kernwin.msg("No folder selected\n")

    # ida has a method for asking for a file, but not for a directory ??
    def ask_directory(self, title: str):
        """Show a Tk folder picker and return the chosen path.

        Returns:
            Whatever ``askdirectory`` returns for the selection.
        """
        root = Tk()
        try:
            root.withdraw()
            root.attributes('-topmost', True)
            return askdirectory(title=title)
        finally:
            # always tear the hidden root window down, even if the dialog raises
            root.destroy()


class TSymPlugin(ida_idaapi.plugin_t):
    """Plugin descriptor exposing TSym import/export under Ctrl-Shift-T."""

    flags = ida_idaapi.PLUGIN_MULTI
    comment = "A plugin to export and import TSym symbols"
    help = "Export and import TSym symbols"
    wanted_name = "TSym"
    wanted_hotkey = "Ctrl-Shift-T"

    def init(self):
        """Return the plugin module instance that handles invocations."""
        return TSymPluginMod()


def PLUGIN_ENTRY():
    """Return the plugin descriptor instance."""
    return TSymPlugin()