diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..915fb32 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*/__pycache__/* +__pycache__/* +data/ +*.bak + diff --git a/Parser/parse_v8cache.py b/Parser/parse_v8cache.py index 24925d6..6eb7ae2 100644 --- a/Parser/parse_v8cache.py +++ b/Parser/parse_v8cache.py @@ -30,17 +30,24 @@ def run_disassembler_binary(binary_path, file_name, out_file_name): ) # Open the output file in write mode - with open(out_file_name, 'w') as outfile: + with open(out_file_name, 'w', encoding="utf-8", errors="replace") as outfile: # Call the binary with the file name as argument and pipe the output to the file - try: - result = subprocess.run([binary_path, file_name], stdout=outfile, stderr=subprocess.PIPE, text=True) - - # Check the return status code - if result.stderr: - raise RuntimeError( - f"Binary execution failed with status code {result.returncode}: {result.stderr.strip()}") - except subprocess.CalledProcessError as e: - raise RuntimeError(f"Error calling the binary: {e}") + result = subprocess.run( + [binary_path, file_name], + stdout=outfile, + stderr=subprocess.PIPE, + text=True, + ) + + # Treat only non-zero exit codes as failure. Some tools may emit warnings to stderr on success. + if result.returncode != 0: + err = (result.stderr or "").strip() + raise RuntimeError( + f"Binary execution failed with status code {result.returncode}." + (f" Stderr: {err}" if err else "") + ) + + if result.stderr: + print(f"[!] 
Disassembler stderr: {result.stderr.strip()}") def parse_v8cache_file(file_name, out_name, view8_dir, binary_path): diff --git a/Parser/sfi_file_parser.py b/Parser/sfi_file_parser.py index ac5d780..180b3fb 100644 --- a/Parser/sfi_file_parser.py +++ b/Parser/sfi_file_parser.py @@ -1,6 +1,7 @@ from Parser.shared_function_info import SharedFunctionInfo, CodeLine from parse import parse import re +import json all_functions = {} repeat_last_line = False @@ -12,7 +13,7 @@ def set_repeat_line_flag(flag): def get_next_line(file): - with open(file) as f: + with open(file, encoding='utf-8', errors='ignore') as f: for line in f: line = line.strip() if not line: @@ -75,8 +76,9 @@ def parse_const_line(lines, func_name): if not address: return var_idx, value if value.startswith(" ').replace('"', '\\"') - return var_idx, f'"{value}"' + value = json.dumps(value.split("#", 1)[-1].rstrip('> ')) #.replace('"', '\\"') + #return var_idx, f'"{value}"' + return var_idx, value if value.startswith(" ') if " " in value else "" return var_idx, parse_shared_function_info(lines, value, func_name) diff --git a/Parser/shared_function_info.py b/Parser/shared_function_info.py index ff373e2..1349c31 100644 --- a/Parser/shared_function_info.py +++ b/Parser/shared_function_info.py @@ -1,16 +1,83 @@ from Translate.translate import translate_bytecode +from Translate.jump_blocks import CodeLine from Simplify.simplify import simplify_translated_bytecode +import re +import pickle +from typing import Dict, List, Optional, Union -class CodeLine: - def __init__(self, opcode="", line="", inst="", translated="", decompiled=""): - self.v8_opcode = opcode - self.line_num = line - self.v8_instruction = inst - self.translated = translated - self.decompiled = decompiled - self.visible = True +### + +class GlobalVars: + _STRING_RE = re.compile(r'"([^"\\]*(?:\\.[^"\\]*)*)"') + _FUNC_RE = re.compile(r'\b(func_([A-Za-z0-9_$]+)_0x[0-9a-fA-F]+)\b') + + def __init__(self): + self.strings_set = None + self.funcs_map = None 
+ + def parse(self, value) -> bool: + is_parsed = False + + strings = set(self._STRING_RE.findall(value)) + funcs = list(self._FUNC_RE.finditer(value)) + + if strings: + is_parsed = True + self.strings_set = (self.strings_set or set()) + self.strings_set.update(strings) + + if funcs: + is_parsed = True + self.funcs_map = (self.funcs_map or {}) + + for match in funcs: + full_name = match.group(1) + short_name = match.group(2) + self.funcs_map[short_name] = full_name + + return is_parsed + + def is_filled(self) -> bool: + if self.strings_set or self.funcs_map: + return True + return False + def has_value(self, value) -> bool: + if self.strings_set is not None: + val = value.strip('"') + if (value in self.strings_set or val in self.strings_set): + return True + if self.funcs_map is not None: + if value in self.funcs_map.keys(): + return True + return False + + def resolve_global_name(self, value) -> Optional[str]: + + def _is_string(value): + if value.startswith('"') and value.endswith('"'): + return True + return False + + if not self.is_filled(): + return None + + if not _is_string(value): + return None + + val = value.strip('"') + if self.strings_set is not None: + if (value in self.strings_set or val in self.strings_set): + return "global_" + val + + if self.funcs_map is not None: + if val in self.funcs_map: + return self.funcs_map[val] + + return None + +### class SharedFunctionInfo: def __init__(self): @@ -22,6 +89,8 @@ def __init__(self): self.code = None self.const_pool = None self.exception_table = None + self.visible = True + self.metadata = None def is_fully_parsed(self): return all( @@ -40,18 +109,62 @@ def translate_bytecode(self): def simplify_bytecode(self): simplify_translated_bytecode(self, self.code) - def replace_const_pool(self): - replacements = {f"ConstPool[{idx}]": var for idx, var in enumerate(self.const_pool)} + def fill_global_variables(self, global_vars: GlobalVars): + """ + If the Global Vars were defined anywhere in this function, fill 
them in and store in the global structure. + """ + + patternDef = re.compile(r'ConstPoolLiteral\[(\d+)\]') + + for obj in self.code: + line = obj.decompiled + if "DeclareGlobals(" not in line: + continue + match = re.search(patternDef, line.strip()) + if not match: + continue + index = int(match.group(1)) + # Ensure const_pool exists and index is within valid bounds; otherwise skip + if self.const_pool is None or not (0 <= index < len(self.const_pool)): + continue + if global_vars.parse(self.const_pool[index]): + return True + return False + + def replace_const_pool(self, global_vars: GlobalVars): + + def _replacement(match): + index = int(match.group(2)) + # Ensure const_pool exists and index is within valid bounds; otherwise leave unchanged + if self.const_pool is None or not (0 <= index < len(self.const_pool)): + return match.group(0) # Leave unchanged + + value = self.const_pool[index] + if match.group(1) == "ConstPool": # Not ConstPoolLiteral + + global_symbol = global_vars.resolve_global_name(value) + if global_symbol: + return global_symbol + + return value.strip('"') + return value + + # Regular expression to match patterns A[NUMBER] or B[NUMBER] + pattern = r'(ConstPoolLiteral|ConstPool)\[(\d+)\]' + + #replacements = {f"ConstPool[{idx}]": var.strip('"') for idx, var in enumerate(self.const_pool)} + #replacements.update({f"ConstPoolLiteral[{idx}]": var for idx, var in enumerate(self.const_pool)}) + for line in self.code: - if not line.visible: + if "ConstPool" not in line.decompiled: continue - for const_id, var in replacements.items(): - line.decompiled = line.decompiled.replace(const_id, var) + line.decompiled = re.sub(pattern, _replacement, line.decompiled) - def decompile(self): + def decompile(self, global_vars: GlobalVars): self.translate_bytecode() self.simplify_bytecode() - self.replace_const_pool() + self.fill_global_variables(global_vars) + self.replace_const_pool(global_vars) def export(self, export_v8code=False, export_translated=False, 
export_decompiled=True): export_func = self.create_function_header() + '\n' @@ -70,3 +183,39 @@ def export(self, export_v8code=False, export_translated=False, export_decompiled if export_line: export_func += export_line + '\n' return export_func + +#### + +FunctionsBlob = Union[Dict[str, "SharedFunctionInfo"], List["SharedFunctionInfo"]] + +# Helper function for serializing multiple functions +def serialize_functions(functions: FunctionsBlob) -> bytes: + """Serialize decompiled output using pickle. + + SECURITY NOTE: + Pickle is unsafe for untrusted input. Only load serialized files that you + generated yourself. + """ + return pickle.dumps(functions, protocol=pickle.HIGHEST_PROTOCOL) + + +def deserialize_functions(data: bytes) -> FunctionsBlob: + """Deserialize decompiled output using pickle. + + SECURITY NOTE: + Unpickling can execute arbitrary code. Do not load files from untrusted + sources. + """ + return pickle.loads(data) + + +def save_functions_to_file(functions: FunctionsBlob, filename: str): + """Save decompiled output to a file (pickle).""" + with open(filename, 'wb') as f: + f.write(serialize_functions(functions)) + + +def load_functions_from_file(filename: str) -> FunctionsBlob: + """Load decompiled output from a file (pickle).""" + with open(filename, 'rb') as f: + return deserialize_functions(f.read()) diff --git a/README.md b/README.md index 9769294..864241d 100644 --- a/README.md +++ b/README.md @@ -18,31 +18,42 @@

Usage

Command-Line Arguments

Basic Usage

To decompile a V8 bytecode file and export the decompiled code:

-
python view8.py input_file output_file
+
python view8.py -i input_file -o output_file

Disassembler Path

By default, view8 detects the V8 bytecode version of the input file (using VersionDetector.exe) and automatically searches for a compatible disassembler binary in the Bin folder. To specify a different disassembler binary, use the --path (or -p) option:

-
python view8.py input_file output_file --path /path/to/disassembler
+
python view8.py -i input_file -o output_file --path /path/to/disassembler

Processing Disassembled Files

-

To skip the disassembling process and provide an already disassembled file as the input, use the --disassembled (or -d) flag:

-
python view8.py input_file output_file --disassembled
+

To skip the disassembling process and provide an already disassembled file as the input, use the --input_format disassembled (or -f disassembled) option:

+
python view8.py -i input_file -o output_file -f disassembled
+

Creating and Processing Serialized Files

+

Sometimes we may want to decompile the file into a serialized format (preserving all the objects and structures). This type of output may be easier to post-process than a text format, and is useful, e.g., for further deobfuscation. To create a serialized output, we use a specific export format: --export_format serialized (or -e serialized)

+

Security warning: the current serialized format is a Python pickle file (.pkl). Unpickling data from untrusted sources can execute arbitrary code. Only load serialized files that you generated yourself.

+
python view8.py -i input_file -o output_file -e serialized
+

If we ever want to load the serialized output back and decompile it into a different type of output, we can do so using the --input_format serialized (or -f serialized) option:

+
python view8.py -i input_file -o output_file -f serialized

Export Formats

Specify the export format(s) using the --export_format (or -e) option. You can combine multiple formats:

For example, to export both V8 opcodes and decompiled code side by side:

-
python view8.py input_file output_file -e v8_opcode decompiled
+
python view8.py -i input_file -o output_file -e v8_opcode decompiled

By default, the format used is decompiled.

VersionDetector.exe

@@ -52,3 +63,4 @@
  • -d: Retrieves a hash (little-endian) and returns its corresponding version using brute force.
  • -f: Retrieves a file and returns its version.
  • + diff --git a/Simplify/global_scope_replace.py b/Simplify/global_scope_replace.py index 2eaada6..192bd94 100644 --- a/Simplify/global_scope_replace.py +++ b/Simplify/global_scope_replace.py @@ -1,40 +1,130 @@ import re from collections import defaultdict +from typing import Optional +def find_assignment_op(line: str) -> Optional[int]: + """ + Return the index of the first assignment '=' in `line` that is not part of + ==, ===, !=, <=, >=, =>. Ignores '=' inside single or double quoted strings. + Note: backtick/template strings are not tracked (they don't appear in + decompiled bytecode output). + """ + in_sq = False # inside single-quoted string + in_dq = False # inside double-quoted string + esc = False + + for i, ch in enumerate(line): + if esc: + esc = False + continue + if ch == '\\' and (in_sq or in_dq): + esc = True + continue + if ch == "'" and not in_dq: + in_sq = not in_sq + continue + if ch == '"' and not in_sq: + in_dq = not in_dq + continue + if in_sq or in_dq: + continue + if ch == '=': + prev = line[i - 1] if i > 0 else '' + nxt = line[i + 1] if i + 1 < len(line) else '' + if nxt in ('=', '>'): # == / === / => + continue + if prev in ('!', '<', '>', '='): # != / <= / >= / == / === + continue + return i + return None + +### + +def _print_assignments(scope_assignments): + for key in scope_assignments.keys(): + if scope_assignments[key] is None: + continue + (x,y) = key + print(f"Scope[{x}][{y}] = {scope_assignments[key]}") + +def _replace_global_scope2_func(all_functions, verbosity) -> int: + """ + Collect 2 dimensional Scope definitions, i.e. `Scope[x][y] = value` + Replace their occurrences in the code with the literal value. + Only the Scope values that are assigned once are used for the replacements. 
+ """ + + def _replace_value(match): + key = (match.group(1), match.group(2)) + cnt = scope_counts.get(key, 0) + val = scope_assignments.get(key) + if cnt == 1 and val is not None: + return val + return match.group(0) -def replace_global_scope(all_functions): scope_assignments = {} scope_counts = defaultdict(int) - + # Regex pattern to match Scope[num][num] = value - pattern = re.compile(r'Scope\[(\d+)\]\[(\d+)\] = (\S+)') + pattern = re.compile(r'Scope\[(\d+)\]\[(\d+)\] = (\S+)$') + value_pattern = re.compile(r'([\w#$]+|\"[\w#$]+\")$') + exclusion_pattern = re.compile(r'(ACCU|r\d+|a\d+)$') # First pass: Find all unique Scope assignments for func in all_functions.values(): for line_obj in func.code: - line = line_obj.decompiled - match = pattern.search(line) + line = line_obj.decompiled.strip() + match = pattern.match(line) if match: key = (match.group(1), match.group(2)) value = match.group(3) if value in ("null", "undefined"): + line_obj.visible = False continue - if key in scope_assignments or not value.startswith("func_"): - # If the same Scope is assigned different values, mark it as invalid + + if key in scope_assignments or not value_pattern.match(value) or exclusion_pattern.match(value): + # If the same Scope is assigned more than once, or the value is not eligible, mark it as invalid scope_assignments[key] = None else: scope_assignments[key] = value scope_counts[key] += 1 + if verbosity > 1: + _print_assignments(scope_assignments) + pattern2 = re.compile(r'Scope\[(\d+)\]\[(\d+)\](?![\[])') # Scope[num][num] but not: Scope[num][num][num] + replaced_count = 0 # Second pass: Replace Scope[num][num] with value if it's set only once for func in all_functions.values(): for line_obj in func.code: - new_line = line_obj.decompiled - for key, count in scope_counts.items(): - if count == 1 and scope_assignments[key] is not None: - scope_pattern = re.escape(f'Scope[{key[0]}][{key[1]}]') - new_line = re.sub(scope_pattern, scope_assignments[key], new_line) - 
line_obj.decompiled = new_line + line = line_obj.decompiled + # Split into left-hand and right-hand side of assignment + idx = find_assignment_op(line) + if idx is not None: + lhs = line[:idx] + rhs = line[idx + 1:] + # Only replace Scope[x][y] if it appears **not** in LHS + new_rhs = pattern2.sub(_replace_value, rhs) + new_line = lhs + '=' + new_rhs + else: + # No assignment; apply replacements freely + new_line = pattern2.sub(_replace_value, line) + + if new_line != line: + replaced_count += 1 + if verbosity > 0: + print(f"[G] Replaced:\n\t{line}\n\t{new_line}") + line_obj.decompiled = new_line + return replaced_count +def replace_global_scope(all_functions, verbosity) -> int: + total_repl = 0 + while True: + repl_cnt = _replace_global_scope2_func(all_functions, verbosity) + if not repl_cnt: + break + total_repl += repl_cnt + if verbosity: + print(f"[G] Replaced count: {repl_cnt}") + return total_repl diff --git a/Simplify/simplify.py b/Simplify/simplify.py index 1df5f91..f511f10 100644 --- a/Simplify/simplify.py +++ b/Simplify/simplify.py @@ -36,23 +36,24 @@ def reg_is_constant(reg, value): return False # Variable is set to a constant value - if re.search(r"^[\(]*(Scope|ConstPool|<|true|false|Undefined|Null|null|[+-]?\d)", value): + if re.search(r"^[\(]*(Scope|ConstPool|ConstPoolLiteral|<|true|false|Undefined|Null|null|[+-]?\d)", value): return True # Variable is set to register[ConstPool[idx]] - if re.search(r"^[ra]\d+\[[\(]*ConstPool\[\d+\]", value): + if re.search(r"^[ra]\d+\[[\(]*(ConstPool|ConstPoolLiteral)\[\d+\]", value): return True return False def get_context_idx_from_var(var): - if var.was_overwritten: - return - pattern = r"Scope\[(\d+)\]" + #if var.was_overwritten: + # return + pattern = r"^Scope\[(\d+)\]$" match = re.match(pattern, var.value) if match: return int(match.group(1)) + return None @@ -66,9 +67,15 @@ def is_reg_defined_in_reg_value(reg, value): def create_loop_reg_scope(prev_reg_scope): + reg_scope = {} # Because loop regs can be 
overwritten during loop iteration we define prev scope as overwritten - reg_scope = {k: Register("", v.all_initialized_index[0], True) for k, v in prev_reg_scope.items() if - not isinstance(v, int)} + for k,v in prev_reg_scope.items(): + if isinstance(v, int): + continue + if get_context_idx_from_var(v) is not None: + reg_scope[k] = prev_reg_scope[k] + continue + reg_scope[k] = Register("", v.all_initialized_index[0], True) reg_scope["current_context"] = prev_reg_scope["current_context"] return reg_scope @@ -149,10 +156,10 @@ def replace_scope(match): scope_start, steps = scope.split("-") start_context = reg_scope['current_context'] - if scope_start in reg_scope: + if (scope_start in reg_scope) and (get_context_idx_from_var(reg_scope[scope_start]) is not None): start_context = get_context_idx_from_var(reg_scope[scope_start]) - elif scope_start in prev_reg_scope: + elif (scope_start in prev_reg_scope) and (get_context_idx_from_var(prev_reg_scope[scope_start]) is not None): start_context = get_context_idx_from_var(prev_reg_scope[scope_start]) return f"Scope[{function_context_stack.get_context(start_context, int(steps))}]" diff --git a/Translate/jump_blocks.py b/Translate/jump_blocks.py index 35809bb..c7e738a 100644 --- a/Translate/jump_blocks.py +++ b/Translate/jump_blocks.py @@ -1,12 +1,45 @@ class CodeLine: - def __init__(self, opcode="", line="", inst="", translated=""): + def __init__(self, opcode="", line="", inst="", translated="", decompiled=""): self.v8_opcode = opcode self.line_num = line self.v8_instruction = inst self.translated = translated - self.decompiled = "" + self.decompiled = decompiled self.visible = True - + self.metadata = None + + def set_metadata(self, meta_type, meta_val): + """ + Set metadata of a particular type for the code line + """ + if not self.metadata: + self.metadata = dict() + self.metadata[meta_type] = meta_val + + def get_metadata(self, meta_type): + """ + Retrieve metadata of particular type from the code line + """ + if not 
self.metadata: + return None + if not isinstance(self.metadata, dict): + return None + if meta_type not in self.metadata: + return None + return self.metadata[meta_type] + + def drop_metadata(self, meta_type): + """ + Remove metadata of particular type from the code line + """ + if not self.metadata: + return False + if not isinstance(self.metadata, dict): + return False + if not meta_type in self.metadata: + return False + self.metadata.pop(meta_type, None) + return True class JumpBlocks: def __init__(self, name, code, jump_table): @@ -23,10 +56,14 @@ def jump_done(self, jmp): def get_relative_offset(self, offset, n): # return a relative line offset to a given offset - new_offset = self.code_offset.index(offset) + n - if 0 <= new_offset <= len(self.code_offset): - return self.code_offset[new_offset] - raise Exception(f"relative offset {new_offset} from {offset} out of range") + try: + base_idx = self.code_offset.index(offset) + except ValueError: + raise KeyError(f"offset {offset} not found in code offsets") + new_idx = base_idx + n + if 0 <= new_idx < len(self.code_offset): + return self.code_offset[new_idx] + raise IndexError(f"relative offset {new_idx} from {offset} out of range") def get_all_jump_list(self): # Combine all jumps from the jump tables into one list diff --git a/Translate/translate_table.py b/Translate/translate_table.py index 364cd54..1aed0a8 100644 --- a/Translate/translate_table.py +++ b/Translate/translate_table.py @@ -174,11 +174,11 @@ def get_scope_id(args): "LdaLookupSlot": lambda obj: f"ACCU = ConstPool{obj.args[0]}", "LdaContextSlot": lambda obj: f"ACCU = Scope[{get_scope_id(obj.args)}]{obj.args[1]}", "LdaLookupContextSlot": lambda obj: f"ACCU = Scope[CURRENT-{obj.args[2][1:-1]}]{obj.args[1]}", - "LdaConstant": lambda obj: f"ACCU = ConstPool{obj.args[0]}", - "LdaNamedProperty": lambda obj: f"ACCU = {obj.args[0]}[ConstPool{obj.args[1]}]", - "LdaNamedPropertyFromSuper": lambda obj: f"ACCU = ACCU[ConstPool{obj.args[1]}]", - 
"GetNamedPropertyFromSuper": lambda obj: f"ACCU = ACCU[ConstPool{obj.args[1]}]", - "GetNamedProperty": lambda obj: f"ACCU = {obj.args[0]}[ConstPool{obj.args[1]}]", + "LdaConstant": lambda obj: f"ACCU = ConstPoolLiteral{obj.args[0]}", + "LdaNamedProperty": lambda obj: f"ACCU = {obj.args[0]}[ConstPoolLiteral{obj.args[1]}]", + "LdaNamedPropertyFromSuper": lambda obj: f"ACCU = ACCU[ConstPoolLiteral{obj.args[1]}]", + "GetNamedPropertyFromSuper": lambda obj: f"ACCU = ACCU[ConstPoolLiteral{obj.args[1]}]", + "GetNamedProperty": lambda obj: f"ACCU = {obj.args[0]}[ConstPoolLiteral{obj.args[1]}]", "GetKeyedProperty": lambda obj: f"ACCU = {obj.args[0]}[ACCU]", "GetTemplateObject": lambda obj: f"ACCU = ConstPool{obj.args[0]}", "LdaKeyedProperty": lambda obj: f"ACCU = {obj.args[0]}[ACCU]", @@ -214,14 +214,14 @@ def get_scope_id(args): # "StaLookupContextSlot": lambda obj: f"Scope[{get_scope_id(obj.args)}]{obj.args[1]} = ACCU", "StaCurrentContextSlot": lambda obj: f"Scope[CURRENT]{obj.args[0]} = ACCU", "StaInArrayLiteral": lambda obj: f"{obj.args[0]}[{obj.args[1]}] = ACCU", - "StaNamedOwnProperty": lambda obj: f"{obj.args[0]}[ConstPool{obj.args[1]}] = ACCU", - "StaNamedProperty": lambda obj: f"{obj.args[0]}[ConstPool{obj.args[1]}] = ACCU", + "StaNamedOwnProperty": lambda obj: f"{obj.args[0]}[ConstPoolLiteral{obj.args[1]}] = ACCU", + "StaNamedProperty": lambda obj: f"{obj.args[0]}[ConstPoolLiteral{obj.args[1]}] = ACCU", "StaKeyedProperty": lambda obj: f"{obj.args[0]}[{obj.args[1]}] = ACCU", "StaKeyedPropertyAsDefine": lambda obj: f"{obj.args[0]}[{obj.args[1]}] = ACCU", "StaDataPropertyInLiteral": lambda obj: f"{obj.args[0]}.{obj.args[1]} = ACCU", - "SetNamedProperty": lambda obj: f"{obj.args[0]}[ConstPool{obj.args[1]}] = ACCU", + "SetNamedProperty": lambda obj: f"{obj.args[0]}[ConstPoolLiteral{obj.args[1]}] = ACCU", "SetKeyedProperty": lambda obj: f"{obj.args[0]}[{obj.args[1]}] = ACCU", - "DefineNamedOwnProperty": lambda obj: f"{obj.args[0]}[ConstPool{obj.args[1]}] = ACCU", + 
"DefineNamedOwnProperty": lambda obj: f"{obj.args[0]}[ConstPoolLiteral{obj.args[1]}] = ACCU", "DefineKeyedOwnPropertyInLiteral": lambda obj: f"{obj.args[0]}[{obj.args[1]}] = ACCU", "DefineKeyedOwnProperty": lambda obj: f"{obj.args[0]}[{obj.args[1]}] = ACCU", diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e97e1fc --- /dev/null +++ b/__init__.py @@ -0,0 +1,7 @@ +import sys +import os + +_view8_dir = os.path.dirname(os.path.abspath(__file__)) +if _view8_dir not in sys.path: + sys.path.append(_view8_dir) + \ No newline at end of file diff --git a/view8.py b/view8.py old mode 100644 new mode 100755 index 171d834..af34f21 --- a/view8.py +++ b/view8.py @@ -1,8 +1,20 @@ +#!/usr/bin/env python3 import argparse import os + from Parser.parse_v8cache import parse_v8cache_file, parse_disassembled_file +from Parser.shared_function_info import GlobalVars, load_functions_from_file from Simplify.global_scope_replace import replace_global_scope +from view8_util import ( + export_to_file, + find_functions_by_name, + get_start_function, + print_funcs, + save_trees, + split_trees, +) +#### def disassemble(in_file, input_is_disassembled, disassembler): out_name = 'disasm.tmp' @@ -18,38 +30,114 @@ def disassemble(in_file, input_is_disassembled, disassembler): def decompile(all_functions): - # Decompile + global_vars = GlobalVars() print(f"Decompiling {len(all_functions)} functions.") for name in list(all_functions)[::-1]: - all_functions[name].decompile() - # replace_global_scope(all_functions) + all_functions[name].decompile(global_vars) + +def propagate_global_scope(all_func, verbosity): + if replace_global_scope(all_func, verbosity): + if verbosity: + print("Replace global scope done.") + return True + return False +### -def export_to_file(out_name, all_functions, format_list): - print(f"Exporting to file {out_name}.") - with open(out_name, "w") as f: - for function_name in list(all_functions)[::-1]: - 
f.write(all_functions[function_name].export(export_v8code="v8_opcode" in format_list, export_translated="translated" in format_list, export_decompiled="decompiled" in format_list)) - +def load_functions_set(filename): + try: + with open(filename, "r") as file: + deobf_funcs = set() + for line in file: + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + deobf_funcs.add(stripped) + return deobf_funcs + except FileNotFoundError: + return None + return None def main(): parser = argparse.ArgumentParser(description="View8: V8 cache decompiler.") - parser.add_argument('input_file', help="The input file name.") - parser.add_argument('output_file', help="The output file name.") - parser.add_argument('--path', '-p', help="Path to disassembler binary.", default=None) - parser.add_argument('--disassembled', '-d', action='store_true', help="Indicate if the input file is already disassembled.") - parser.add_argument('--export_format', '-e', nargs='+', choices=['v8_opcode', 'translated', 'decompiled'], - help="Specify the export format(s). Options are 'v8_opcode', 'translated', and 'decompiled'. Multiple options can be combined.", + parser.add_argument('--input_format', '-f', choices=['raw', 'serialized', 'disassembled'], + help="Specify the input format. Options are: 'raw', 'serialized' (pickle; trusted input only), 'disassembled'.", default='raw') + parser.add_argument('--inp', '-i', help="The input file name.", default=None, required=True) + parser.add_argument('--out', '-o', help="The output file name.", default=None) + parser.add_argument('--path', '-p', help="Path to disassembler binary. Required if the input is in the raw format.", default=None) + parser.add_argument('--export_format', '-e', nargs='+', choices=['v8_opcode', 'translated', 'decompiled', 'serialized'], + help="Specify the export format(s). Options are 'v8_opcode', 'translated', 'decompiled', and 'serialized'. 
Multiple options can be combined.", default=['decompiled']) - + parser.add_argument('--scope', help="Propagate scope arguments.", default=1, type=int, required=False) + parser.add_argument('--tree', '-t', help="Show functions tree, starting from a given node. To start from the default main function, use 'start'", default=None) + parser.add_argument('--mainlimit', '-l', help="In tree mode: a tree with depth above this limit will be treated as different module than main", type=int, default=1) + parser.add_argument('--include', '-n', help="Functions to Include (file containing a list)", default=None) + parser.add_argument('--exclude', '-x', help="Functions to Exclude (file containing a list)", default=None) + parser.add_argument('--func', help="A function to be displayed.", default=None, required=False) + parser.add_argument('--show_all', help="Should show lines marked as hidden (in function display mode)", default=False, required=False, action='store_true') + parser.add_argument('--verbosity', '-v', help="Verbosity level (0-3)", default=0, type=int, required=False) args = parser.parse_args() - if not os.path.isfile(args.input_file): - raise FileNotFoundError(f"The input file {args.input_file} does not exist.") + if not os.path.isfile(args.inp): + raise FileNotFoundError(f"The input file {args.inp} does not exist.") + + funcs_to_include = None + if args.include: + funcs_to_include = load_functions_set(args.include) + if funcs_to_include: + print(f"Include: {len(funcs_to_include)} functions") + + funcs_to_exclude = None + if args.exclude: + funcs_to_exclude = load_functions_set(args.exclude) + if funcs_to_exclude: + print(f"Exclude: {len(funcs_to_exclude)} functions") + + if args.input_format == 'serialized': + print(f"Reading from serialized, already decompiled input: {args.inp}") + all_func = load_functions_from_file(args.inp) + else: + disassembled = False + if args.input_format == 'disassembled': + disassembled = True + all_func = disassemble(args.inp, 
disassembled, args.path) + decompile(all_func) + + if args.scope: + print("Propagating scope arguments...") + propagate_global_scope(all_func, args.verbosity) + + # print a single selected function: + if args.func: + func_name = args.func + filtered = find_functions_by_name(all_func, func_name) + if func_name not in filtered: + print(f"Function {func_name} was not found. Found {len(filtered)} similar names.") + for key in filtered: + print(key) + if len(filtered) == 0: + return + print_funcs(filtered, args.show_all) + return + + if args.tree: + tree_root = args.tree + if tree_root == "start": + tree_root = get_start_function(all_func) + if not tree_root: + print("Error: tree root function not found.") + return + items_map = split_trees(all_func, tree_root) + if items_map is None: + print(f"Error: could not build tree from root '{tree_root}'.") + return + if args.out: + save_trees(all_func, tree_root, args.mainlimit, items_map, args.out, args.export_format, funcs_to_exclude) + print(f"Done.") + return - all_func = disassemble(args.input_file, args.disassembled, args.path) - decompile(all_func) - export_to_file(args.output_file, all_func, args.export_format) + if args.out: + export_to_file(args.out, all_func, args.export_format, funcs_to_include, funcs_to_exclude) print(f"Done.") diff --git a/view8_util.py b/view8_util.py new file mode 100644 index 0000000..95fadc6 --- /dev/null +++ b/view8_util.py @@ -0,0 +1,304 @@ +#!/usr/bin/env python3 +import os +import re + +from Parser.shared_function_info import SharedFunctionInfo, save_functions_to_file + +def is_root(sfi): + if sfi is None: + return False + if sfi.declarer is None: + return True + return False + +def get_start_function(functions): + if not functions: + return None + for curr_func in functions: + sfi = functions.get(curr_func) + if is_root(sfi): + return curr_func + if (sfi is None) or (sfi.declarer is None): + break + curr_func = sfi.declarer + return None + +def get_declared_children(functions, curr_func): 
+ children_list = [] + for func_name, sfi in functions.items(): + declarer = sfi.declarer + if declarer == curr_func: + children_list.append(func_name) + return children_list + +def next_visible_line(func, indx, is_backward=False): + """ + Next non-empty, visible line + Returns Index + """ + if is_backward: + step = (-1) + else: + step = 1 + indx += step + while (indx >= 0 and indx < len(func.code)): + if ((not func.code[indx].visible) or (not func.code[indx].decompiled) or (func.code[indx].decompiled.strip() == "")): + indx += step + continue + return indx + return None + +### + +def _rename_functions( + functions: dict[str, SharedFunctionInfo], + renamed_dict: dict[str, str], +) -> int: + renamed_count = 0 + new_functions: dict[str, SharedFunctionInfo] = {} + + for name, func in functions.items(): + if name in renamed_dict: + new_name = renamed_dict[name] + func.name = new_name + if func.declarer in renamed_dict: + func.declarer = renamed_dict[func.declarer] + new_functions[new_name] = func + renamed_count += 1 + else: + new_functions[name] = func + + # mutate the original dict in place (if callers hold a reference) + functions.clear() + functions.update(new_functions) + return renamed_count + +def rename_functions_in_code( + functions: dict[str, SharedFunctionInfo], + renamed_dict: dict[str, str], + verbosity: int +) -> int: + + renamed = _rename_functions(functions, renamed_dict) + if verbosity: + print(f"Renamed functions: {renamed}") + + func_pattern = r'\b(func_[a-zA-Z0-9_$]+_0x[0-9a-fA-F]+)\b' + regex = re.compile(func_pattern) + + for func in functions.values(): + for indx in range(len(func.code)): + if (not func.code[indx].visible) or (not func.code[indx].decompiled): + continue + line = func.code[indx].decompiled + # Replace only if the found name is in renamed_dict + def repl(m): + name = m.group(1) + return renamed_dict.get(name, name) + + new_line = regex.sub(repl, line) + + if new_line != line: + func.code[indx].decompiled = new_line + if verbosity: 
+ print(f"REPL: {line.strip()} -> {new_line.strip()}") + return renamed + +### + +def print_func(func_name, func, show_hidden=False, show_line_num=True, show_const=False, show_line_meta=False): + print("###") + print(f"# {func_name}") + print(f"# Declarer: {func.declarer}") + if func.metadata: + print(f"Metadata: {func.metadata}") + if show_const: + print(f"# Const Pool") + print(func.const_pool) + print(f"# Code") + indx = 0 + i = 0 + for i in range(len(func.code)): + line_obj = func.code[i] + if not line_obj.decompiled: + continue + if not show_hidden and not line_obj.visible: + continue + indx += 1 + line = line_obj.decompiled + + meta = "" + if show_line_meta: + if line_obj.metadata: + meta = f" # {line_obj.metadata}" + + if show_line_num: + if indx != i: + print(f"{indx}|{i}|{line}{meta}") + else: + print(f"{indx}|{line}{meta}") + else: + print(f"{line}") +# + +def print_funcs(functions, show_hidden=False, show_line_num=True, show_const=False, show_line_meta=False): + for func_name, func in functions.items(): + print_func(func_name, func, show_hidden, show_line_num, show_const, show_line_meta) + +def find_functions_by_name(all_func, name): + funcs = dict() + if name in all_func: + funcs[name] = all_func[name] + return funcs + sub_name = "_" + name + "_" + for key in all_func: + if sub_name in key: + funcs[key] = all_func[key] + return funcs +### + +def build_declaration_map(functions): + declared_by = {} + + for func_name, sfi in functions.items(): + declarer = sfi.declarer + if declarer: + if declarer not in declared_by: + declared_by[declarer] = [] + declared_by[declarer].append(func_name) + + return declared_by + +def remove_exclude_functions(all_functions, exclude_list): + declaration_table = build_declaration_map(all_functions) + number_of_function = len(exclude_list) + while exclude_list: + current_function = exclude_list.pop() + all_functions.pop(current_function, None) + next_level = declaration_table.get(current_function, []) + number_of_function += 
len(next_level) + exclude_list += next_level + print(f"Removed {number_of_function} functions") + +def get_included_functions(all_functions, include_list): + declaration_table = build_declaration_map(all_functions) + number_of_function = len(include_list) + new_all_func = {} + while include_list: + current_function = include_list.pop() + new_all_func[current_function] = all_functions[current_function] + next_level = declaration_table.get(current_function, []) + number_of_function += len(next_level) + include_list += next_level + return new_all_func +### + +def _export_to_file(out_name, all_functions, format_list, included_list=None, excluded_list=None): + with open(out_name, "w", encoding="utf-8") as f: + print(f"Exporting to file {out_name}") + for function_name in list(all_functions)[::-1]: + include = True + if (excluded_list is not None and len(excluded_list)) and (function_name in excluded_list): + include = False + if (included_list is not None and len(included_list)) and (function_name not in included_list): + include = False + if not all_functions[function_name].visible: + include = False + if not include: + continue + f.write(all_functions[function_name].export(export_v8code="v8_opcode" in format_list, export_translated="translated" in format_list, export_decompiled="decompiled" in format_list)) + +def _get_extension(filename): + ext = None + if '.' in filename: + ext = '.' + filename.rsplit('.', 1)[-1] + return ext + +def _add_or_replace_extension(filename, new_ext): + if not new_ext.startswith('.'): + new_ext = '.' + new_ext + base = filename.rsplit('.', 1)[0] if '.' in filename else filename + return base + new_ext + +def export_to_file(out_name, all_functions, format_list, included_list=None, excluded_list=None): + """ + Saves the decompiled functions into a file of a given format. 
+ + :param out_name: Name of the output file (the extension may be autoadjusted to the output format) + :param all_functions: Decompiled functions to be exported + :param format_list: A list defining the format/s that will be used to export the content. + :param included_list: If defined: only functions to be included in the output (param ignored in `serialized` format) + :param excluded_list: If defined: functions to be excluded from the output (param ignored in `serialized` format) + """ + serialize_only = False + serialized_ext = ".pkl" + text_ext = ".txt" + if ('serialized' in format_list): + if len(format_list) == 1: + serialize_only = True + + serialized_name = _add_or_replace_extension(out_name, serialized_ext) + print(f"Serializing to file: {serialized_name}") + save_functions_to_file(all_functions, serialized_name) + + if serialize_only: + return + if _get_extension(out_name) == serialized_ext: + out_name = _add_or_replace_extension(out_name, text_ext) + _export_to_file(out_name, all_functions, format_list, included_list, excluded_list) + +### + +def split_trees(functions, curr_func): + sfi = functions.get(curr_func) + if sfi is None: + return None + print("Tree root: " + sfi.name) + if sfi.declarer is None: + print("Declarer Root") + else: + print("Parent: " + sfi.declarer) + children = get_declared_children(functions, curr_func) + my_map = dict() + for c in children: + family = get_included_functions(functions, [c]) + my_map[c] = family + sorted_map = dict(sorted(my_map.items(), key=lambda item: len(item[1]))) + return sorted_map + +def create_dirs(nested_directory): + is_ok = False + try: + os.makedirs(nested_directory) + is_ok = True + except FileExistsError: + is_ok = True + except Exception as e: + print(f"An error occurred: {e}") + return is_ok + +def save_trees(all_functions, main_func, main_limit, items_map, out_dir, export_format, excluded_list): + # export the root function and directly related: + main_set = [main_func] + for name, filtered_func 
in items_map.items(): + if len(filtered_func) <= main_limit: + main_set += filtered_func + file_name = f"{main_func}.txt" + create_dirs(out_dir) + out_path = os.path.join(out_dir, file_name) + export_to_file(out_path, all_functions, export_format, main_set, excluded_list) + + # export the subtrees: + for name, filtered_func in items_map.items(): + if len(filtered_func) <= main_limit: + continue #skip + print(f"Name: {name}, List Length: {len(filtered_func)}") + subdir = f"{len(filtered_func)}" + file_name = f"{name}.txt" + dirs = os.path.join(out_dir, subdir) + create_dirs(dirs) + out_path = os.path.join(out_dir, subdir, file_name) + export_to_file(out_path, all_functions, export_format, filtered_func, excluded_list) + +### +