-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_Extract_RFA_V2.py
More file actions
150 lines (115 loc) · 4.14 KB
/
Copy path_Extract_RFA_V2.py
File metadata and controls
150 lines (115 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import locale
import re
import sys
from pathlib import Path

import olefile
# Output encoding determined once at import time; safe_print() refers to it
# when it has to re-encode text. If sys.stdout.encoding is None or empty, the
# locale's preferred encoding is used instead (False = do not call setlocale).
STDOUT_ENCODING = sys.stdout.encoding or locale.getpreferredencoding(False)
def safe_print(text: str = ""):
    """Print *text*, degrading unencodable characters instead of raising.

    The text is first printed as-is. If the output stream cannot encode it
    (UnicodeEncodeError), the text is round-tripped through the stream's
    encoding with ``errors="replace"`` so that offending characters become
    ``?`` and the line is printed anyway.

    Args:
        text: The line to print. Defaults to an empty line.
    """
    try:
        print(text)
    except UnicodeEncodeError:
        # Ask the stream that just rejected the text for its encoding. The
        # import-time STDOUT_ENCODING can be stale when sys.stdout has been
        # redirected or replaced since, which would make the retry fail too.
        encoding = getattr(sys.stdout, "encoding", None) or STDOUT_ENCODING
        safe = text.encode(encoding, errors="replace").decode(
            encoding, errors="replace"
        )
        print(safe)
def hexdump(data: bytes, max_bytes: int = 64):
    """Yield hexdump lines for the first ``max_bytes`` bytes of ``data``.

    Each yielded line covers up to 16 bytes: the upper-case hex values
    separated by spaces (left-aligned in a 48-character column), followed by
    the printable-ASCII rendering where every byte outside 32..126 is shown
    as ``.``. Empty input yields nothing.
    """
    head = data[:max_bytes]
    # Walk the data one 16-byte row at a time.
    for offset in range(0, len(head), 16):
        row = head[offset:offset + 16]
        hex_column = " ".join(f"{byte:02X}" for byte in row)
        text_column = "".join(
            chr(byte) if 32 <= byte < 127 else "." for byte in row
        )
        yield "{:<48} {}".format(hex_column, text_column)
def extract_ascii_strings(data: bytes, min_len: int = 4):
    """Return the printable-ASCII runs found in ``data``.

    A run is a maximal sequence of consecutive bytes in the range 32..126
    (space through ``~``). Runs shorter than ``min_len`` are dropped.

    Args:
        data: Raw bytes to scan.
        min_len: Minimum run length to report. Values below 1 are treated
            as 1 so that empty strings are never reported.

    Returns:
        A list of ``str``, in order of appearance.
    """
    # A run needs at least one byte; without this clamp a non-positive
    # min_len would match the empty string between non-printable bytes.
    min_len = max(min_len, 1)
    # Matching is greedy, so each hit is a whole maximal run. The bytes are
    # guaranteed to be ASCII, so the decode cannot fail.
    run_pattern = re.compile(rb"[\x20-\x7E]{%d,}" % min_len)
    return [run.decode("ascii") for run in run_pattern.findall(data)]
def parse_basic_file_info(data: bytes):
    """
    Decode a BasicFileInfo stream into a list of non-empty text lines.

    The bytes are decoded as UTF-16 LE (undecodable parts are ignored), NUL
    and carriage-return characters are removed, and the text is split on
    newlines. Each line is stripped of surrounding whitespace; lines that
    end up empty are dropped.
    """
    try:
        decoded = data.decode("utf-16-le", errors="ignore")
    except Exception:
        # Fallback decode, kept exactly as in the original implementation.
        decoded = data.decode("latin1", errors="ignore")

    cleaned = decoded.replace("\x00", "").replace("\r", "")
    stripped = map(str.strip, cleaned.split("\n"))
    # filter(None, ...) keeps only the non-empty lines.
    return list(filter(None, stripped))
# Characters that Windows rejects in file names; control characters
# (code points below 32) are handled separately in _stream_file_stub.
_UNSAFE_FILENAME_CHARS = frozenset('<>:"/\\|?*')


def _stream_file_stub(stream):
    """Return a flat, file-system-safe base name for an OLE stream path."""
    flat = "_".join(stream)
    # OLE stream names may start with control characters such as \x05;
    # percent-escape anything that is not a valid file-name character.
    return "".join(
        f"%{ord(ch):02X}" if ord(ch) < 32 or ch in _UNSAFE_FILENAME_CHARS else ch
        for ch in flat
    )


def _report_dir_for(path: Path) -> Path:
    """Return the folder that receives the extracted streams of ``path``."""
    report_dir = path.with_suffix("")  # e.g. racbasicsamplefamily
    if report_dir == path:
        # The input has no suffix, so stripping it yields the input file
        # itself; mkdir(exist_ok=True) would raise FileExistsError on it.
        report_dir = path.with_name(path.name + "_streams")
    return report_dir


def inspect_rfa(path: Path):
    """Print a report of every stream in an OLE file and dump each to disk.

    For each stream the report shows its name, size, a hexdump of the first
    64 bytes and up to 20 printable-ASCII strings. The ``BasicFileInfo``
    stream is additionally decoded to text lines. The raw bytes of every
    stream are written to ``<report folder>/<stream name>.bin``, where the
    report folder is ``path`` without its suffix (or ``<name>_streams`` when
    ``path`` has no suffix).

    Args:
        path: Location of the file to inspect.

    Raises:
        SystemExit: With exit code 1 if ``path`` does not exist.
        Errors raised by ``olefile`` when opening or reading the file, and
        by the file system when writing the dumps, are propagated unchanged.
    """
    if not path.exists():
        safe_print(f"File not found: {path}")
        sys.exit(1)

    report_dir = _report_dir_for(path)
    report_dir.mkdir(exist_ok=True)

    safe_print(f"File: {path}")
    safe_print(f"Report folder: {report_dir}")
    safe_print()

    with olefile.OleFileIO(str(path)) as ole:
        for stream in ole.listdir(streams=True, storages=False):
            # Storage path joined with "/" for display purposes.
            display_name = "/".join(stream)
            data = ole.openstream(stream).read()

            safe_print("=" * 80)
            safe_print(f"STREAM: {display_name}")
            safe_print(f"Size: {len(data)} bytes")

            safe_print("\nHexdump (first 64 bytes):")
            for line in hexdump(data, max_bytes=64):
                safe_print(line)

            # Special handling for BasicFileInfo.
            if display_name == "BasicFileInfo":
                safe_print("\nBasicFileInfo (attempted decode):")
                for info_line in parse_basic_file_info(data):
                    safe_print(" " + info_line)

            # Extract strings for all streams (print only).
            strings_found = extract_ascii_strings(data, min_len=4)
            if strings_found:
                safe_print("\nASCII strings (selection):")
                for s in strings_found[:20]:
                    safe_print(" " + s)

            # Write raw data for possible further analysis; the dump goes
            # directly into the report folder, without subfolders.
            raw_file = report_dir / f"{_stream_file_stub(stream)}.bin"
            raw_file.write_bytes(data)
            safe_print()

    safe_print("=" * 80)
    safe_print("Done. For each stream there is a .bin in:")
    safe_print(str(report_dir))
if __name__ == "__main__":
    # Inspect the file named by the first command-line argument; without
    # one, fall back to the sample family in the current directory.
    cli_args = sys.argv[1:]
    rfa_path = Path(cli_args[0] if cli_args else "racbasicsamplefamily.rfa")
    inspect_rfa(rfa_path)