-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBasicFileInfo_Decode_V5.py
More file actions
198 lines (165 loc) · 5.22 KB
/
Copy pathBasicFileInfo_Decode_V5.py
File metadata and controls
198 lines (165 loc) · 5.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import argparse
import json
import locale
import pathlib
import re
import sys
# Encoding used by safe_print() when it has to escape text the console cannot
# display; "utf-8" is used when stdout reports no encoding.
STDOUT_ENCODING = sys.stdout.encoding if sys.stdout.encoding else "utf-8"

# Input file that is decoded when no path is given on the command line.
BASE_DEFAULT = pathlib.Path("racbasicsamplefamily", "BasicFileInfo.bin")
def safe_print(text: str = "") -> None:
    """Print ``text``, escaping characters the output stream cannot encode.

    The text is first printed unchanged. If that raises
    ``UnicodeEncodeError``, every character the stream's encoding cannot
    represent is replaced by a backslash escape sequence and the escaped text
    is printed instead.

    Args:
        text: The string to print; the default prints an empty line.
    """
    try:
        print(text)
    except UnicodeEncodeError:
        # Escape against the encoding of the stream that just rejected the
        # text. STDOUT_ENCODING is captured once at import time, so it is
        # stale if sys.stdout was replaced or redirected afterwards, and
        # escaping against a stale encoding makes the retry fail again.
        # The constant is still used when the stream reports no encoding.
        encoding = getattr(sys.stdout, "encoding", None) or STDOUT_ENCODING
        escaped = text.encode(encoding, errors="backslashreplace")
        print(escaped.decode(encoding, errors="backslashreplace"))
def extract_utf16le_strings(blob: bytes, min_len: int = 4):
    """Collect runs of printable ASCII stored as UTF-16 LE code units.

    The blob is read two bytes at a time starting at offset 0. A pair counts
    as a character when its first byte is printable ASCII (32..126) and its
    second byte is zero. Consecutive characters form a run; any other pair
    ends the run. A trailing unpaired byte is ignored.

    Args:
        blob: Raw bytes to scan.
        min_len: Minimum number of characters a run needs to be reported.

    Returns:
        List of the runs found, in order of appearance.
    """
    found = []
    run = []
    # zip() stops at the shorter slice, which drops a trailing odd byte.
    for low, high in zip(blob[0::2], blob[1::2]):
        if high == 0 and 32 <= low <= 126:
            run.append(chr(low))
            continue
        # Non-text pair: close the current run and start a new one.
        if len(run) >= min_len:
            found.append("".join(run))
        run = []
    # Close the run that reaches the end of the data.
    if len(run) >= min_len:
        found.append("".join(run))
    return found
def decode_full_utf16le(blob: bytes):
    """Decode the whole blob as UTF-16 LE and return its non-empty lines.

    Undecodable input is skipped. NUL and carriage-return characters are
    removed, the text is split on newlines, and each line is stripped of
    surrounding whitespace. Lines that end up empty are dropped.

    Args:
        blob: Raw bytes to decode.

    Returns:
        List of cleaned, non-empty lines in their original order.
    """
    decoded = blob.decode("utf-16-le", errors="ignore")
    # Drop NUL (0) and carriage return (13) in one pass.
    cleaned = decoded.translate({0: None, 13: None})
    kept = []
    for piece in cleaned.split("\n"):
        stripped = piece.strip()
        if stripped:
            kept.append(stripped)
    return kept
def extract_guids(text: str):
    """Return the distinct GUID-shaped substrings of ``text``, sorted.

    A match is five hyphen-separated groups of 8, 4, 4, 4 and 12 hexadecimal
    digits (upper or lower case). Matching is case-sensitive for
    de-duplication, and the result is sorted as plain strings.

    Args:
        text: Text to search.

    Returns:
        Sorted list of unique matches; empty when there are none.
    """
    guid_re = re.compile(
        r"[0-9a-fA-F]{8}"
        r"-[0-9a-fA-F]{4}"
        r"-[0-9a-fA-F]{4}"
        r"-[0-9a-fA-F]{4}"
        r"-[0-9a-fA-F]{12}"
    )
    unique = {match.group(0) for match in guid_re.finditer(text)}
    ordered = list(unique)
    ordered.sort()
    return ordered
def parse_metadata_from_lines(lines):
    """Extract key BasicFileInfo fields from decoded text lines by heuristics.

    The lines are joined with spaces and split into whitespace-separated
    tokens, which are then scanned for recognisable shapes. A field is only
    present in the result when a matching token was found.

    Args:
        lines: Iterable of strings (for example the decoded lines of a
            BasicFileInfo stream).

    Returns:
        Dict that may contain the following keys:

        * ``revit_version`` (str): first token made of exactly four digits.
        * ``build`` (str): first token containing ``_``, ``(`` and ``)``.
        * ``original_path`` (str): first token containing ``:`` and a
          backslash, extended with the following tokens up to and including
          the first one ending in ``.rfa`` or ``.rvt``.
        * ``document_guid`` / ``session_guid`` (str) and ``extra_guids``
          (list of str): first, second and remaining GUIDs as returned by
          ``extract_guids``.
        * ``platform_bits`` (int): first token of the form ``<digits>$``.
    """
    info: dict[str, object] = {}
    joined = " ".join(lines)
    tokens = joined.split()

    # Revit version: first token with exactly 4 digits.
    version = next((t for t in tokens if re.fullmatch(r"\d{4}", t)), None)
    if version is not None:
        info["revit_version"] = version

    # Build string: something like 20190207_1515(x64).
    build = next(
        (t for t in tokens if "_" in t and "(" in t and ")" in t), None
    )
    if build is not None:
        info["build"] = build

    # Original file path. A path containing spaces is spread over several
    # tokens, so keep collecting tokens until one carries the file extension.
    path_suffixes = (".rfa", ".rvt")
    for index, token in enumerate(tokens):
        if ":" not in token or "\\" not in token:
            continue
        parts = [token]
        # A path without spaces is already complete in its first token;
        # collecting further tokens here would swallow unrelated text up to
        # the next token that happens to end in one of the suffixes.
        if not token.lower().endswith(path_suffixes):
            for follower in tokens[index + 1:]:
                parts.append(follower)
                if follower.lower().endswith(path_suffixes):
                    break
        info["original_path"] = " ".join(parts)
        break

    # GUIDs.
    # NOTE(review): extract_guids() returns its matches sorted, so the
    # "document"/"session" labels follow sort order, not the order in which
    # the GUIDs appear in the file. Confirm this is intended.
    guids = extract_guids(joined)
    if guids:
        info["document_guid"] = guids[0]
    if len(guids) > 1:
        info["session_guid"] = guids[1]
    if len(guids) > 2:
        info["extra_guids"] = guids[2:]

    # Platform bits: something like 64$. isdecimal() only accepts characters
    # that int() can convert, so no ValueError handling is needed, and a
    # non-numeric look-alike no longer ends the search early.
    for token in tokens:
        digits = token[:-1]
        if token.endswith("$") and digits.isdecimal():
            info["platform_bits"] = int(digits)
            break

    return info
def main(argv=None):
    """Decode a BasicFileInfo.bin file and print the metadata found in it.

    Reads the file, prints its size and the parsed metadata, and optionally
    writes the metadata as JSON and dumps the decoded lines or the substring
    scan, depending on the command-line flags.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. ``None`` (the default) makes argparse use ``sys.argv[1:]``.

    Returns:
        Exit code: 0 on success, 1 when the input file cannot be read.
    """
    parser = argparse.ArgumentParser(
        description="Decode racbasicsamplefamily/BasicFileInfo.bin"
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=str(BASE_DEFAULT),
        help="Path to BasicFileInfo.bin "
        "(default: racbasicsamplefamily/BasicFileInfo.bin)",
    )
    parser.add_argument(
        "--dump-lines",
        action="store_true",
        help="Show all UTF-16 LE lines",
    )
    parser.add_argument(
        "--dump-substrings",
        action="store_true",
        help="Dump all found UTF-16 substrings",
    )
    parser.add_argument(
        "--json-out",
        action="store_true",
        help="Write metadata to BasicFileInfo.json next to the bin",
    )
    args = parser.parse_args(argv)

    path = pathlib.Path(args.path)
    # Read first and handle failure, instead of checking exists() up front:
    # exists() is also true for directories and unreadable files, which would
    # otherwise crash with a traceback in read_bytes().
    try:
        blob = path.read_bytes()
    except FileNotFoundError:
        safe_print(f"File not found: {path}")
        return 1
    except OSError as exc:
        safe_print(f"Cannot read {path}: {exc}")
        return 1

    safe_print(f"File: {path}")
    safe_print(f"Size: {len(blob)} bytes")
    safe_print()

    # Full decode to lines
    lines = decode_full_utf16le(blob)

    # Extract metadata
    meta = parse_metadata_from_lines(lines)
    safe_print("Parsed metadata:")
    safe_print("----------------")
    for key in sorted(meta):
        safe_print(f" {key}: {meta[key]}")
    safe_print()

    if args.json_out:
        # Same directory and stem as the input, with a .json suffix.
        json_path = path.with_suffix(".json")
        json_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
        safe_print(f"Metadata written to: {json_path}")
        safe_print()

    # Optional: dump all UTF-16 LE lines
    if args.dump_lines:
        safe_print("UTF-16LE lines (full decode):")
        safe_print("--------------------------------")
        for line in lines:
            safe_print(f" {line}")
        safe_print()

    # Optional: brute-force substring scan
    if args.dump_substrings:
        safe_print("Extracted UTF-16LE substrings (scan):")
        safe_print("-------------------------------------")
        for s in extract_utf16le_strings(blob):
            safe_print(f" {s}")
        safe_print()

    return 0
# Run the command-line tool only when executed as a script, and hand main()'s
# return value to the interpreter as the process exit status.
if __name__ == "__main__":
    sys.exit(main())