diff --git a/CHANGELOG.md b/CHANGELOG.md index 83e61e94..8ee2dec9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### 0.39.1 * Fix data-only splits missing the initial `macro.inc` include. +* Make sure all plain text file reads and writes have an explicit UTF-8 encoding. ### 0.39.0 diff --git a/src/splat/scripts/create_config.py b/src/splat/scripts/create_config.py index eed507cd..56ef8652 100644 --- a/src/splat/scripts/create_config.py +++ b/src/splat/scripts/create_config.py @@ -182,7 +182,7 @@ def create_n64_config(rom_path: Path): """ out_file = Path(f"{cleaned_basename}.yaml") - with out_file.open("w", newline="\n") as f: + with out_file.open("w", encoding="utf-8", newline="\n") as f: print(f"Writing config to {out_file}") f.write(header) f.write(segments) @@ -247,7 +247,7 @@ def create_n64_config(rom_path: Path): ) reloc_addrs.append("") if reloc_addrs: - with Path("reloc_addrs.txt").open("w", newline="\n") as f: + with Path("reloc_addrs.txt").open("w", encoding="utf-8", newline="\n") as f: print("Writing reloc_addrs.txt") f.write( "// Visit https://github.com/ethteck/splat/wiki/Advanced-Reloc for documentation about this file\n" @@ -265,7 +265,7 @@ def create_n64_config(rom_path: Path): ) if symbol_addrs: symbol_addrs.append("") - with Path("symbol_addrs.txt").open("w", newline="\n") as f: + with Path("symbol_addrs.txt").open("w", encoding="utf-8", newline="\n") as f: print("Writing symbol_addrs.txt") f.write( "// Visit https://github.com/ethteck/splat/wiki/Adding-Symbols for documentation about this file\n" @@ -363,7 +363,7 @@ def create_psx_config(exe_path: Path, exe_bytes: bytes): """ out_file = Path(f"{cleaned_basename}.yaml") - with out_file.open("w", newline="\n") as f: + with out_file.open("w", encoding="utf-8", newline="\n") as f: print(f"Writing config to {out_file}") f.write(header) f.write(segments) @@ -478,7 +478,7 @@ def do_elf(elf_path: Path, elf_bytes: bytes, objcopy: Optional[str]): """ out_file = Path(f"{cleaned_basename}.yaml") 
- with out_file.open("w", newline="\n") as f: + with out_file.open("w", encoding="utf-8", newline="\n") as f: print(f"Writing config to {out_file}") f.write(header) f.write(segments) @@ -493,7 +493,7 @@ def do_elf(elf_path: Path, elf_bytes: bytes, objcopy: Optional[str]): symbol_addrs.append(f"_start = 0x{elf.entrypoint:08X}; // type:func") if symbol_addrs: symbol_addrs.append("") - with Path("symbol_addrs.txt").open("w", newline="\n") as f: + with Path("symbol_addrs.txt").open("w", encoding="utf-8", newline="\n") as f: print("Writing symbol_addrs.txt") f.write( "// Visit https://github.com/ethteck/splat/wiki/Adding-Symbols for documentation about this file\n" @@ -506,7 +506,11 @@ def do_elf(elf_path: Path, elf_bytes: bytes, objcopy: Optional[str]): linker_script.append("ENTRY(_start);") if linker_script: linker_script.append("") - with Path("linker_script_extra.ld").open("w", newline="\n") as f: + with Path("linker_script_extra.ld").open( + "w", + encoding="utf-8", + newline="\n", + ) as f: print("Writing linker_script_extra.ld") f.write( "/* Pass this file to the linker with the `-T linker_script_extra.ld` flag */\n" diff --git a/src/splat/scripts/split.py b/src/splat/scripts/split.py index a4fc643b..98191acf 100644 --- a/src/splat/scripts/split.py +++ b/src/splat/scripts/split.py @@ -354,8 +354,7 @@ def do_split( if segment.should_split(): segment_bytes = rom_bytes if segment.file_path: - with open(segment.file_path, "rb") as segment_input_file: - segment_bytes = segment_input_file.read() + segment_bytes = segment.file_path.read_bytes() segment.split(segment_bytes) @@ -461,13 +460,17 @@ def write_elf_sections_file(all_segments: List[Segment]): for segment in all_segments: section_list += "." 
+ segment.get_cname() + "\n" options.opts.elf_section_list_path.parent.mkdir(parents=True, exist_ok=True) - with options.opts.elf_section_list_path.open("w", newline="\n") as f: + with options.opts.elf_section_list_path.open( + "w", + encoding="utf-8", + newline="\n", + ) as f: f.write(section_list) def write_undefined_auto(to_write: List[symbols.Symbol], file_path: Path): file_path.parent.mkdir(parents=True, exist_ok=True) - with file_path.open("w", newline="\n") as f: + with file_path.open("w", encoding="utf-8", newline="\n") as f: for symbol in to_write: f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n") @@ -518,7 +521,7 @@ def dump_symbols() -> None: splat_hidden_folder = options.opts.base_path / ".splat" splat_hidden_folder.mkdir(parents=True, exist_ok=True) - with open(splat_hidden_folder / "splat_symbols.csv", "w") as f: + with open(splat_hidden_folder / "splat_symbols.csv", "w", encoding="utf-8") as f: f.write( "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,extract\n" ) diff --git a/src/splat/segtypes/common/bin.py b/src/splat/segtypes/common/bin.py index 7c4b80d9..bf3699b8 100644 --- a/src/splat/segtypes/common/bin.py +++ b/src/splat/segtypes/common/bin.py @@ -28,11 +28,10 @@ def split(self, rom_bytes): if self.size is None or self.size <= 0: log.error(f"Segment {self.name} has zero size.") - with open(path, "wb") as f: - assert isinstance(self.rom_start, int) - assert isinstance(self.rom_end, int) + assert isinstance(self.rom_start, int) + assert isinstance(self.rom_end, int) + path.write_bytes(rom_bytes[self.rom_start : self.rom_end]) - f.write(rom_bytes[self.rom_start : self.rom_end]) self.log(f"Wrote {self.name} to {path}") @property diff --git a/src/splat/segtypes/common/c.py b/src/splat/segtypes/common/c.py index 77990eb4..7cbf0b23 100644 --- a/src/splat/segtypes/common/c.py +++ b/src/splat/segtypes/common/c.py @@ -51,8 +51,7 @@ def replacer(match): @staticmethod def get_funcs_defined_in_c(c_file: Path) -> 
Set[str]: - with open(c_file, "r", encoding="utf-8") as f: - text = CommonSegC.strip_c_comments(f.read()) + text = CommonSegC.strip_c_comments(c_file.read_text(encoding="utf-8")) return set(m.group(1) for m in C_FUNC_RE.finditer(text)) @@ -105,8 +104,7 @@ def find_include_rodata(text: str): @staticmethod def get_global_asm_funcs(c_file: Path) -> Set[str]: - with c_file.open(encoding="utf-8") as f: - text = CommonSegC.strip_c_comments(f.read()) + text = CommonSegC.strip_c_comments(c_file.read_text(encoding="utf-8")) if options.opts.compiler == IDO: return set(m.group(2) for m in C_GLOBAL_ASM_IDO_RE.finditer(text)) else: @@ -114,8 +112,7 @@ def get_global_asm_funcs(c_file: Path) -> Set[str]: @staticmethod def get_global_asm_rodata_syms(c_file: Path) -> Set[str]: - with c_file.open(encoding="utf-8") as f: - text = CommonSegC.strip_c_comments(f.read()) + text = CommonSegC.strip_c_comments(c_file.read_text(encoding="utf-8")) if options.opts.compiler == IDO: return set(m.group(2) for m in C_GLOBAL_ASM_IDO_RE.finditer(text)) else: @@ -351,7 +348,7 @@ def create_c_asm_file( outpath.parent.mkdir(parents=True, exist_ok=True) - with outpath.open("w", newline="\n") as f: + with outpath.open("w", encoding="utf-8", newline="\n") as f: if options.opts.asm_inc_header: f.write( options.opts.c_newline.join(options.opts.asm_inc_header.split("\n")) @@ -389,7 +386,7 @@ def create_unmigrated_rodata_file( outpath.parent.mkdir(parents=True, exist_ok=True) - with outpath.open("w", newline="\n") as f: + with outpath.open("w", encoding="utf-8", newline="\n") as f: preamble = options.opts.generated_s_preamble if preamble: f.write(preamble + "\n") @@ -480,7 +477,7 @@ def create_c_file( c_lines += self.get_c_lines_for_rodata_sym(rodata_sym, asm_out_dir) c_path.parent.mkdir(parents=True, exist_ok=True) - with c_path.open("w", newline=options.opts.c_newline) as f: + with c_path.open("w", encoding="utf-8", newline=options.opts.c_newline) as f: f.write("\n".join(c_lines)) log.write(f"Wrote 
{self.name} to {c_path}") diff --git a/src/splat/segtypes/common/codesubsegment.py b/src/splat/segtypes/common/codesubsegment.py index f4b07a6b..8b7de679 100644 --- a/src/splat/segtypes/common/codesubsegment.py +++ b/src/splat/segtypes/common/codesubsegment.py @@ -245,7 +245,7 @@ def split_as_asm_file(self, out_path: Optional[Path]): self.print_file_boundaries() - with open(out_path, "w", newline="\n") as f: + with out_path.open("w", encoding="utf-8", newline="\n") as f: # Write `.text` contents for line in self.get_asm_file_header(): f.write(line + "\n") @@ -257,7 +257,7 @@ def split_as_asmtu_file(self, out_path: Path): self.print_file_boundaries() - with open(out_path, "w", newline="\n") as f: + with open(out_path, "w", encoding="utf-8", newline="\n") as f: for line in self.get_asm_file_header(): f.write(line + "\n") diff --git a/src/splat/segtypes/common/databin.py b/src/splat/segtypes/common/databin.py index 25ffd257..550c0cb7 100644 --- a/src/splat/segtypes/common/databin.py +++ b/src/splat/segtypes/common/databin.py @@ -35,7 +35,7 @@ def split(self, rom_bytes): assert s_path is not None s_path.parent.mkdir(parents=True, exist_ok=True) - with s_path.open("w") as f: + with s_path.open("w", encoding="utf-8") as f: f.write('.include "macro.inc"\n\n') preamble = options.opts.generated_s_preamble if preamble: diff --git a/src/splat/segtypes/common/header.py b/src/splat/segtypes/common/header.py index f8c41c66..c0c2b1ad 100644 --- a/src/splat/segtypes/common/header.py +++ b/src/splat/segtypes/common/header.py @@ -37,7 +37,7 @@ def split(self, rom_bytes): src_path = self.out_path() src_path.parent.mkdir(parents=True, exist_ok=True) - with open(src_path, "w", newline="\n") as f: + with src_path.open("w", encoding="utf-8", newline="\n") as f: f.write("\n".join(header_lines)) self.log(f"Wrote {self.name} to {src_path}") diff --git a/src/splat/segtypes/common/rodatabin.py b/src/splat/segtypes/common/rodatabin.py index f3e8575a..8219c5e3 100644 --- 
a/src/splat/segtypes/common/rodatabin.py +++ b/src/splat/segtypes/common/rodatabin.py @@ -35,7 +35,7 @@ def split(self, rom_bytes): assert s_path is not None s_path.parent.mkdir(parents=True, exist_ok=True) - with s_path.open("w") as f: + with s_path.open("w", encoding="utf-8") as f: f.write('.include "macro.inc"\n\n') preamble = options.opts.generated_s_preamble if preamble: diff --git a/src/splat/segtypes/common/textbin.py b/src/splat/segtypes/common/textbin.py index e24eb6f7..2b1db88e 100644 --- a/src/splat/segtypes/common/textbin.py +++ b/src/splat/segtypes/common/textbin.py @@ -153,7 +153,7 @@ def split(self, rom_bytes): s_path.parent.mkdir(parents=True, exist_ok=True) - with s_path.open("w") as f: + with s_path.open("w", encoding="utf-8") as f: f.write('.include "macro.inc"\n\n') preamble = options.opts.generated_s_preamble if preamble: diff --git a/src/splat/segtypes/linker_entry.py b/src/splat/segtypes/linker_entry.py index 192a203d..be7c3851 100644 --- a/src/splat/segtypes/linker_entry.py +++ b/src/splat/segtypes/linker_entry.py @@ -45,13 +45,13 @@ def path_to_object_path(path: Path) -> Path: def write_file_if_different(path: Path, new_content: str): if path.exists(): - old_content = path.read_text() + old_content = path.read_text(encoding="utf-8") else: old_content = "" if old_content != new_content: path.parent.mkdir(parents=True, exist_ok=True) - with path.open("w", newline=options.opts.c_newline) as f: + with path.open("w", encoding="utf-8", newline=options.opts.c_newline) as f: f.write(new_content) diff --git a/src/splat/segtypes/n64/decompressor.py b/src/splat/segtypes/n64/decompressor.py index 1d992212..94369129 100644 --- a/src/splat/segtypes/n64/decompressor.py +++ b/src/splat/segtypes/n64/decompressor.py @@ -14,14 +14,13 @@ def split(self, rom_bytes): ) out_path = out_dir / f"{self.name}.bin" - with open(out_path, "wb") as f: - assert isinstance(self.rom_start, int) - assert isinstance(self.rom_end, int) - - self.log(f"Decompressing {self.name}") 
- compressed_bytes = rom_bytes[self.rom_start : self.rom_end] - decompressed_bytes = self.decompress(compressed_bytes) - f.write(decompressed_bytes) + assert isinstance(self.rom_start, int) + assert isinstance(self.rom_end, int) + + self.log(f"Decompressing {self.name}") + compressed_bytes = rom_bytes[self.rom_start : self.rom_end] + decompressed_bytes = self.decompress(compressed_bytes) + out_path.write_bytes(decompressed_bytes) self.log(f"Wrote {self.name} to {out_path}") def get_linker_entries(self): diff --git a/src/splat/segtypes/n64/gfx.py b/src/splat/segtypes/n64/gfx.py index dbf0afd3..1ef52346 100644 --- a/src/splat/segtypes/n64/gfx.py +++ b/src/splat/segtypes/n64/gfx.py @@ -259,10 +259,11 @@ def light_sub_func(match): return out_str def split(self, rom_bytes: bytes): - if self.file_text and self.out_path(): - self.out_path().parent.mkdir(parents=True, exist_ok=True) + out_path = self.out_path() + if self.file_text and out_path: + out_path.parent.mkdir(parents=True, exist_ok=True) - with open(self.out_path(), "w", newline="\n") as f: + with out_path.open("w", encoding="utf-8", newline="\n") as f: f.write(self.file_text) def should_scan(self) -> bool: diff --git a/src/splat/segtypes/n64/vtx.py b/src/splat/segtypes/n64/vtx.py index acf4bcbb..4f54726f 100644 --- a/src/splat/segtypes/n64/vtx.py +++ b/src/splat/segtypes/n64/vtx.py @@ -89,10 +89,11 @@ def disassemble_data(self, rom_bytes) -> str: return "\n".join(lines) def split(self, rom_bytes: bytes): - if self.file_text and self.out_path(): - self.out_path().parent.mkdir(parents=True, exist_ok=True) + out_path = self.out_path() + if self.file_text and out_path: + out_path.parent.mkdir(parents=True, exist_ok=True) - with open(self.out_path(), "w", newline="\n") as f: + with out_path.open("w", encoding="utf-8", newline="\n") as f: f.write(self.file_text) def should_scan(self) -> bool: diff --git a/src/splat/util/conf.py b/src/splat/util/conf.py index ea45069b..2877c45a 100644 --- a/src/splat/util/conf.py +++ 
b/src/splat/util/conf.py @@ -78,8 +78,8 @@ def load( config: Dict[str, Any] = {} for entry in config_path: - with entry.open() as f: - additional_config = yaml.load(f.read(), Loader=yaml.SafeLoader) + entry_text = entry.read_text(encoding="utf-8") + additional_config = yaml.load(entry_text, Loader=yaml.SafeLoader) config = _merge_configs(config, additional_config, entry) vram_classes.initialize(config.get("vram_classes")) diff --git a/src/splat/util/file_presets.py b/src/splat/util/file_presets.py index 7965f68b..ea940ea6 100644 --- a/src/splat/util/file_presets.py +++ b/src/splat/util/file_presets.py @@ -28,11 +28,10 @@ def _write(filepath: str, contents: str): p.parent.mkdir(parents=True, exist_ok=True) if p.exists(): - with p.open("r", encoding="UTF-8") as f: - existing_contents = f.read() + existing_contents = p.read_text(encoding="utf-8") if existing_contents == contents: return - with p.open("w", encoding="UTF-8", newline="\n") as f: + with p.open("w", encoding="utf-8", newline="\n") as f: f.write(contents) diff --git a/src/splat/util/n64/find_code_length.py b/src/splat/util/n64/find_code_length.py index 1f9053ca..8400a776 100755 --- a/src/splat/util/n64/find_code_length.py +++ b/src/splat/util/n64/find_code_length.py @@ -1,6 +1,7 @@ #! 
/usr/bin/env python3 import argparse +from pathlib import Path import rabbitizer import spimdisasm @@ -49,8 +50,7 @@ def run(rom_bytes, start_offset, vram, end_offset=None): def main(): args = parser.parse_args() - with open(args.rom, "rb") as f: - rom_bytes = f.read() + rom_bytes = Path(args.rom).read_bytes() start = args.start end = args.end diff --git a/src/splat/util/relocs.py b/src/splat/util/relocs.py index 37a7a01a..10a7a7b3 100644 --- a/src/splat/util/relocs.py +++ b/src/splat/util/relocs.py @@ -31,7 +31,7 @@ def initialize(): if not path.exists(): continue - with path.open() as f: + with path.open(encoding="utf-8") as f: sym_addrs_lines = f.readlines() prog_bar = progress_bar.get_progress_bar(sym_addrs_lines) diff --git a/src/splat/util/symbols.py b/src/splat/util/symbols.py index cc38bb10..600856c4 100644 --- a/src/splat/util/symbols.py +++ b/src/splat/util/symbols.py @@ -344,7 +344,7 @@ def initialize(all_segments: "List[Segment]"): # Manual list of func name / addrs for path in options.opts.symbol_addrs_paths: if path.exists(): - with open(path) as f: + with open(path, encoding="utf-8") as f: sym_addrs_lines = f.readlines() handle_sym_addrs(path, sym_addrs_lines, all_segments) diff --git a/test.py b/test.py index d38eac4d..f1dfc0ba 100755 --- a/test.py +++ b/test.py @@ -22,7 +22,10 @@ class Testing(unittest.TestCase): def compare_files(self, test_path, ref_path): - with io.open(test_path) as test_f, io.open(ref_path) as ref_f: + with ( + io.open(test_path, encoding="utf-8") as test_f, + io.open(ref_path, encoding="utf-8") as ref_f, + ): self.assertListEqual(list(test_f), list(ref_f)) def get_same_files(self, dcmp: filecmp.dircmp, out: List[Tuple[str, str, str]]): @@ -103,9 +106,9 @@ def test_basic_app(self): remove_from_diff.add(file) continue - with open(f"{file[1]}/{file[0]}") as file1: + with open(f"{file[1]}/{file[0]}", encoding="utf-8") as file1: file1_lines = file1.readlines() - with open(f"{file[2]}/{file[0]}") as file2: + with 
open(f"{file[2]}/{file[0]}", encoding="utf-8") as file2: file2_lines = file2.readlines() for line in difflib.unified_diff(