From 7bcc2718951405d60ca54aa019665b0f8d547658 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 24 Jun 2026 19:26:37 -0400 Subject: [PATCH 1/4] Drop portage._unicode_encode/_encodings usage Python 3's open() accepts str paths directly and encodes them with the filesystem encoding, which is UTF-8 on modern systems; the content encoding defaults to the locale's and is therefore still passed explicitly. The _unicode_encode(path, encoding=_encodings["fs"]) pattern was Python 2 compatibility code that converted str paths to bytes before passing them to open(); it is unnecessary in Python 3. Replace open(_unicode_encode(path, encoding=_encodings["fs"]), encoding=_encodings["content"]) with open(path, encoding="utf-8") throughout, and drop the portage imports. Signed-off-by: Matt Turner --- pym/gentoolkit/eclean/exclude.py | 5 ++--- pym/gentoolkit/enalyze/rebuild.py | 5 ++--- pym/gentoolkit/equery/uses.py | 9 ++++----- pym/gentoolkit/equery/which.py | 6 ++---- pym/gentoolkit/helpers.py | 3 +-- pym/gentoolkit/package.py | 3 +-- pym/gentoolkit/profile.py | 14 ++++++-------- pym/gentoolkit/revdep_rebuild/analyse.py | 5 ++--- pym/gentoolkit/revdep_rebuild/cache.py | 23 ++++++++--------------- pym/gentoolkit/revdep_rebuild/collect.py | 12 ++++-------- pym/gentoolkit/revdep_rebuild/settings.py | 7 ++----- pym/gentoolkit/test/eclean/creator.py | 5 ++--- 12 files changed, 36 insertions(+), 61 deletions(-) diff --git a/pym/gentoolkit/eclean/exclude.py b/pym/gentoolkit/eclean/exclude.py index e569e560..9a121d13 100644 --- a/pym/gentoolkit/eclean/exclude.py +++ b/pym/gentoolkit/eclean/exclude.py @@ -7,7 +7,6 @@ import os import re import portage -from portage import _encodings, _unicode_encode # Misc. 
shortcuts to some portage stuff: listdir = portage.listdir @@ -78,8 +77,8 @@ def parseExcludeFile(filepath, output): output("Parsing Exclude file: " + filepath) try: file_ = open( - _unicode_encode(filepath, encoding=_encodings["fs"]), - encoding=_encodings["content"], + filepath, + encoding="utf-8", ) except OSError: raise ParseExcludeFileException("Could not open exclusion file: " + filepath) diff --git a/pym/gentoolkit/enalyze/rebuild.py b/pym/gentoolkit/enalyze/rebuild.py index 5863276b..b20870f2 100644 --- a/pym/gentoolkit/enalyze/rebuild.py +++ b/pym/gentoolkit/enalyze/rebuild.py @@ -26,7 +26,6 @@ import portage -from portage import _encodings, _unicode_encode def cpv_all_diff_use( @@ -391,9 +390,9 @@ def save_file(self, filepath, data): if not self.options["quiet"]: print(" - Saving file: %s" % filepath) with open( - _unicode_encode(filepath, encoding=_encodings["fs"]), + filepath, mode="w", - encoding=_encodings["content"], + encoding="utf-8", ) as output: output.write("\n".join(data)) output.write("\n") diff --git a/pym/gentoolkit/equery/uses.py b/pym/gentoolkit/equery/uses.py index bcc3d092..a7646721 100644 --- a/pym/gentoolkit/equery/uses.py +++ b/pym/gentoolkit/equery/uses.py @@ -18,7 +18,6 @@ from glob import glob from portage import settings -from portage import _encodings, _unicode_encode import gentoolkit.pprinter as pp from gentoolkit import errors @@ -151,8 +150,8 @@ def get_global_useflags(): try: path = os.path.join(settings["PORTDIR"], "profiles", "use.desc") with open( - _unicode_encode(path, encoding=_encodings["fs"]), - encoding=_encodings["content"], + path, + encoding="utf-8", ) as open_file: for line in open_file: if line.startswith("#"): @@ -171,8 +170,8 @@ def get_global_useflags(): for path in glob(os.path.join(settings["PORTDIR"], "profiles", "desc", "*.desc")): try: with open( - _unicode_encode(path, encoding=_encodings["fs"]), - encoding=_encodings["content"], + path, + encoding="utf-8", ) as open_file: for line in open_file: if 
line.startswith("#"): diff --git a/pym/gentoolkit/equery/which.py b/pym/gentoolkit/equery/which.py index 0b726dd8..d3269673 100644 --- a/pym/gentoolkit/equery/which.py +++ b/pym/gentoolkit/equery/which.py @@ -22,8 +22,6 @@ from gentoolkit.equery import format_options, mod_usage from gentoolkit.query import Query -from portage import _encodings, _unicode_encode - # ======= # Globals # ======= @@ -62,8 +60,8 @@ def print_help(with_description=True): def print_ebuild(ebuild_path): """Output the ebuild to std_out""" with open( - _unicode_encode(ebuild_path, encoding=_encodings["fs"]), - encoding=_encodings["content"], + ebuild_path, + encoding="utf-8", ) as f: lines = f.readlines() print("\n\n") diff --git a/pym/gentoolkit/helpers.py b/pym/gentoolkit/helpers.py index 34d2a0e1..e2748dc7 100644 --- a/pym/gentoolkit/helpers.py +++ b/pym/gentoolkit/helpers.py @@ -28,7 +28,6 @@ from itertools import chain import portage -from portage import _encodings, _unicode_encode from gentoolkit import pprinter as pp from gentoolkit import errors @@ -300,7 +299,7 @@ def get_bintree_cpvs(predicate=None): def print_file(path): """Display the contents of a file.""" - with open(_unicode_encode(path, encoding=_encodings["fs"]), mode="rb") as open_file: + with open(path, mode="rb") as open_file: lines = open_file.read() pp.uprint(lines.strip()) diff --git a/pym/gentoolkit/package.py b/pym/gentoolkit/package.py index 6a46e71c..210c879a 100644 --- a/pym/gentoolkit/package.py +++ b/pym/gentoolkit/package.py @@ -51,7 +51,6 @@ import portage from portage.util import LazyItemsDict -from portage import _encodings, _unicode_encode import gentoolkit.pprinter as pp from gentoolkit import errors @@ -397,7 +396,7 @@ def size(self): size = n_files = n_uncounted = 0 for path in self.parsed_contents(prefix_root=True): try: - st = os.lstat(_unicode_encode(path, encoding=_encodings["fs"])) + st = os.lstat(path) except OSError: continue diff --git a/pym/gentoolkit/profile.py b/pym/gentoolkit/profile.py index 
11a65608..8ce691f5 100644 --- a/pym/gentoolkit/profile.py +++ b/pym/gentoolkit/profile.py @@ -13,8 +13,6 @@ import portage import sys -from portage import _encodings, _unicode_encode - def warning(msg): """Write |msg| as a warning to stderr""" @@ -47,8 +45,8 @@ def load_profile_data(portdir=None, repo=""): try: arch_list = os.path.join(portdir, "profiles", "arch.list") with open( - _unicode_encode(arch_list, encoding=_encodings["fs"]), - encoding=_encodings["content"], + arch_list, + encoding="utf-8", ) as f: for line in f: line = line.split("#", 1)[0].strip() @@ -66,8 +64,8 @@ def load_profile_data(portdir=None, repo=""): } profiles_list = os.path.join(portdir, "profiles", "profiles.desc") with open( - _unicode_encode(profiles_list, encoding=_encodings["fs"]), - encoding=_encodings["content"], + profiles_list, + encoding="utf-8", ) as f: for line in f: line = line.split("#", 1)[0].split() @@ -91,8 +89,8 @@ def load_profile_data(portdir=None, repo=""): try: arches_list = os.path.join(portdir, "profiles", "arches.desc") with open( - _unicode_encode(arches_list, encoding=_encodings["fs"]), - encoding=_encodings["content"], + arches_list, + encoding="utf-8", ) as f: for line in f: line = line.split("#", 1)[0].split() diff --git a/pym/gentoolkit/revdep_rebuild/analyse.py b/pym/gentoolkit/revdep_rebuild/analyse.py index 3f3bdd6d..0fd1a852 100644 --- a/pym/gentoolkit/revdep_rebuild/analyse.py +++ b/pym/gentoolkit/revdep_rebuild/analyse.py @@ -6,7 +6,6 @@ import re import time -from portage import _encodings, _unicode_encode from portage.output import bold, blue, yellow, green from .stuff import scan @@ -91,8 +90,8 @@ def extract_dependencies_from_la(la, libraries, to_check, logger): continue for line in open( - _unicode_encode(_file, encoding=_encodings["fs"]), - encoding=_encodings["content"], + _file, + encoding="utf-8", ).readlines(): line = line.strip() if line.startswith("dependency_libs="): diff --git a/pym/gentoolkit/revdep_rebuild/cache.py 
b/pym/gentoolkit/revdep_rebuild/cache.py index 36d86842..de7af94f 100644 --- a/pym/gentoolkit/revdep_rebuild/cache.py +++ b/pym/gentoolkit/revdep_rebuild/cache.py @@ -5,7 +5,6 @@ from portage import os import time -from portage import _encodings, _unicode_encode from portage.output import red from .settings import DEFAULTS @@ -28,10 +27,8 @@ def read_cache(temp_path=DEFAULTS["DEFAULT_TMP_DIR"]): try: for key, val in ret.items(): _file = open( - _unicode_encode( - os.path.join(temp_path, key), encoding=_encodings["fs"] - ), - encoding=_encodings["content"], + os.path.join(temp_path, key), + encoding="utf-8", ) for line in _file.readlines(): val.add(line.strip()) @@ -60,22 +57,18 @@ def save_cache(logger, to_save={}, temp_path=DEFAULTS["DEFAULT_TMP_DIR"]): try: _file = open( - _unicode_encode( - os.path.join(temp_path, "timestamp"), encoding=_encodings["fs"] - ), + os.path.join(temp_path, "timestamp"), mode="w", - encoding=_encodings["content"], + encoding="utf-8", ) _file.write(str(int(time.time()))) _file.close() for key, val in to_save.items(): _file = open( - _unicode_encode( - os.path.join(temp_path, key), encoding=_encodings["fs"] - ), + os.path.join(temp_path, key), mode="w", - encoding=_encodings["content"], + encoding="utf-8", ) for line in val: _file.write(line + "\n") @@ -105,8 +98,8 @@ def check_temp_files( try: _file = open( - _unicode_encode(timestamp_path, encoding=_encodings["fs"]), - encoding=_encodings["content"], + timestamp_path, + encoding="utf-8", ) timestamp = int(_file.readline()) _file.close() diff --git a/pym/gentoolkit/revdep_rebuild/collect.py b/pym/gentoolkit/revdep_rebuild/collect.py index a36b7019..a2dab410 100644 --- a/pym/gentoolkit/revdep_rebuild/collect.py +++ b/pym/gentoolkit/revdep_rebuild/collect.py @@ -8,7 +8,6 @@ import stat import portage -from portage import _encodings, _unicode_encode from portage.output import blue, yellow from .settings import parse_revdep_config @@ -27,8 +26,8 @@ def parse_conf(conf_file, visited=None, 
logger=None): for conf in conf_file: try: with open( - _unicode_encode(conf, encoding=_encodings["fs"]), - encoding=_encodings["content"], + conf, + encoding="utf-8", ) as _file: for line in _file.readlines(): line = line.strip() @@ -75,11 +74,8 @@ def prepare_search_dirs(logger, settings): # try: with open( - _unicode_encode( - os.path.join(portage.root, settings["DEFAULT_ENV_FILE"]), - encoding=_encodings["fs"], - ), - encoding=_encodings["content"], + os.path.join(portage.root, settings["DEFAULT_ENV_FILE"]), + encoding="utf-8", ) as _file: for line in _file.readlines(): line = line.strip() diff --git a/pym/gentoolkit/revdep_rebuild/settings.py b/pym/gentoolkit/revdep_rebuild/settings.py index 2a880fd2..491ec804 100644 --- a/pym/gentoolkit/revdep_rebuild/settings.py +++ b/pym/gentoolkit/revdep_rebuild/settings.py @@ -9,7 +9,6 @@ import glob import portage -from portage import _encodings, _unicode_encode portage_root = str(portage.root) @@ -158,10 +157,8 @@ def parse_revdep_config(revdep_confdir): for _file in os.listdir(revdep_confdir): for line in open( - _unicode_encode( - os.path.join(revdep_confdir, _file), encoding=_encodings["fs"] - ), - encoding=_encodings["content"], + os.path.join(revdep_confdir, _file), + encoding="utf-8", ): line = line.strip() # first check for comment, we do not want to regex all lines diff --git a/pym/gentoolkit/test/eclean/creator.py b/pym/gentoolkit/test/eclean/creator.py index 795d2ba0..62e603c3 100644 --- a/pym/gentoolkit/test/eclean/creator.py +++ b/pym/gentoolkit/test/eclean/creator.py @@ -10,7 +10,6 @@ import random import gentoolkit.pprinter as pp -from portage import _encodings, _unicode_encode __version__ = "0.0.1" __author__ = "Brian Dolbec" @@ -51,10 +50,10 @@ def make_dist(path, files, clean_dict=None): data = "0" * size filepath = os.path.join(path, file_) with open( - _unicode_encode(filepath, encoding=_encodings["fs"]), + filepath, "w", file_mode, - encoding=_encodings["content"], + encoding="utf-8", ) as new_file: 
new_file.write(data) if file_ not in clean_dict: From 9c1126f7472ddc31baf3c0cae06cf9f644e68450 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 24 Jun 2026 20:33:42 -0400 Subject: [PATCH 2/4] revdep_rebuild: replace portage.os with stdlib os portage.os is a re-export of the stdlib os module, kept for Python 2 compatibility. Import os directly. Signed-off-by: Matt Turner --- pym/gentoolkit/revdep_rebuild/cache.py | 2 +- pym/gentoolkit/revdep_rebuild/collect.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pym/gentoolkit/revdep_rebuild/cache.py b/pym/gentoolkit/revdep_rebuild/cache.py index de7af94f..499193e0 100644 --- a/pym/gentoolkit/revdep_rebuild/cache.py +++ b/pym/gentoolkit/revdep_rebuild/cache.py @@ -2,7 +2,7 @@ Functions for reading, saving and verifying the data caches """ -from portage import os +import os import time from portage.output import red diff --git a/pym/gentoolkit/revdep_rebuild/collect.py b/pym/gentoolkit/revdep_rebuild/collect.py index a2dab410..8e3d1ed2 100644 --- a/pym/gentoolkit/revdep_rebuild/collect.py +++ b/pym/gentoolkit/revdep_rebuild/collect.py @@ -3,7 +3,7 @@ """Data collection module""" import re -from portage import os +import os import glob import stat From 1265d08395863075b99fa99b28fc152944c69741 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 24 Jun 2026 20:33:55 -0400 Subject: [PATCH 3/4] pprinter: drop Python 2 unicode shim The try/except block that defined unicode = str when unicode was not a builtin was Python 2 compatibility code. In Python 3, unicode does not exist; str is always correct. Drop the shim and use str directly. Signed-off-by: Matt Turner --- pym/gentoolkit/pprinter.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pym/gentoolkit/pprinter.py b/pym/gentoolkit/pprinter.py index fac1d167..35feda7d 100644 --- a/pym/gentoolkit/pprinter.py +++ b/pym/gentoolkit/pprinter.py @@ -162,12 +162,6 @@ def warn(string): return "!!! 
" + string + "\n" -try: - unicode -except NameError: - unicode = str - - def uprint(*args, **kw): """Replacement for the builtin print function. @@ -199,7 +193,7 @@ def encoded_args(): if isinstance(arg, bytes): yield arg else: - yield unicode(arg).encode(encoding, "replace") + yield str(arg).encode(encoding, "replace") sep = sep.encode(encoding, "replace") end = end.encode(encoding, "replace") From b1465203dd137a72b901594dbdd988112c93f512 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 24 Jun 2026 21:07:36 -0400 Subject: [PATCH 4/4] Add encoding="utf-8" to text-mode open() calls Without an explicit encoding, open() uses the locale encoding, which may not be UTF-8 on all systems. All files read or written here are UTF-8 (ebuilds, Gentoo profile files). Signed-off-by: Matt Turner --- pym/gentoolkit/eshowkw/__init__.py | 4 +++- pym/gentoolkit/imlate/imlate.py | 2 +- pym/gentoolkit/merge_driver_ekeyword/merge_driver_ekeyword.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pym/gentoolkit/eshowkw/__init__.py b/pym/gentoolkit/eshowkw/__init__.py index 0d65733d..d736fe0d 100644 --- a/pym/gentoolkit/eshowkw/__init__.py +++ b/pym/gentoolkit/eshowkw/__init__.py @@ -191,7 +191,9 @@ def main(argv, indirect=False): for repo in ports.repositories: repos[repo.name] = repo.location - with open(os.path.join(ourtree, "profiles", "repo_name")) as f: + with open( + os.path.join(ourtree, "profiles", "repo_name"), encoding="utf-8" + ) as f: repo_name = f.readline().strip() repos[repo_name] = ourtree diff --git a/pym/gentoolkit/imlate/imlate.py b/pym/gentoolkit/imlate/imlate.py index 63565017..f6ce3764 100755 --- a/pym/gentoolkit/imlate/imlate.py +++ b/pym/gentoolkit/imlate/imlate.py @@ -97,7 +97,7 @@ def show_result(conf, pkgs): elif conf["FILE"] == "stderr": out = stderr else: - out = open(conf["FILE"], "w") + out = open(conf["FILE"], "w", encoding="utf-8") if conf["STABLE"] and conf["KEYWORD"]: _cand = "%i Stable and %i Keyword(~)" % ( diff --git 
a/pym/gentoolkit/merge_driver_ekeyword/merge_driver_ekeyword.py b/pym/gentoolkit/merge_driver_ekeyword/merge_driver_ekeyword.py index b575aabd..3ca11837 100755 --- a/pym/gentoolkit/merge_driver_ekeyword/merge_driver_ekeyword.py +++ b/pym/gentoolkit/merge_driver_ekeyword/merge_driver_ekeyword.py @@ -58,7 +58,7 @@ def keyword_line_changes(old: str, new: str) -> KeywordChanges: def keyword_changes(ebuild1: str, ebuild2: str) -> Optional[KeywordChanges]: - with open(ebuild1) as e1, open(ebuild2) as e2: + with open(ebuild1, encoding="utf-8") as e1, open(ebuild2, encoding="utf-8") as e2: lines1 = e1.readlines() lines2 = e2.readlines()