From b5d590453ec745d91e3ced33fbddda937606a0be Mon Sep 17 00:00:00 2001 From: adv0r <> Date: Wed, 20 May 2026 12:21:22 +0200 Subject: [PATCH] autogen-studio: pin utf-8 encoding on production text-file open() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refs #5566. Continuation of the same encoding sweep started in #6094 (which fixed the original `playwright_controller.py` site) and continued in the `magentic-one-cli` PR. The reporter of #5566 explicitly flagged that *"there will be some similar issues in the codebase while using open function"* — this PR closes the autogen-studio production code paths that read or write text files without specifying an encoding. On a non-UTF-8 default locale (e.g. cp950 on Traditional Chinese Windows, cp1252 on Western European Windows), Python's `open(...)` without an explicit `encoding` falls back to the platform encoding: reads can fail with `UnicodeDecodeError` (or silently mis-decode) on non-ASCII bytes, and writes can fail with `UnicodeEncodeError` on characters the code page cannot represent. For autogen-studio that can manifest whenever: - `schema_manager.py` reads or writes Alembic templates (`env.py`, `script.py.mako`, `alembic.ini`) that may contain non-ASCII paths or comments - `cli.py` / `lite/studio.py` write the runtime `.env` file (project paths can contain user/folder names with accented characters) - `web/auth/manager.py` loads a user-supplied YAML config - `gallery/builder.py` writes `gallery_default.json` Files touched (11 lines, 5 files): | File | open() sites fixed | |------|--------------------| | autogenstudio/cli.py | 1 | | autogenstudio/lite/studio.py | 1 | | autogenstudio/database/schema_manager.py | 7 | | autogenstudio/web/auth/manager.py | 1 | | autogenstudio/gallery/builder.py | 1 | For every site the change is the same shape: ```python - with open(path, "r") as f: + with open(path, "r", encoding="utf-8") as f: ``` Scope deliberately narrowed: - **Production-code only** — no test fixtures.
- **Skipped `aiofiles.open` in `teammanager.py`** — the API is slightly different and that one deserves its own audited PR. - **Did NOT sweep `agbench/benchmarks/*`** — those are user-facing scenario scripts that read JSONL produced by other agents; forcing UTF-8 there could mask issues upstream. No behaviour change for already-UTF-8-locale users (UTF-8 IS what Python opens these as on macOS/Linux today). All five files re-parsed cleanly via `ast.parse(...)` after the rewrite. AI-assisted via Cursor (Claude Opus 4.7). Personal token-burn initiative by @adv0r to use up an expiring Cursor subscription budget on small, useful upstream contributions. Co-authored-by: Cursor --- .../packages/autogen-studio/autogenstudio/cli.py | 2 +- .../autogenstudio/database/schema_manager.py | 14 +++++++------- .../autogenstudio/gallery/builder.py | 2 +- .../autogen-studio/autogenstudio/lite/studio.py | 2 +- .../autogenstudio/web/auth/manager.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/packages/autogen-studio/autogenstudio/cli.py b/python/packages/autogen-studio/autogenstudio/cli.py index 373f13e087f2..aa92013c98df 100644 --- a/python/packages/autogen-studio/autogenstudio/cli.py +++ b/python/packages/autogen-studio/autogenstudio/cli.py @@ -69,7 +69,7 @@ def ui( # Create temporary env file to share configuration with uvicorn workers env_file_path = get_env_file_path() - with open(env_file_path, "w") as temp_env: + with open(env_file_path, "w", encoding="utf-8") as temp_env: for key, value in env_vars.items(): temp_env.write(f"{key}={value}\n") diff --git a/python/packages/autogen-studio/autogenstudio/database/schema_manager.py b/python/packages/autogen-studio/autogenstudio/database/schema_manager.py index 0762b0890d30..6b942354c4ce 100644 --- a/python/packages/autogen-studio/autogenstudio/database/schema_manager.py +++ b/python/packages/autogen-studio/autogenstudio/database/schema_manager.py @@ -75,7 +75,7 @@ def _update_configuration(self) -> None: 
# Update alembic.ini config_content = self._generate_alembic_ini_content() - with open(self.alembic_ini_path, "w") as f: + with open(self.alembic_ini_path, "w", encoding="utf-8") as f: f.write(config_content) # Update env.py @@ -115,7 +115,7 @@ def _initialize_alembic(self) -> bool: # Create initial config file for alembic init config_content = self._generate_alembic_ini_content() - with open(self.alembic_ini_path, "w") as f: + with open(self.alembic_ini_path, "w", encoding="utf-8") as f: f.write(config_content) # Use the config we just created @@ -187,7 +187,7 @@ def run_migrations_online() -> None: else: run_migrations_online()""" - with open(env_path, "w") as f: + with open(env_path, "w", encoding="utf-8") as f: f.write(content) def _generate_alembic_ini_content(self) -> str: @@ -239,7 +239,7 @@ def update_script_template(self): """Update the Alembic script template to include SQLModel.""" template_path = self.alembic_dir / "script.py.mako" try: - with open(template_path, "r") as f: + with open(template_path, "r", encoding="utf-8") as f: content = f.read() # Add sqlmodel import to imports section @@ -248,7 +248,7 @@ def update_script_template(self): content = content.replace(import_section, new_imports) - with open(template_path, "w") as f: + with open(template_path, "w", encoding="utf-8") as f: f.write(content) return True @@ -265,7 +265,7 @@ def _update_env_py(self, env_path: Path) -> None: self._create_minimal_env_py(env_path) return try: - with open(env_path, "r") as f: + with open(env_path, "r", encoding="utf-8") as f: content = f.read() # Add SQLModel import if not present @@ -303,7 +303,7 @@ def _update_env_py(self, env_path: Path) -> None: )""", ) - with open(env_path, "w") as f: + with open(env_path, "w", encoding="utf-8") as f: f.write(content) except Exception as e: logger.error(f"Failed to update env.py: {e}") diff --git a/python/packages/autogen-studio/autogenstudio/gallery/builder.py b/python/packages/autogen-studio/autogenstudio/gallery/builder.py 
index 55a124367dd4..b83456ec0ee6 100644 --- a/python/packages/autogen-studio/autogenstudio/gallery/builder.py +++ b/python/packages/autogen-studio/autogenstudio/gallery/builder.py @@ -630,5 +630,5 @@ def create_default_lite_team(): gallery = create_default_gallery() # Save to file - with open("gallery_default.json", "w") as f: + with open("gallery_default.json", "w", encoding="utf-8") as f: f.write(gallery.model_dump_json(indent=2)) diff --git a/python/packages/autogen-studio/autogenstudio/lite/studio.py b/python/packages/autogen-studio/autogenstudio/lite/studio.py index 94b25cd85b6e..6ff0b4011f82 100644 --- a/python/packages/autogen-studio/autogenstudio/lite/studio.py +++ b/python/packages/autogen-studio/autogenstudio/lite/studio.py @@ -151,7 +151,7 @@ def _setup_environment(self) -> str: } env_file_path = self._get_env_file_path() - with open(env_file_path, "w") as temp_env: + with open(env_file_path, "w", encoding="utf-8") as temp_env: for key, value in env_vars.items(): temp_env.write(f"{key}={value}\n") diff --git a/python/packages/autogen-studio/autogenstudio/web/auth/manager.py b/python/packages/autogen-studio/autogenstudio/web/auth/manager.py index ab16e0432d0a..de391e093261 100644 --- a/python/packages/autogen-studio/autogenstudio/web/auth/manager.py +++ b/python/packages/autogen-studio/autogenstudio/web/auth/manager.py @@ -117,7 +117,7 @@ def is_valid_token(self, token: str) -> bool: def from_yaml(cls, yaml_path: str) -> Self: """Create AuthManager from YAML config file.""" try: - with open(yaml_path, "r") as f: + with open(yaml_path, "r", encoding="utf-8") as f: config_data = yaml.safe_load(f) config = AuthConfig(**config_data) return cls(config)