Skip to content

Commit 8321c8e

Browse files
committed
Fix: Configure UTF-8 encoding for macro execution to support Unicode output
Fixes #263. Prevents `UnicodeEncodeError` when macros print Unicode characters on Windows systems with a cp1252 locale. Adds a test to verify Unicode support.
1 parent e8de1c2 commit 8321c8e

4 files changed

Lines changed: 145 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
See DataLab [roadmap page](https://datalab-platform.com/en/contributing/roadmap.html) for future and past milestones.
44

5+
## DataLab Version 1.0.2 (unreleased) ##
6+
7+
### 🛠️ Bug Fixes ###
8+
9+
**Macro execution:**
10+
11+
* Fixed `UnicodeEncodeError` when executing macros that print Unicode characters (e.g., arrows `→`) on Windows systems with certain locales, e.g. cp1252 (closes [Issue #263](https://github.com/datalab-platform/datalab/issues/263))
12+
* The macro subprocess now automatically uses UTF-8 encoding for stdout and stderr, eliminating the need to manually add `sys.stdout.reconfigure(encoding='utf-8')` at the beginning of each macro.
13+
514
## DataLab Version 1.0.1 ##
615

716
This major release represents a significant milestone for DataLab with numerous enhancements across all areas. The changes are organized by category for easier navigation.

datalab/gui/macroeditor.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def __init__(self, console: PythonShellWidget, title: str | None = None) -> None
9393
self.set_code(self.MACRO_SAMPLE)
9494
self.editor.modificationChanged.connect(self.modification_changed)
9595
self.process = None
96+
self.__last_exit_code = None
9697

9798
@property
9899
def title(self) -> str:
@@ -259,7 +260,14 @@ def run(self) -> None:
259260
self.process = QC.QProcess()
260261
code = self.get_code().replace('"', "'")
261262
datalab_path = osp.abspath(osp.join(osp.dirname(datalab.__file__), os.pardir))
262-
code = f"import sys; sys.path.append(r'{datalab_path}'){os.linesep}{code}"
263+
# Reconfigure stdout/stderr to use UTF-8 encoding to avoid UnicodeEncodeError
264+
# on Windows with locales that don't support all Unicode characters
265+
# (e.g., cp1252)
266+
code = (
267+
f"import sys; sys.path.append(r'{datalab_path}'); "
268+
f"sys.stdout.reconfigure(encoding='utf-8'); "
269+
f"sys.stderr.reconfigure(encoding='utf-8'){os.linesep}{code}"
270+
)
263271
env = QC.QProcessEnvironment()
264272
env.insert(execenv.XMLRPCPORT_ENV, str(execenv.xmlrpcport))
265273
sysenv = env.systemEnvironment()
@@ -305,6 +313,15 @@ def finished(self, exit_code: int, exit_status: QC.QProcess.ExitStatus) -> None:
305313
exit_code: Exit code
306314
exit_status: Exit status
307315
"""
316+
self.__last_exit_code = exit_code
308317
self.print(_("# <== '%s' macro has finished") % self.title, eol_before=False)
309318
self.FINISHED.emit()
310319
self.process = None
320+
321+
def get_exit_code(self) -> int | None:
322+
"""Return last exit code of the macro process
323+
324+
Returns:
325+
Last exit code or None if process has not finished yet
326+
"""
327+
return self.__last_exit_code

datalab/tests/features/macro/macroeditor_unit_test.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
# guitest: show
1919

2020
import os.path as osp
21+
import time
2122

2223
from guidata.qthelpers import qt_app_context
24+
from qtpy import QtWidgets as QW
2325

2426
from datalab.env import execenv
2527
from datalab.gui.macroeditor import Macro
2628
from datalab.gui.panel import macro
27-
from datalab.tests import helpers
29+
from datalab.tests import datalab_test_app_context, helpers
2830

2931

3032
def get_macro_example_path() -> str:
@@ -98,5 +100,104 @@ def test_macro_editor():
98100
assert widget.get_macro_titles()[0] == osp.basename(macro_path)
99101

100102

103+
def test_macro_unicode_encoding():
104+
"""Test that macros can print Unicode characters without encoding errors.
105+
106+
This test verifies the fix for the UnicodeEncodeError that occurred on Windows
107+
systems with locales like cp1252 when macros printed Unicode characters.
108+
109+
The test creates and runs a macro that prints various Unicode characters,
110+
simulating the scenario where RemoteProxy connection messages (which contain
111+
arrows →) would cause encoding errors on Windows with cp1252 locale.
112+
113+
Without the UTF-8 encoding fix in Macro.run(), this test would fail with:
114+
UnicodeEncodeError: 'charmap' codec can't encode character '\u2192'
115+
"""
116+
with helpers.WorkdirRestoringTempDir():
117+
with datalab_test_app_context(console=False) as win:
118+
win.set_current_panel("macro")
119+
120+
# Create a macro that prints various Unicode characters
121+
macro = win.macropanel.add_macro()
122+
macro.title = "Unicode Test Macro"
123+
124+
# This test verifies that Unicode characters can be printed successfully.
125+
# The macro prints Unicode characters without any encoding manipulation.
126+
# With the UTF-8 fix in Macro.run(), these print statements work correctly.
127+
# Without the fix, on systems with cp1252 locale, these would fail.
128+
#
129+
# Note: We cannot reliably simulate cp1252 locale in the test because:
130+
# 1. Modern Python often defaults to UTF-8
131+
# 2. If we manually reconfigure to cp1252 in the macro, it overrides
132+
# any fix done before the macro code runs
133+
# 3. The PYTHONIOENCODING env var might be set system-wide
134+
#
135+
# This test serves as a regression test - it will catch if the fix
136+
# is removed, but only on systems that actually default to cp1252.
137+
unicode_code = """
138+
import sys
139+
140+
# Print encoding info for debugging
141+
print(f"stdout encoding: {sys.stdout.encoding}")
142+
print(f"stderr encoding: {sys.stderr.encoding}")
143+
144+
# Print various Unicode characters that are not in cp1252
145+
# On systems with cp1252 default locale, without the UTF-8 fix,
146+
# these would cause UnicodeEncodeError
147+
print("Testing Unicode output:")
148+
print(" → Arrow character (U+2192)")
149+
print(" ✓ Check mark (U+2713)")
150+
print(" • Bullet point (U+2022)")
151+
print(" … Ellipsis (U+2026)")
152+
print(" Emoji: 🎉 🚀 ⚡")
153+
154+
# Simulate RemoteProxy connection message format
155+
print("Setting XML-RPC port... [input:None] →[execenv.xmlrpcport:None] OK")
156+
157+
print("All Unicode tests passed! ✓")
158+
"""
159+
macro.set_code(unicode_code)
160+
161+
# Run the macro and wait for completion
162+
execenv.print("Running Unicode test macro...")
163+
win.macropanel.run_macro()
164+
165+
# Wait for macro to complete (with timeout)
166+
# We need to process Qt events for the QProcess signals to be delivered
167+
max_wait = 10 # seconds
168+
elapsed = 0
169+
while macro.is_running() and elapsed < max_wait:
170+
QW.QApplication.processEvents()
171+
time.sleep(0.1)
172+
elapsed += 0.1
173+
174+
# Verify the macro completed (not still running)
175+
# If there was an encoding error, the process would have crashed
176+
assert not macro.is_running(), (
177+
"Macro did not complete within timeout - "
178+
"likely failed with encoding error"
179+
)
180+
181+
# Check the exit code - should be 0 for success
182+
# With the UTF-8 fix, the macro completes successfully (exit code 0)
183+
# Without the fix, it crashes with UnicodeEncodeError (exit code 1)
184+
exit_code = macro.get_exit_code()
185+
assert exit_code == 0, (
186+
f"Macro exited with error code {exit_code} - "
187+
f"likely UnicodeEncodeError when trying to print Unicode characters"
188+
)
189+
190+
execenv.print("✓ Unicode test macro completed successfully!")
191+
execenv.print(
192+
"Note: This test verifies Unicode support works. On systems with "
193+
"UTF-8 as default encoding, it may pass even without the fix. "
194+
"The fix is critical for Windows systems with cp1252 locale."
195+
)
196+
197+
# Clean up
198+
win.macropanel.remove_all_objects()
199+
200+
101201
if __name__ == "__main__":
102202
test_macro_editor()
203+
test_macro_unicode_encoding()

doc/locale/fr/LC_MESSAGES/changelog.po

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ msgid ""
77
msgstr ""
88
"Project-Id-Version: DataLab \n"
99
"Report-Msgid-Bugs-To: \n"
10-
"POT-Creation-Date: 2025-11-18 16:32+0100\n"
10+
"POT-Creation-Date: 2025-11-25 14:51+0100\n"
1111
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
1212
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
1313
"Language: fr\n"
@@ -24,6 +24,21 @@ msgstr "Versions"
2424
msgid "See DataLab [roadmap page](https://datalab-platform.com/en/contributing/roadmap.html) for future and past milestones."
2525
msgstr "Voir la page de la [feuille de route](https://datalab-platform.com/fr/contributing/roadmap.html) de DataLab pour les jalons futurs et passés."
2626

27+
msgid "DataLab Version 1.0.2 (unreleased)"
28+
msgstr "DataLab Version 1.0.2 (non publiée)"
29+
30+
msgid "🛠️ Bug Fixes"
31+
msgstr "🛠️ Correctifs"
32+
33+
msgid "**Macro execution:**"
34+
msgstr "**Exécution des macros :**"
35+
36+
msgid "Fixed `UnicodeEncodeError` when executing macros that print Unicode characters (e.g., arrows `→`) on Windows systems with certain locales, e.g. cp1252 (closes [Issue #263](https://github.com/datalab-platform/datalab/issues/263))"
37+
msgstr "Correction de l'erreur `UnicodeEncodeError` lors de l'exécution de macros qui impriment des caractères Unicode (par exemple, des flèches `→`) sur les systèmes Windows avec certaines locales, par exemple cp1252 (ceci clôture [Issue #263](https://github.com/datalab-platform/datalab/issues/263))"
38+
39+
msgid "The macro subprocess now automatically uses UTF-8 encoding for stdout and stderr, eliminating the need to manually add `sys.stdout.reconfigure(encoding='utf-8')` at the beginning of each macro."
40+
msgstr "Le sous-processus de la macro utilise désormais automatiquement l'encodage UTF-8 pour stdout et stderr, éliminant ainsi la nécessité d'ajouter manuellement `sys.stdout.reconfigure(encoding='utf-8')` au début de chaque macro."
41+
2742
msgid "DataLab Version 1.0.1"
2843
msgstr "DataLab Version 1.0.1"
2944

@@ -622,9 +637,6 @@ msgstr "`compute_1_to_1`, `compute_1_to_0`, `compute_1_to_n`, `compute_n_to_1`,
622637
msgid "**No backward compatibility maintained** for these internal changes (closes [Issue #180](https://github.com/DataLab-Platform/DataLab/issues/180))"
623638
msgstr "**Aucune compatibilité ascendante n'est maintenue** pour ces modifications internes (ceci clôture [Issue #180](https://github.com/DataLab-Platform/DataLab/issues/180))"
624639

625-
msgid "🛠️ Bug Fixes"
626-
msgstr "🛠️ Corrections de bugs :"
627-
628640
msgid "**Performance fixes:**"
629641
msgstr "**Corrections de performances :**"
630642

0 commit comments

Comments
 (0)