From ae6342ea470b7a38c9897ac4e7ee4d66ede2bc09 Mon Sep 17 00:00:00 2001 From: Bjoern Holtvogt Date: Tue, 29 Apr 2025 19:57:43 +0200 Subject: [PATCH 1/4] Add venv to ignore list --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index edd91535..d1b362a8 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ poetry.lock .idea/ .vscode/ .tox/ +.venv/ From 74d8a42e25edb069e186b47baaaa06379b58984f Mon Sep 17 00:00:00 2001 From: Bjoern Holtvogt Date: Tue, 29 Apr 2025 19:58:10 +0200 Subject: [PATCH 2/4] Add default encoding constant --- src/docformatter/encode.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/docformatter/encode.py b/src/docformatter/encode.py index 2e6a0740..2889eaa5 100644 --- a/src/docformatter/encode.py +++ b/src/docformatter/encode.py @@ -42,9 +42,12 @@ class Encoder: LF = "\n" CRLF = "\r\n" + # Default encoding to use if the file encoding cannot be detected + DEFAULT_ENCODING = "latin-1" + def __init__(self): """Initialize an Encoder instance.""" - self.encoding = "latin-1" + self.encoding = self.DEFAULT_ENCODING self.system_encoding = locale.getpreferredencoding() or sys.getdefaultencoding() def do_detect_encoding(self, filename) -> None: @@ -62,7 +65,7 @@ def do_detect_encoding(self, filename) -> None: with self.do_open_with_encoding(filename) as check_file: check_file.read() except (SyntaxError, LookupError, UnicodeDecodeError): - self.encoding = "latin-1" + self.encoding = self.DEFAULT_ENCODING def do_find_newline(self, source: List[str]) -> str: """Return type of newline used in source. From 4465d1a73f0abb74e34e7f971f798f74347e7d2d Mon Sep 17 00:00:00 2001 From: Bjoern Holtvogt Date: Tue, 29 Apr 2025 21:34:17 +0200 Subject: [PATCH 3/4] Fallback to default encoding when detection fails Encoding detection now falls back to the default encoding (latin-1) when from_path(filename).best() returns None. 
--- src/docformatter/encode.py | 3 ++- tests/test_encoding_functions.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/docformatter/encode.py b/src/docformatter/encode.py index 2889eaa5..d336258b 100644 --- a/src/docformatter/encode.py +++ b/src/docformatter/encode.py @@ -59,7 +59,8 @@ def do_detect_encoding(self, filename) -> None: The full path name of the file whose encoding is to be detected. """ try: - self.encoding = from_path(filename).best().encoding + detection_result = from_path(filename).best() + self.encoding = detection_result.encoding if detection_result else self.DEFAULT_ENCODING # Check for correctness of encoding. with self.do_open_with_encoding(filename) as check_file: diff --git a/tests/test_encoding_functions.py b/tests/test_encoding_functions.py index 082da046..d1748ed5 100644 --- a/tests/test_encoding_functions.py +++ b/tests/test_encoding_functions.py @@ -83,6 +83,21 @@ def test_detect_encoding_with_bad_encoding(self, temporary_file, contents): assert "ascii" == uut.encoding + @pytest.mark.unit + @pytest.mark.parametrize("contents", [""]) + def test_detect_encoding_with_undetectable_encoding(self, temporary_file): + """Default to latin-1 when encoding detection fails.""" + uut = Encoder() + + # Simulate a file with undetectable encoding + with open(temporary_file, "wb") as file: + # Binary content unlikely to have a detectable encoding + file.write(b"\xFF\xFE\xFD\xFC\x00\x00\x00\x00") + + uut.do_detect_encoding(temporary_file) + + assert uut.encoding == uut.DEFAULT_ENCODING + class TestFindNewline: """Class for testing the find_newline() function.""" From 52cda9cf30b123cfd647a800be3af35f74e05a51 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sun, 11 May 2025 00:31:05 -0400 Subject: [PATCH 4/4] Update encode.py --- src/docformatter/encode.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/docformatter/encode.py b/src/docformatter/encode.py index d336258b..30e10420 100644 --- 
a/src/docformatter/encode.py +++ b/src/docformatter/encode.py @@ -60,7 +60,9 @@ def do_detect_encoding(self, filename) -> None: """ try: detection_result = from_path(filename).best() - self.encoding = detection_result.encoding if detection_result else self.DEFAULT_ENCODING + self.encoding = ( + detection_result.encoding if detection_result else self.DEFAULT_ENCODING + ) # Check for correctness of encoding. with self.do_open_with_encoding(filename) as check_file: