diff --git a/.gitignore b/.gitignore index 6978c65..878a56b 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ htmlcov/ .idea/ .vscode/ .tox/ +.venv/ diff --git a/src/docformatter/encode.py b/src/docformatter/encode.py index 2e6a074..30e1042 100644 --- a/src/docformatter/encode.py +++ b/src/docformatter/encode.py @@ -42,9 +42,12 @@ class Encoder: LF = "\n" CRLF = "\r\n" + # Default encoding to use if the file encoding cannot be detected + DEFAULT_ENCODING = "latin-1" + def __init__(self): """Initialize an Encoder instance.""" - self.encoding = "latin-1" + self.encoding = self.DEFAULT_ENCODING self.system_encoding = locale.getpreferredencoding() or sys.getdefaultencoding() def do_detect_encoding(self, filename) -> None: @@ -56,13 +59,16 @@ def do_detect_encoding(self, filename) -> None: The full path name of the file whose encoding is to be detected. """ try: - self.encoding = from_path(filename).best().encoding + detection_result = from_path(filename).best() + self.encoding = ( + detection_result.encoding if detection_result else self.DEFAULT_ENCODING + ) # Check for correctness of encoding. with self.do_open_with_encoding(filename) as check_file: check_file.read() except (SyntaxError, LookupError, UnicodeDecodeError): - self.encoding = "latin-1" + self.encoding = self.DEFAULT_ENCODING def do_find_newline(self, source: List[str]) -> str: """Return type of newline used in source. diff --git a/tests/test_encoding_functions.py b/tests/test_encoding_functions.py index 082da04..d1748ed 100644 --- a/tests/test_encoding_functions.py +++ b/tests/test_encoding_functions.py @@ -83,6 +83,21 @@ def test_detect_encoding_with_bad_encoding(self, temporary_file, contents): assert "ascii" == uut.encoding + @pytest.mark.unit + @pytest.mark.parametrize("contents", [""]) + def test_detect_encoding_with_undetectable_encoding(self, temporary_file): + """Default to latin-1 when encoding detection fails.""" + uut = Encoder() + + # Simulate a file with undetectable encoding + with open(temporary_file, "wb") as file: + # Binary content unlikely to have a detectable encoding + file.write(b"\xFF\xFE\xFD\xFC\x00\x00\x00\x00") + + uut.do_detect_encoding(temporary_file) + + assert uut.encoding == uut.DEFAULT_ENCODING + class TestFindNewline: """Class for testing the find_newline() function."""