From 6e4b10d4b8098c8ba063c3e0fc03323d860b7836 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Thu, 24 Jul 2025 21:10:36 -0400 Subject: [PATCH 01/11] fix: adding additional blank line before inner function --- src/docformatter/classify.py | 16 +++++---------- src/docformatter/format.py | 21 ++++++++++++-------- tests/_data/string_files/do_format_code.toml | 5 +++-- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/docformatter/classify.py b/src/docformatter/classify.py index f946660..8597e29 100644 --- a/src/docformatter/classify.py +++ b/src/docformatter/classify.py @@ -28,6 +28,7 @@ # Standard Library Imports +import re import sys import tokenize from tokenize import TokenInfo @@ -289,9 +290,9 @@ def is_definition_line(token: tokenize.TokenInfo) -> bool: True if the token is a definition line, False otherwise. """ if token.type == tokenize.NAME and ( - token.line.strip().startswith("def ") - or token.line.strip().startswith("async ") - or token.line.strip().startswith("class ") + token.line.startswith("def ") + or token.line.startswith("async ") + or token.line.startswith("class ") ): return True @@ -407,14 +408,7 @@ def is_nested_definition_line(token: tokenize.TokenInfo) -> bool: bool True if the token is a nested definition line, False otherwise. 
""" - if token.type == tokenize.NAME and ( - token.line.startswith(" def ") - or token.line.startswith(" async ") - or token.line.startswith(" class ") - ): - return True - - return False + return re.match(r"^ {4,}(async|class|def) ", token.line) is not None def is_newline_continuation( diff --git a/src/docformatter/format.py b/src/docformatter/format.py index 78c4bc0..d9a7820 100644 --- a/src/docformatter/format.py +++ b/src/docformatter/format.py @@ -243,12 +243,13 @@ def _get_class_docstring_newlines( def _get_function_docstring_newlines( # noqa: PLR0911 tokens: list[tokenize.TokenInfo], index: int, + black: bool = False, ) -> int: """Return number of newlines after a function or method docstring. - docformatter_9.5: No blank lines after a function or method docstring. + PEP_257_9.5: No blank lines after a function or method docstring. docformatter_9.6: One blank line after a function or method docstring if there is - an inner function definition. + an inner function definition when in black mode. docformatter_9.7: Two blank lines after a function docstring if the stub function has no code. docformatter_9.8: One blank line after a method docstring if the stub method has @@ -282,7 +283,7 @@ def _get_function_docstring_newlines( # noqa: PLR0911 continue # The docstring is followed by an attribute assignment. - if tokens[j].type == tokenize.OP: + if tokens[j].type == tokenize.OP and tokens[j].string == "=": return 0 # There is a line of code following the docstring. @@ -293,7 +294,7 @@ def _get_function_docstring_newlines( # noqa: PLR0911 return 0 # There is a method definition or nested function or class definition following - # the docstring. + # the docstring and docformatter is running in black mode. if _classify.is_nested_definition_line(tokens[j]): return 1 @@ -343,6 +344,8 @@ def _get_newlines_by_type( A list of tokens from the source code. index : int The index of the docstring token in the list of tokens. 
+ black : bool + Whether docformatter is running in black mode. Returns ------- @@ -350,12 +353,16 @@ def _get_newlines_by_type( The number of newlines to insert after the docstring. """ if _classify.is_module_docstring(tokens, index): + # print("Module") return _get_module_docstring_newlines(black) elif _classify.is_class_docstring(tokens, index): + # print("Class") return _get_class_docstring_newlines(tokens, index) elif _classify.is_function_or_method_docstring(tokens, index): - return _get_function_docstring_newlines(tokens, index) + # print("Function or method") + return _get_function_docstring_newlines(tokens, index, black) elif _classify.is_attribute_docstring(tokens, index): + # print("Attribute") return _get_attribute_docstring_newlines(tokens, index) return 0 # Default: probably a string literal @@ -1029,9 +1036,7 @@ def _do_rewrite_docstring_blocks( _docstring_token = tokens[d] _indent = " " * _docstring_token.start[1] if typ != "module" else "" _blank_line_count = _get_newlines_by_type( - tokens, - d, - black=self.args.black, + tokens, d, black=self.args.black ) if _util.is_in_range( diff --git a/tests/_data/string_files/do_format_code.toml b/tests/_data/string_files/do_format_code.toml index 7ea92cd..4572aec 100644 --- a/tests/_data/string_files/do_format_code.toml +++ b/tests/_data/string_files/do_format_code.toml @@ -139,6 +139,7 @@ expected='''def foo():\r This is a docstring.\r """\r ''' + [non_docstring] source='''x = """This is @@ -209,11 +210,11 @@ source='''def foo(): """ Hello foo. 
""" - def test_method_no_chr_92(): the501(92) # \''' + def test_method_no_chr_92(): the501(92) # Comment''' expected='''def foo(): """Hello foo.""" - def test_method_no_chr_92(): the501(92) # \''' + def test_method_no_chr_92(): the501(92) # Comment''' [raw_lowercase] source='''def foo(): From eebfe3ca81ed8447d011a18f3d37de62d491e132 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Fri, 25 Jul 2025 00:54:34 -0400 Subject: [PATCH 02/11] fix: removing blank line between two preceding comment blocks --- src/docformatter/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docformatter/format.py b/src/docformatter/format.py index d9a7820..2dfe39a 100644 --- a/src/docformatter/format.py +++ b/src/docformatter/format.py @@ -92,7 +92,7 @@ def _do_remove_preceding_blank_lines( elif ( tokens[j].type in (tokenize.NEWLINE, tokenize.NL) and tokens[j].line == "\n" - and not tokens[j - 1].line.startswith("#!/") + and not tokens[j - 1].line.startswith("#") ): _indices_to_remove.append(j) From 0d9a2b0a5c1e9821924ad8c3b023e40a09bf8041 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sun, 27 Jul 2025 11:11:45 -0400 Subject: [PATCH 03/11] fix: mangling long description when splitting multi-sentence summary --- src/docformatter/format.py | 129 ++++++++++++++++----------- src/docformatter/strings.py | 12 ++- src/docformatter/wrappers/summary.py | 44 ++++++++- 3 files changed, 127 insertions(+), 58 deletions(-) diff --git a/src/docformatter/format.py b/src/docformatter/format.py index 2dfe39a..a058810 100644 --- a/src/docformatter/format.py +++ b/src/docformatter/format.py @@ -70,7 +70,7 @@ def _do_remove_preceding_blank_lines( Returns ------- - tokens : list + list A list of tokens with blank lines preceding docstrings removed. 
""" _num_tokens = len(tokens) @@ -110,6 +110,34 @@ def _do_remove_preceding_blank_lines( return tokens +def _do_skip_newlines( + tokens: list[tokenize.TokenInfo], + docstring_idx: int, +) -> int: + """Skip newline tokens between anchor and docstring indices. + + Parameters + ---------- + tokens : list + The list of tokens representing the docstring text. + docstring_idx : int + The index in the list of tokens where the docstring begins. + + Returns + ------- + int + The index of the last newline token. + """ + j = docstring_idx + 1 + while j < len(tokens) and tokens[j].type in ( + tokenize.NL, + tokenize.NEWLINE, + ): + j += 1 + + return j + + def _do_update_token_indices( tokens: list[tokenize.TokenInfo], ) -> list[tokenize.TokenInfo]: @@ -243,7 +271,6 @@ def _get_class_docstring_newlines( def _get_function_docstring_newlines( # noqa: PLR0911 tokens: list[tokenize.TokenInfo], index: int, - black: bool = False, ) -> int: """Return number of newlines after a function or method docstring. @@ -360,7 +387,7 @@ def _get_newlines_by_type( return _get_class_docstring_newlines(tokens, index) elif _classify.is_function_or_method_docstring(tokens, index): # print("Function or method") - return _get_function_docstring_newlines(tokens, index, black) + return _get_function_docstring_newlines(tokens, index) elif _classify.is_attribute_docstring(tokens, index): # print("Attribute") return _get_attribute_docstring_newlines(tokens, index) @@ -736,7 +763,7 @@ def _do_format_file(self, filename: str) -> int: Return ------ - result_code : int + int One of the FormatResult codes. """ self.encodor.do_detect_encoding(filename) @@ -784,7 +811,7 @@ def _do_format_code(self, source: str) -> str: Returns ------- - formatted : str + str The source file text with docstrings formatted. """ if not source: @@ -828,7 +855,7 @@ def _do_format_docstring( # noqa PLR0911 Returns ------- - docstring_formatted : str + str The docstring formatted according the various options. 
""" contents, open_quote = _strings.do_strip_docstring(docstring) @@ -915,7 +942,7 @@ def _do_format_oneline_docstring( Returns ------- - formatted_docstring : str + str The formatted docstring. """ if self.args.make_summary_multi_line: @@ -968,7 +995,7 @@ def _do_format_multiline_docstring( Returns ------- - formatted_docstring : str + str The formatted docstring. """ # Compensate for triple quotes by temporarily prepending 3 spaces. @@ -1008,82 +1035,78 @@ def _do_rewrite_docstring_blocks( Parameters ---------- - tokens : list[TokenInfo] + tokens : list The tokenized Python source code. """ - blocks = _classify.do_find_docstring_blocks(tokens) + # print(tokens) + _blocks = _classify.do_find_docstring_blocks(tokens) + _skip_indices: set[int] = set() self.new_tokens = [] - skip_indices: set[int] = set() - for i, tok in enumerate(tokens): - if i in skip_indices: + for _idx, _token in enumerate(tokens): + if _idx in _skip_indices: continue - match = next(((s, d, t) for (s, d, t) in blocks if d == i), None) - if match: - s, d, typ = match - - # Skip tokens from anchor (s) up to and including the docstring (d), - # plus trailing blank lines - j = d + 1 - while j < len(tokens) and tokens[j].type in ( - tokenize.NL, - tokenize.NEWLINE, - ): - j += 1 - skip_indices.update(range(s + 1, j)) + _match = next(((s, d, t) for (s, d, t) in _blocks if d == _idx), None) + if _match: + _anchor_idx, _docstr_idx, _type = _match + _last_idx = _do_skip_newlines(tokens, _docstr_idx) + _skip_indices.update(range(_anchor_idx + 1, _last_idx)) - _docstring_token = tokens[d] - _indent = " " * _docstring_token.start[1] if typ != "module" else "" + _docstring_token = tokens[_docstr_idx] _blank_line_count = _get_newlines_by_type( - tokens, d, black=self.args.black + tokens, _docstr_idx, black=self.args.black ) - if _util.is_in_range( - self.args.line_range, - _docstring_token.start[0], - _docstring_token.end[0], - ) and _util.has_correct_length( - self.args.length_range, - 
_docstring_token.start[0], - _docstring_token.end[0], + if ( + _util.is_in_range( + self.args.line_range, + _docstring_token.start[0], + _docstring_token.end[0], + ) + and _util.has_correct_length( + self.args.length_range, + _docstring_token.start[0], + _docstring_token.end[0], + ) + and not _patterns.is_string_constant(tokens[_docstr_idx - 1]) ): self._do_add_formatted_docstring( _docstring_token, - tokens[i + 1], - typ, + tokens[_idx + 1], + _type, _blank_line_count, ) else: - self._do_add_unformatted_docstring(_docstring_token, typ) + self._do_add_unformatted_docstring(_docstring_token, _type) if ( ( - self.new_tokens[-2].string == tokens[i + 1].string - and _docstring_token.line == tokens[i + 1].line + self.new_tokens[-2].string == tokens[_idx + 1].string + and _docstring_token.line == tokens[_idx + 1].line ) - or tokens[i + 1].string == "\n" - or tokens[i + 1].type in (tokenize.NEWLINE, tokenize.NL) + or tokens[_idx + 1].string == "\n" + or tokens[_idx + 1].type in (tokenize.NEWLINE, tokenize.NL) ): - skip_indices.add(i + 1) + _skip_indices.add(_idx + 1) continue else: - _new_tok = tok + _new_tok = _token # If it's a standalone STRING (not identified as a docstring block), # ensure .line ends with newline - if tok.type == tokenize.STRING: - _line = tok.line + if _token.type == tokenize.STRING: + _line = _token.line if not _line.endswith("\n"): _line += "\n" _new_tok = tokenize.TokenInfo( - type=tok.type, - string=tok.string, - start=tok.start, - end=tok.end, + type=_token.type, + string=_token.string, + start=_token.start, + end=_token.end, line=_line, ) self.new_tokens.append(_new_tok) - self.new_tokens = _do_remove_preceding_blank_lines(self.new_tokens, blocks) + self.new_tokens = _do_remove_preceding_blank_lines(self.new_tokens, _blocks) self.new_tokens = _do_update_token_indices(self.new_tokens) diff --git a/src/docformatter/strings.py b/src/docformatter/strings.py index f6bbf95..94c8b43 100644 --- a/src/docformatter/strings.py +++ 
b/src/docformatter/strings.py @@ -397,8 +397,18 @@ def do_split_summary(lines) -> List[str]: rest_text = "".join(rest).strip() lines[0] = first_sentence + + # If there is remaining text, it should become the beginning of the description + # in a multiline docstring. Thus, insert a newline and then the remaining text to + # the list of lines. if rest_text: - lines.insert(2, rest_text) + _pos = 1 if len(lines) >= 2 else 0 # noqa: PLR2004 + _leading_spaces = " " * (len(lines[_pos]) - len(lines[_pos].lstrip())) + _internal_spaces = " " * (_pos) + lines.insert(1, "") + lines[_pos + 1] = ( + f"{_leading_spaces}{rest_text}{_internal_spaces}{lines[_pos + 1].strip()}" + ) return lines diff --git a/src/docformatter/wrappers/summary.py b/src/docformatter/wrappers/summary.py index fabfda0..46dd274 100644 --- a/src/docformatter/wrappers/summary.py +++ b/src/docformatter/wrappers/summary.py @@ -32,13 +32,49 @@ import textwrap -def do_unwrap_summary(summary): - """Return summary with newlines removed in preparation for wrapping.""" +def do_unwrap_summary(summary: str) -> str: + r"""Return summary with newlines removed in preparation for wrapping. + + Parameters + ---------- + summary : str + The summary text from the docstring. + + Returns + ------- + str + The summary text with newline (\n) characters replaced by a single space. + """ return re.sub(r"\s*\n\s*", " ", summary) -def do_wrap_summary(summary, initial_indent, subsequent_indent, wrap_length): - """Return line-wrapped summary text.""" +def do_wrap_summary( + summary: str, + initial_indent: str, + subsequent_indent: str, + wrap_length: int, +) -> str: + """Return line-wrapped summary text. + + If the wrap_length is any value less than or equal to zero, the raw, unwrapped + summary text will be returned. + + Parameters + ---------- + summary : str + The summary text from the docstring. + initial_indent : str + The indentation string for the first line of the summary. 
+ subsequent_indent : str + The indentation string for all the other lines of the summary. + wrap_length : int + The column position to wrap the summary lines. + + Returns + ------- + str + The summary text from the docstring wrapped at wrap_length columns. + """ if wrap_length > 0: return textwrap.fill( do_unwrap_summary(summary), From 6935b8094fa745ea9ece10f21a61d1141af8f5ca Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sun, 27 Jul 2025 11:23:14 -0400 Subject: [PATCH 04/11] fix: not detecting string constants --- docs/source/conf.py | 2 +- src/docformatter/constants.py | 6 +-- src/docformatter/patterns/fields.py | 65 +++++++++++++++-------------- src/docformatter/patterns/lists.py | 40 +++++++++--------- src/docformatter/patterns/misc.py | 44 +++++++++++++------ src/docformatter/patterns/url.py | 14 +++---- 6 files changed, 96 insertions(+), 75 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 5eddc4d..0547762 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -3,11 +3,11 @@ # # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html - # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information """Configuration file for the Sphinx documentation builder.""" + project = "docformatter" copyright = "2022-2023, Steven Myint" author = "Steven Myint" diff --git a/src/docformatter/constants.py b/src/docformatter/constants.py index 0860ebe..fab5ae0 100644 --- a/src/docformatter/constants.py +++ b/src/docformatter/constants.py @@ -76,9 +76,6 @@ OPTION_REGEX = r"^ {0,}-{1,2}[\S ]+ \w+" """Regular expression to use for finding option lists.""" -REST_REGEX = r"((\.{2}|`{2}) ?[\w.~-]+(:{2}|`{2})?[\w ]*?|`[\w.~]+`)" -"""Regular expression to use for finding reST directives.""" - REST_SECTION_REGEX = ( r"(^ *[#\*=\-^\'\"\+_\~`\.\:]+\n)?[\w ]+\n 
*[#\*=\-^\'\"\+_\~`\.\:]+" ) @@ -153,7 +150,8 @@ ) """The URL patterns to look for when finding links. -Based on the table at +Based on the table at + """ # This is the regex used to find URL links: diff --git a/src/docformatter/patterns/fields.py b/src/docformatter/patterns/fields.py index 49fe4b5..4712e22 100644 --- a/src/docformatter/patterns/fields.py +++ b/src/docformatter/patterns/fields.py @@ -56,10 +56,11 @@ def do_find_field_lists( Returns ------- - _field_idx, _wrap_parameters : tuple - A list of tuples with each tuple containing the starting and ending - position of each field list found in the passed description. - A boolean indicating whether long field list lines should be wrapped. + tuple[list[tuple], bool] + A tuple containing lists of tuples and a boolean. Each inner tuple + contains the starting and ending position of each field list found in the + description. The boolean indicates whether long field list lines should + be wrapped. """ _field_idx = [] _wrap_parameters = False @@ -116,17 +117,17 @@ def is_epytext_field_list(line: str) -> Union[Match[str], None]: line : str The line to check for Epytext field list patterns. - Notes - ----- - Epytext field lists have the following pattern: - @param x: - @type x: - Returns ------- Match[str] | None A match object if the line matches an Epytext field list pattern, None otherwise. + + Notes + ----- + Epytext field lists have the following pattern: + @param x: + @type x: """ return re.match(EPYTEXT_REGEX, line) @@ -139,15 +140,15 @@ def is_google_field_list(line: str) -> Union[Match[str], None]: line: str The line to check for Google field list patterns. - Notes - ----- - Google field lists have the following pattern: - x (int): Description of x. - Returns ------- Match[str] | None A match object if the line matches a Google field list pattern, None otherwise. + + Notes + ----- + Google field lists have the following pattern: + x (int): Description of x. 
""" return re.match(GOOGLE_REGEX, line) @@ -160,16 +161,18 @@ def is_numpy_field_list(line: str) -> Union[Match[str], None]: line: str The line to check for NumPy field list patterns. + Returns + ------- + Match[str] | None + A match object if the line matches a NumPy field list pattern, None otherwise. + Notes ----- NumPy field lists have the following pattern: x : int Description of x. - - Returns - ------- - Match[str] | None - A match object if the line matches a NumPy field list pattern, None otherwise. + x + Description of x. """ return re.match(NUMPY_REGEX, line) @@ -182,15 +185,15 @@ def is_sphinx_field_list(line: str) -> Union[Match[str], None]: line: str The line to check for Sphinx field list patterns. - Notes - ----- - Sphinx field lists have the following pattern: - :parameter: description - Returns ------- Match[str] | None A match object if the line matches a Sphinx field list pattern, None otherwise. + + Notes + ----- + Sphinx field lists have the following pattern: + :parameter: description """ return re.match(SPHINX_REGEX, line) @@ -205,6 +208,12 @@ def is_user_defined_field_list(line: str) -> Union[Match[str], None]: line: str The line to check for user-defined field list patterns. + Returns + ------- + Match[str] | None + A match object if the line matches a user-defined field list pattern, None + otherwise. + Notes ----- User-defined field lists have the following pattern: @@ -215,12 +224,6 @@ def is_user_defined_field_list(line: str) -> Union[Match[str], None]: These patterns were in the original docformatter code. These patterns do not conform to any common docstring styles. There is no documented reason they were included and are retained for historical purposes. - - Returns - ------- - Match[str] | None - A match object if the line matches a user-defined field list pattern, None - otherwise. 
""" return ( re.match(r"[\S ]+ - \S+", line) diff --git a/src/docformatter/patterns/lists.py b/src/docformatter/patterns/lists.py index 2e53ef5..48acbdf 100644 --- a/src/docformatter/patterns/lists.py +++ b/src/docformatter/patterns/lists.py @@ -111,6 +111,11 @@ def is_bullet_list(line: str) -> Union[Match[str], None]: line : str The line to check for bullet list patterns. + Returns + ------- + Match[str] | None + A match object if the line matches a bullet list pattern, None otherwise. + Notes ----- Bullet list items have the following pattern: @@ -119,11 +124,6 @@ def is_bullet_list(line: str) -> Union[Match[str], None]: + item See `_ - - Returns - ------- - Match[str] | None - A match object if the line matches a bullet list pattern, None otherwise. """ return re.match(BULLET_REGEX, line) @@ -136,17 +136,17 @@ def is_definition_list(line: str) -> Union[Match[str], None]: line : str The line to check for definition list patterns. + Returns + ------- + Match[str] | None + A match object if the line matches a definition list pattern, None otherwise. + Notes ----- Definition list items have the following pattern: term: definition See `_ - - Returns - ------- - Match[str] | None - A match object if the line matches a definition list pattern, None otherwise. """ return re.match(ENUM_REGEX, line) @@ -159,6 +159,11 @@ def is_enumerated_list(line: str) -> Union[Match[str], None]: line : str The line to check for enumerated list patterns. + Returns + ------- + Match[str] | None + A match object if the line matches an enumerated list pattern, None otherwise. + Notes ----- Enumerated list items have the following pattern: @@ -166,11 +171,6 @@ def is_enumerated_list(line: str) -> Union[Match[str], None]: 2. item See `_ - - Returns - ------- - Match[str] | None - A match object if the line matches an enumerated list pattern, None otherwise. 
""" return re.match(ENUM_REGEX, line) @@ -217,6 +217,11 @@ def is_option_list(line: str) -> Union[Match[str], None]: line : str The line to check for option list patterns. + Returns + ------- + Match[str] | None + A match object if the line matches an option list pattern, None otherwise. + Notes ----- Option list items have the following pattern: @@ -224,10 +229,5 @@ def is_option_list(line: str) -> Union[Match[str], None]: -h, --help: Show help message. See `_ - - Returns - ------- - Match[str] | None - A match object if the line matches an option list pattern, None otherwise. """ return re.match(OPTION_REGEX, line) diff --git a/src/docformatter/patterns/misc.py b/src/docformatter/patterns/misc.py index 2a6ec86..a1577ab 100644 --- a/src/docformatter/patterns/misc.py +++ b/src/docformatter/patterns/misc.py @@ -29,6 +29,7 @@ # Standard Library Imports import re +import tokenize from re import Match from typing import Union @@ -45,15 +46,15 @@ def is_inline_math(line: str) -> Union[Match[str], None]: line : str The line to check for inline math patterns. - Notes - ----- - Inline math expressions have the following pattern: - c :math:`[0, `]` - Returns ------- Match[str] | None A match object if the line matches an inline math pattern, None otherwise. + + Notes + ----- + Inline math expressions have the following pattern: + c :math:`[0, `]` """ return re.match(r" *\w *:[a-zA-Z0-9_\- ]*:", line) @@ -66,16 +67,16 @@ def is_literal_block(line: str) -> Union[Match[str], None]: line : str The line to check for literal block patterns. + Returns + ------- + Match[str] | None + A match object if the line matches a literal block pattern, None otherwise. + Notes ----- Literal blocks have the following pattern: :: code - - Returns - ------- - Match[str] | None - A match object if the line matches a literal block pattern, None otherwise. 
""" return re.match(LITERAL_REGEX, line) @@ -90,7 +91,7 @@ def is_probably_beginning_of_sentence(line: str) -> Union[Match[str], None, bool Returns ------- - is_beginning : bool + bool True if this token is the beginning of a sentence, False otherwise. """ # Check heuristically for a parameter list. @@ -115,10 +116,29 @@ def is_some_sort_of_code(text: str) -> bool: Returns ------- - is_code : bool + bool True if the text contains and code patterns, False otherwise. """ return any( len(word) > 50 and not re.match(URL_REGEX, word) # noqa: PLR2004 for word in text.split() ) + + +def is_string_constant(token: tokenize.TokenInfo) -> bool: + """Determine if docstring token is actually a string constant. + + Parameters + ---------- + token : TokenInfo + The token immediately preceding the docstring token. + + Returns + ------- + bool + True if the doctring token is actually string constant, False otherwise. + """ + if token.type == tokenize.OP and token.string == "=": + return True + + return False diff --git a/src/docformatter/patterns/url.py b/src/docformatter/patterns/url.py index 5a09dd2..7ad0bee 100644 --- a/src/docformatter/patterns/url.py +++ b/src/docformatter/patterns/url.py @@ -46,9 +46,9 @@ def do_find_links(text: str) -> List[Tuple[int, int]]: Returns ------- - url_index : list + list A list of tuples with each tuple containing the starting and ending - position of each URL found in the passed description. + position of each URL found in the description. """ _url_iter = re.finditer(URL_REGEX, text) return [(_url.start(0), _url.end(0)) for _url in _url_iter] @@ -65,16 +65,16 @@ def do_skip_link(text: str, index: Tuple[int, int]) -> bool: The index in the text of the starting and ending position of the identified link. + Returns + ------- + _do_skip : bool + Whether to skip this link and simpley treat it as a standard text word. + Notes ----- Is the identified link simply: 1. The URL scheme pattern such as 's3://' or 'file://' or 'dns:'. 2. 
The beginning of a URL link that has been wrapped by the user. - - Returns - ------- - _do_skip : bool - Whether to skip this link and simpley treat it as a standard text word. """ _do_skip = re.search(URL_SKIP_REGEX, text[index[0] : index[1]]) is not None From 351c55adff783d9a937542a7a4a9286e584c5889 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sun, 27 Jul 2025 12:38:50 -0400 Subject: [PATCH 05/11] feat: add function to find inline reST markup --- src/docformatter/constants.py | 6 ++++ src/docformatter/patterns/rest.py | 40 +++++++++++++++++------- src/docformatter/wrappers/description.py | 4 +-- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/src/docformatter/constants.py b/src/docformatter/constants.py index fab5ae0..1fbb1ed 100644 --- a/src/docformatter/constants.py +++ b/src/docformatter/constants.py @@ -76,6 +76,12 @@ OPTION_REGEX = r"^ {0,}-{1,2}[\S ]+ \w+" """Regular expression to use for finding option lists.""" +REST_INLINE_REGEX = ( + r"(?:.-]+([*]{1," + r"2}|[`]{1,2}_?|[|]|[\]]_?)" +) +"""Regular expression to use for finding inline reST markup.""" + REST_SECTION_REGEX = ( r"(^ *[#\*=\-^\'\"\+_\~`\.\:]+\n)?[\w ]+\n *[#\*=\-^\'\"\+_\~`\.\:]+" ) diff --git a/src/docformatter/patterns/rest.py b/src/docformatter/patterns/rest.py index dd89114..42e15ae 100644 --- a/src/docformatter/patterns/rest.py +++ b/src/docformatter/patterns/rest.py @@ -31,18 +31,36 @@ import re # docformatter Package Imports -from docformatter.constants import REST_REGEX +from docformatter.constants import REST_INLINE_REGEX -def do_find_directives(text: str) -> bool: +def do_find_rest_directives( + text: str, + indent: int = 0, +) -> list[tuple[int, int]]: """Determine if docstring contains any reST directives. - .. todo:: + Parameters + ---------- + text : str + The docstring text to test. + indent : int + The number of spaces the reST directive line is indented. + + Returns + ------- + bool + True if the docstring is a reST directive, False otherwise. 
+ """ + _rest_directive_regex = ( + r"^( {0,}\.\. .+?::.*\n(?:[ \t]{" + str(indent + 1) + r",}.*\n|\n)*)" + ) + _rest_iter = re.finditer(_rest_directive_regex, text, flags=re.MULTILINE) + return [(_rest.start(0), _rest.end(0)) for _rest in _rest_iter] + - Currently this function only returns True/False to indicate whether a - reST directive was found. Should return a list of tuples containing - the start and end position of each reST directive found similar to the - function do_find_links(). +def do_find_inline_rest_markup(text: str) -> list[tuple[int, int]]: + """Determine if docstring contains any inline reST markup. Parameters ---------- @@ -51,8 +69,8 @@ def do_find_directives(text: str) -> bool: Returns ------- - is_directive : bool - Whether the docstring is a reST directive. + bool + True if the docstring is a reST directive, False otherwise. """ - _rest_iter = re.finditer(REST_REGEX, text) - return bool([(_rest.start(0), _rest.end(0)) for _rest in _rest_iter]) + _rest_iter = re.finditer(REST_INLINE_REGEX, text, flags=re.MULTILINE) + return [(_rest.start(0), _rest.end(0)) for _rest in _rest_iter] diff --git a/src/docformatter/wrappers/description.py b/src/docformatter/wrappers/description.py index d7ee4ac..2573852 100644 --- a/src/docformatter/wrappers/description.py +++ b/src/docformatter/wrappers/description.py @@ -71,7 +71,7 @@ def do_wrap_description( # noqa: PLR0913 Returns ------- - description : str + str The description wrapped at wrap_length characters. 
""" text = _strings.do_strip_leading_blank_lines(text) @@ -89,7 +89,7 @@ def do_wrap_description( # noqa: PLR0913 not force_wrap and ( _patterns.is_some_sort_of_code(text) - or _patterns.do_find_directives(text) + or _patterns.do_find_rest_directives(text, len(indentation)) or _patterns.is_type_of_list(text, strict, style) ) ): From 095b7e0f2dbf0ced0a9bd834acb79ba922010903 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sun, 27 Jul 2025 12:39:11 -0400 Subject: [PATCH 06/11] test: add tests for function to find inline reST markup --- tests/_data/string_files/rest_patterns.toml | 35 ++++++++++--------- tests/patterns/test_rest_patterns.py | 37 +++++++++++++++++---- 2 files changed, 51 insertions(+), 21 deletions(-) diff --git a/tests/_data/string_files/rest_patterns.toml b/tests/_data/string_files/rest_patterns.toml index 95a475d..3c2ddad 100644 --- a/tests/_data/string_files/rest_patterns.toml +++ b/tests/_data/string_files/rest_patterns.toml @@ -1,20 +1,17 @@ -[is_inline_directive] -instring = """ -These are some reST directives that need to be retained even if it means not wrapping the line they are found on. -Constructs and returns a :class:`QuadraticCurveTo `. -Register ``..click:example::`` and ``.. click:run::`` directives, augmented with ANSI coloring. -""" -expected = true - [is_double_dot_directive] instring = """ -.. _linspace API: https://numpy.org/doc/stable/reference/generated/numpy.linspace.html -.. _arange API: https://numpy.org/doc/stable/reference/generated/numpy.arange.html -.. _logspace API: https://numpy.org/doc/stable/reference/generated/numpy.logspace.html +This is a docstring that contains a reST directive. + +.. directive type:: directive + :modifier: + + The directive type description. + +This is the part of the docstring that follows the reST directive. 
""" -expected = true +expected = [53, 136] -[is_double_dot_directive_2] +[is_double_dot_directive_indented] instring = """ ``pattern`` is considered as an URL only if it is parseable as such and starts with ``http://`` or ``https://``. @@ -26,7 +23,15 @@ instring = """ which is only `available in the standard library starting with Python v3.9 `. """ -expected = true +expected = [114, 499] + +[is_inline_directive] +instring = """ +These are some reST directives that need to be retained even if it means not wrapping the line they are found on. +Constructs and returns a :class:`QuadraticCurveTo `. +Register ``..click:example::`` and ``.. click:run::`` directives, augmented with ANSI coloring. +""" +expected = [145, 183] [is_double_backtick_directive] instring = """ @@ -47,4 +52,4 @@ By default we choose to exclude: Since a mail could be ``Cc``'d to two lists with different ``Reply-To`` munging options set. """ -expected = true +expected = [38, 44] diff --git a/tests/patterns/test_rest_patterns.py b/tests/patterns/test_rest_patterns.py index 35e69a4..25718cc 100644 --- a/tests/patterns/test_rest_patterns.py +++ b/tests/patterns/test_rest_patterns.py @@ -43,7 +43,7 @@ import pytest # docformatter Package Imports -from docformatter.patterns import do_find_directives +from docformatter.patterns import do_find_rest_directives, do_find_inline_rest_markup with open("tests/_data/string_files/rest_patterns.toml", "rb") as f: TEST_STRINGS = tomllib.load(f) @@ -53,15 +53,40 @@ @pytest.mark.parametrize( "test_key", [ - "is_inline_directive", "is_double_dot_directive", - "is_double_dot_directive_2", + "is_double_dot_directive_indented", + ], +) +def test_do_find_rest_directives(test_key): + source = TEST_STRINGS[test_key]["instring"] + expected = TEST_STRINGS[test_key]["expected"] + + result = do_find_rest_directives(source) + assert ( + result[0][0] == expected[0] + ), f"\nFailed {test_key}\nExpected {expected[0]}\nGot {result[0][0]}" + assert ( + result[0][1] == expected[1] + ), 
f"\nFailed {test_key}\nExpected {expected[0]}\nGot {result[0][1]}" + + +@pytest.mark.unit +@pytest.mark.parametrize( + "test_key", + [ + "is_inline_directive", "is_double_backtick_directive", ], ) -def test_rest_directive_patterns(test_key): +def test_do_find_inline_rest_markup(test_key): source = TEST_STRINGS[test_key]["instring"] expected = TEST_STRINGS[test_key]["expected"] - result = do_find_directives(source) - assert result == expected, f"\nFailed {test_key}\nExpected {expected}\nGot {result}" + result = do_find_inline_rest_markup(source) + print(result) + assert ( + result[0][0] == expected[0] + ), f"\nFailed {test_key}\nExpected {expected[0]}\nGot {result[0][0]}" + assert ( + result[0][1] == expected[1] + ), f"\nFailed {test_key}\nExpected {expected[0]}\nGot {result[0][1]}" From 7f6cf6fd19ae2037c59e79716064fe6c86e475db Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Thu, 14 Aug 2025 16:37:54 -0400 Subject: [PATCH 07/11] fix: fix tests that failed after updating regex's As docformatter becomes more strict in identifying patterns and handling the various patterns that are identified, some of the older tests are failing. These tests fail because the patterns in the test strings are not strictly reST, Sphinx, etc. --- README.rst | 6 ++++++ src/docformatter/constants.py | 8 ++++---- src/docformatter/patterns/rest.py | 8 ++------ src/docformatter/strings.py | 2 +- src/docformatter/wrappers/description.py | 2 +- tests/_data/string_files/description_wrappers.toml | 4 ++-- tests/_data/string_files/do_format_code.toml | 12 ++++++------ tests/_data/string_files/do_format_docstrings.toml | 3 ++- tests/_data/string_files/string_functions.toml | 4 ++-- 9 files changed, 26 insertions(+), 23 deletions(-) diff --git a/README.rst b/README.rst index 03c9024..3256385 100644 --- a/README.rst +++ b/README.rst @@ -199,6 +199,12 @@ Do you use *docformatter*? What style docstrings do you use? Add some badges t .. 
image:: https://img.shields.io/badge/%20style-google-3666d6.svg :target: https://google.github.io/styleguide/pyguide.html#s3.8-comments-and-docstrings +Assistance +========== +``docformatter`` has an IRC channel on `Libera.Chat`_ in the `#docformatter`_ room. + .. _`Libera.Chat`: https://libera.chat + .. _`#docformatter`: https://web.libera.chat/#docformatter + Issues ====== diff --git a/src/docformatter/constants.py b/src/docformatter/constants.py index 1fbb1ed..c44ec6b 100644 --- a/src/docformatter/constants.py +++ b/src/docformatter/constants.py @@ -76,10 +76,10 @@ OPTION_REGEX = r"^ {0,}-{1,2}[\S ]+ \w+" """Regular expression to use for finding option lists.""" -REST_INLINE_REGEX = ( - r"(?:.-]+([*]{1," - r"2}|[`]{1,2}_?|[|]|[\]]_?)" -) +REST_DIRECTIVE_REGEX = r"^( {0,}\.\. .+?:{1,2}.*\n(?:[ \t]{1,}.*\n|\n)*)" +"""Regular expression to use for finding reST directives.""" + +REST_INLINE_REGEX = r"(?:.-]+([*]{1,2}|[`]{1,2}_?|[|]|[\]]_?)" # noqa: E501 """Regular expression to use for finding inline reST markup.""" REST_SECTION_REGEX = ( diff --git a/src/docformatter/patterns/rest.py b/src/docformatter/patterns/rest.py index 42e15ae..ab6f018 100644 --- a/src/docformatter/patterns/rest.py +++ b/src/docformatter/patterns/rest.py @@ -31,12 +31,11 @@ import re # docformatter Package Imports -from docformatter.constants import REST_INLINE_REGEX +from docformatter.constants import REST_DIRECTIVE_REGEX, REST_INLINE_REGEX def do_find_rest_directives( text: str, - indent: int = 0, ) -> list[tuple[int, int]]: """Determine if docstring contains any reST directives. @@ -52,10 +51,7 @@ def do_find_rest_directives( bool True if the docstring is a reST directive, False otherwise. """ - _rest_directive_regex = ( - r"^( {0,}\.\. 
.+?::.*\n(?:[ \t]{" + str(indent + 1) + r",}.*\n|\n)*)" - ) - _rest_iter = re.finditer(_rest_directive_regex, text, flags=re.MULTILINE) + _rest_iter = re.finditer(REST_DIRECTIVE_REGEX, text, flags=re.MULTILINE) return [(_rest.start(0), _rest.end(0)) for _rest in _rest_iter] diff --git a/src/docformatter/strings.py b/src/docformatter/strings.py index 94c8b43..e5fba2e 100644 --- a/src/docformatter/strings.py +++ b/src/docformatter/strings.py @@ -402,7 +402,7 @@ def do_split_summary(lines) -> List[str]: # in a multiline docstring. Thus, insert a newline and then the remaining text to # the list of lines. if rest_text: - _pos = 1 if len(lines) >= 2 else 0 # noqa: PLR2004 + _pos = 1 if len(lines) >= 3 else 0 # noqa: PLR2004 _leading_spaces = " " * (len(lines[_pos]) - len(lines[_pos].lstrip())) _internal_spaces = " " * (_pos) lines.insert(1, "") diff --git a/src/docformatter/wrappers/description.py b/src/docformatter/wrappers/description.py index 2573852..8954c73 100644 --- a/src/docformatter/wrappers/description.py +++ b/src/docformatter/wrappers/description.py @@ -89,7 +89,7 @@ def do_wrap_description( # noqa: PLR0913 not force_wrap and ( _patterns.is_some_sort_of_code(text) - or _patterns.do_find_rest_directives(text, len(indentation)) + or _patterns.do_find_rest_directives(text) or _patterns.is_type_of_list(text, strict, style) ) ): diff --git a/tests/_data/string_files/description_wrappers.toml b/tests/_data/string_files/description_wrappers.toml index c567d42..ca02e2b 100644 --- a/tests/_data/string_files/description_wrappers.toml +++ b/tests/_data/string_files/description_wrappers.toml @@ -85,11 +85,11 @@ expected = """ instring = """ This is a long docstring containing some reST directives. -.. note +.. note:: This is a note in the reST dialog. """ expected = """ This is a long docstring containing some reST directives. - .. note + .. 
note:: This is a note in the reST dialog.""" diff --git a/tests/_data/string_files/do_format_code.toml b/tests/_data/string_files/do_format_code.toml index 4572aec..e01cf53 100644 --- a/tests/_data/string_files/do_format_code.toml +++ b/tests/_data/string_files/do_format_code.toml @@ -50,7 +50,7 @@ source=''' class TestClass: """This is a class docstring. :cvar test_int: a class attribute. - ..py.method: big_method() + .. py:method:: big_method() """ ''' expected=''' @@ -58,7 +58,7 @@ expected=''' """This is a class docstring. :cvar test_int: a class attribute. - ..py.method: big_method() + .. py:method:: big_method() """ ''' @@ -678,7 +678,7 @@ class TestClass: """This is a class docstring. :cvar test_int: a class attribute. - ..py.method: big_method() + ..py:method:: big_method() """ ''' expected=''' @@ -686,7 +686,7 @@ class TestClass: """This is a class docstring. :cvar test_int: a class attribute. - ..py.method: big_method() + ..py:method:: big_method() """ ''' @@ -870,8 +870,8 @@ expected='''def mixed_links(): Once ``mpm`` is located, we can rely on it to produce the main output of the plugin. - The output must supports both `Xbar dialect - `_ + The output must supports both + `Xbar dialect `_ and `SwiftBar dialect `_. """ diff --git a/tests/_data/string_files/do_format_docstrings.toml b/tests/_data/string_files/do_format_docstrings.toml index 664cf46..8a0876d 100644 --- a/tests/_data/string_files/do_format_docstrings.toml +++ b/tests/_data/string_files/do_format_docstrings.toml @@ -487,7 +487,8 @@ source='''"""This is another docstring with `a link`_. """''' expected='''"""This is another docstring with `a link`_. - .. a link: http://www.reliqual.com/wiki/how_to_use_ramstk/verification_and_validation_module/index.html. + .. a link: + http://www.reliqual.com/wiki/how_to_use_ramstk/verification_and_validation_module/index.html. 
"""''' [issue_75_2] diff --git a/tests/_data/string_files/string_functions.toml b/tests/_data/string_files/string_functions.toml index 45162dd..039640d 100644 --- a/tests/_data/string_files/string_functions.toml +++ b/tests/_data/string_files/string_functions.toml @@ -116,11 +116,11 @@ expected = ["This e.g. a sentence.", ""] [do_split_multi_sentence_summary] # See issue #283. instring = ["This is a sentence. This is another.", ""] -expected = ["This is a sentence.","","This is another."] +expected = ["This is a sentence.", "This is another.", ""] [do_split_multi_sentence_summary_2] # See issue #283. instring = ["This e.g. a sentence. This is another.", ""] -expected = ["This e.g. a sentence.", "", "This is another."] +expected = ["This e.g. a sentence.", "This is another.", ""] [do_split_description_url_outside_param] instring = "mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm https://mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm\n:param a:mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm" From 3e7e204bfc4092735eec14ddbcedba14d4e9cdf3 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Thu, 14 Aug 2025 17:16:14 -0400 Subject: [PATCH 08/11] doc: add section discussing text patterns to usage docs --- docs/source/usage.rst | 49 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index b905e2b..22017bf 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -151,3 +151,52 @@ Use with GitHub Actions action. .. _`python-lint-plus`: https://github.com/marketplace/actions/python-code-style-quality-and-lint + +Dostring Text Patterns +====================== + +``docformatter`` began as a simple tool to format docstrings to follow PEP257. It +was originally a single Python script of 118 lines containing seven functions. +That's no longer the case as an inspection of the codebase will show. 
Over time,
+``docformatter`` has grown to include a number of features that have been requested
+by its most fantastic user base.
+
+In the early days, ``docformatter`` only formatted simple docstrings. "Complex" text
+patterns like lists, parameter descriptions, and reStructuredText (reST) sections
+caused ``docformatter`` to simply skip formatting the docstring. As feature requests
+have been and will be incorporated, ``docformatter`` has gained the ability to
+recognize and format more complex text patterns.
+
+As a result, it is necessary for the user to properly format their docstrings to
+follow the patterns documented in the various specifications. These specifications
+would include:
+
+- PEP 257 - Docstring Conventions
+  https://www.python.org/dev/peps/pep-0257/
+- reStructuredText (reST) Markup Specification
+  https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html
+- Sphinx Documentation Style
+  https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
+- Epydoc Documentation Style
+  http://epydoc.sourceforge.net/manual-fields.html
+
+Any docstring that does not follow these specifications may not be formatted properly
+as these patterns may be recognized by ``docformatter`` as simple text that needs to
+be formatted. For example, if a user writes a docstring that contains a list but does not
+format the list according to reST specifications, ``docformatter`` may not recognize
+the list and may format the list items as simple text. This could result in a
+list that is not properly indented or wrapped.
+
+The user is encouraged to read and follow these specifications when writing
+docstrings to ensure that ``docformatter`` can properly format them. Issues reported
+to the ``docformatter`` project that are the result of docstrings not following these
+specifications will be closed as ``S:wontfix`` with a request for the user to update
+their docstrings to follow the specifications.
+
+Additionally, as ``docformatter`` continues to add support for more text patterns (e.g.,
+Numpy or Google style docstrings), new releases may result in significant docstring
+formatting changes in your code base. While we hate to see this happen to our users,
+it is the result of our desire to make ``docformatter`` the best tool it can be for
+formatting docstrings and the best way to achieve that is to stringently comply with
+the various specifications. We appreciate your understanding and patience as we
+continue to improve ``docformatter``.

From 7906402bcdf26abbbfde58f3d3788326e774a52c Mon Sep 17 00:00:00 2001
From: Doyle Rowland
Date: Thu, 14 Aug 2025 17:30:37 -0400
Subject: [PATCH 09/11] doc: add assistance section to the README

---
 README.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.rst b/README.rst
index 3256385..4cf384d 100644
--- a/README.rst
+++ b/README.rst
@@ -205,6 +205,10 @@ Assistance
 .. _`Libera.Chat`: https://libera.chat
 .. _`#docformatter`: https://web.libera.chat/#docformatter
 
+There is no ``docformatter`` channel on the Python Code Quality Discord server, but
+you can ask for help in the `# general`_ channel.
+ .. _`# general`: https://discord.com/channels/825463413634891776/934197425357336596
+
 Issues
 ======
 
From 1a5d5bf0b0ac7f53ecfa922b0fe086d10991c879 Mon Sep 17 00:00:00 2001
From: Doyle Rowland
Date: Thu, 14 Aug 2025 17:45:18 -0400
Subject: [PATCH 10/11] fix: use sys module to get default file encoding

---
 README.rst                 | 1 +
 src/docformatter/encode.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index 4cf384d..a3f2189 100644
--- a/README.rst
+++ b/README.rst
@@ -207,6 +207,7 @@ Assistance
 There is no ``docformatter`` channel on the Python Code Quality Discord server, but
 you can ask for help in the `# general`_ channel.
+
 .. 
_`# general`: https://discord.com/channels/825463413634891776/934197425357336596 Issues diff --git a/src/docformatter/encode.py b/src/docformatter/encode.py index a4a6384..ac55ee9 100644 --- a/src/docformatter/encode.py +++ b/src/docformatter/encode.py @@ -47,7 +47,7 @@ class Encoder: CRLF = "\r\n" # Default encoding to use if the file encoding cannot be detected - DEFAULT_ENCODING = "latin-1" + DEFAULT_ENCODING = sys.getdefaultencoding() def __init__(self): """Initialize an Encoder instance.""" From 412479e5d6b02f7dd7e20f870132a413dc83898a Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Thu, 14 Aug 2025 17:58:02 -0400 Subject: [PATCH 11/11] fix: explicitly ignore utf_16 and utf_32 encoding --- src/docformatter/encode.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/docformatter/encode.py b/src/docformatter/encode.py index ac55ee9..c7e36f0 100644 --- a/src/docformatter/encode.py +++ b/src/docformatter/encode.py @@ -64,9 +64,15 @@ def do_detect_encoding(self, filename) -> None: """ try: detection_result = from_path(filename).best() - self.encoding = ( - detection_result.encoding if detection_result else self.DEFAULT_ENCODING - ) + if detection_result and detection_result.encoding in ["utf_16", "utf_32"]: + # Treat undetectable/binary encodings as failure + self.encoding = self.DEFAULT_ENCODING + else: + self.encoding = ( + detection_result.encoding + if detection_result + else self.DEFAULT_ENCODING + ) # Check for correctness of encoding. with self.do_open_with_encoding(filename) as check_file: