From dbf288b9d68a4ed7527ee45952967eeba70cc1fb Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Sun, 5 Feb 2023 16:38:51 +0530 Subject: [PATCH 01/22] debug: a couple code fixes --- tabulate/__init__.py | 97 ++++++++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 30 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index 3b1a1e11..ca1e3e7d 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -11,8 +11,14 @@ import textwrap import dataclasses +from typing import ( + Union, + List, + Optional, +) + try: - import wcwidth # optional wide-character (CJK) support + import wcwidth # optional wide-character (CJK) support # type: ignore except ImportError: wcwidth = None @@ -23,7 +29,7 @@ def _is_file(f): __all__ = ["tabulate", "tabulate_formats", "simple_separated_format"] try: - from .version import version as __version__ # noqa: F401 + from .version import version as __version__ # noqa: F401 # type: ignore except ImportError: pass # running __init__.py as a script, AppVeyor pytests @@ -1010,7 +1016,7 @@ def _strip_ansi(s): if isinstance(s, str): return _ansi_codes.sub(r"\4", s) else: # a bytestring - return _ansi_codes_bytes.sub(r"\4", s) + return _ansi_codes_bytes.sub(b"\4", s) def _visible_width(s): @@ -1048,7 +1054,7 @@ def _choose_width_fn(has_invisible, enable_widechars, is_multiline): if has_invisible: line_width_fn = _visible_width elif enable_widechars: # optional wide-character support if available - line_width_fn = wcwidth.wcswidth + line_width_fn = wcwidth.wcswidth # type: ignore else: line_width_fn = len if is_multiline: @@ -1088,7 +1094,7 @@ def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline) if has_invisible: line_width_fn = _visible_width elif enable_widechars: # optional wide-character support if available - line_width_fn = wcwidth.wcswidth + line_width_fn = wcwidth.wcswidth # type: ignore else: line_width_fn = len if is_multiline: @@ -1141,7 +1147,7 @@ def _align_column( # enable wide-character width corrections s_lens = [[len(s) for s in re.split("[\r\n]", ms)] for ms in strings] visible_widths = [ - [maxwidth - (w - l) for w, l in zip(mw, ml)] + [maxwidth - (w - l) for w, l in zip(mw if isinstance(mw, list) else [mw], ml if isinstance(ml, list) else [ml])] for mw, ml in zip(s_widths, s_lens) ] # wcswidth and _visible_width don't count invisible characters; @@ -1156,7 +1162,13 @@ def _align_column( else: # enable wide-character width corrections s_lens = list(map(len, strings)) - visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)] + # visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)] + visible_widths = [] + for w, l in zip(s_widths, s_lens): + if isinstance(w, list): + visible_widths.append(maxwidth - (w[0] - l)) + else: + visible_widths.append(maxwidth - (w - l)) # wcswidth and _visible_width don't count invisible characters; # padfn doesn't need to apply another correction padded_strings = [padfn(w, s) for s, w in zip(strings, visible_widths)] @@ -1239,7 +1251,10 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): if is_a_colored_number: raw_val = _strip_ansi(val) formatted_val = format(float(raw_val), floatfmt) - return val.replace(raw_val, formatted_val) + if isinstance(val, str): + return val.replace(str(raw_val), formatted_val) + else: + return val.decode().replace(str(raw_val), formatted_val) else: return format(float(val), floatfmt) else: @@ -1252,9 +1267,14 @@ def _align_header( "Pad string header to width chars given known visible_width of the header." if is_multiline: header_lines = re.split(_multiline_codes, header) - padded_lines = [ - _align_header(h, alignment, width, width_fn(h)) for h in header_lines - ] + if width_fn and callable(width_fn): + padded_lines = [ + _align_header(h, alignment, width, width_fn(h)) for h in header_lines + ] + else: + padded_lines = [ + _align_header(h, alignment, width, len(h)) for h in header_lines + ] return "\n".join(padded_lines) # else: not multiline ninvisible = len(header) - visible_width @@ -1298,6 +1318,8 @@ def _prepend_row_index(rows, index): "index must be as long as the number of data rows: " + "len(index)={} len(rows)={}".format(len(index), len(rows)) ) + if not isinstance(index, Iterable): + raise ValueError("Index must be iterable") sans_rows, separating_lines = _remove_separating_lines(rows) new_rows = [] index_iter = iter(index) @@ -1407,14 +1429,19 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): and hasattr(rows[0], "_fields") ): # namedtuple - headers = list(map(str, rows[0]._fields)) + if all(isinstance(row, tuple) and hasattr(row, "_fields") for row in rows): + headers = list(map(str, rows[0]._fields)) # type: ignore + else: + raise ValueError("All objects in the rows list must be named tuples with the `_fields`` attribute") elif len(rows) > 0 and hasattr(rows[0], "keys") and hasattr(rows[0], "values"): # dict-like object uniq_keys = set() # implements hashed lookup keys = [] # storage for set if headers == "firstrow": firstdict = rows[0] if len(rows) > 0 else {} - keys.extend(firstdict.keys()) + if type(tabular_data) != dict and not hasattr(tabular_data, 'keys'): + raise TypeError("Expected a dictionary-like object, got a {}".format(type(tabular_data))) + keys.extend(firstdict.keys()) # type: ignore uniq_keys.update(keys) rows = rows[1:] for row in rows: @@ -1431,7 +1458,7 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): headers = list(map(str, headers)) elif headers == "firstrow": if len(rows) > 0: - headers = [firstdict.get(k, k) for k in keys] + headers = [firstdict.get(k, k) for k in keys] # type: ignore headers = list(map(str, headers)) else: headers = [] @@ -1507,7 +1534,7 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): return rows, headers -def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True): +def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses: List[bool] = [True]): if len(list_of_lists): num_cols = len(list_of_lists[0]) else: @@ -1529,7 +1556,7 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True): # Any future custom formatting of types (such as datetimes) # may need to be more explicit than just `str` of the object casted_cell = ( - str(cell) if _isnumber(cell) else _type(cell, numparse)(cell) + str(cell) if _isnumber(cell) else str(cell) ) wrapped = [ "\n".join(wrapper.wrap(line)) @@ -2240,7 +2267,7 @@ def _expand_numparse(disable_numparse, column_count): return [not disable_numparse] * column_count -def _expand_iterable(original, num_desired, default): +def _expand_iterable(original: Union[Iterable, object], num_desired: int, default): """ Expands the `original` argument to return a return a list of length `num_desired`. If `original` is shorter than `num_desired`, it will @@ -2248,7 +2275,10 @@ def _expand_iterable(original, num_desired, default): If `original` is not a list to begin with (i.e. scalar value) a list of length `num_desired` completely populated with `default will be returned """ - if isinstance(original, Iterable) and not isinstance(original, str): + if isinstance(original, str): + return [default] * num_desired + if isinstance(original, Iterable) and hasattr(original, "__len__"): + original = list(original) return original + [default] * (num_desired - len(original)) else: return [default] * num_desired @@ -2649,7 +2679,7 @@ def _main(): import sys import textwrap - usage = textwrap.dedent(_main.__doc__) + usage: str = textwrap.dedent(_main.__doc__) if _main.__doc__ else "" try: opts, args = getopt.getopt( sys.argv[1:], @@ -2706,17 +2736,24 @@ def _main(): colalign=colalign, ) else: - with open(f) as fobj: - _pprint_file( - fobj, - headers=headers, - tablefmt=tablefmt, - sep=sep, - floatfmt=floatfmt, - intfmt=intfmt, - file=out, - colalign=colalign, - ) + if isinstance(f, io.TextIOBase): + fobj = io.StringIO(f.read()) + + elif isinstance(f, (str, bytes)): + with open(f) as fobj: + _pprint_file( + fobj, + headers=headers, + tablefmt=tablefmt, + sep=sep, + floatfmt=floatfmt, + intfmt=intfmt, + file=out, + colalign=colalign, + ) + + else: + raise TypeError(f"Unsupported file type: {type(f)}") def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, intfmt, file, colalign): From 5e47a21ce0f57fac932f6a697127420d2cd5abba Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Sun, 5 Feb 2023 16:56:28 +0530 Subject: [PATCH 02/22] ignore pipfile --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0495ac79..ac91325d 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ website-build/ ## Unit test / coverage reports .coverage .tox +Pipfile From 2ba467ffabe17794a7c6bd3ed6ab3dda40420550 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Sun, 5 Feb 2023 16:56:42 +0530 Subject: [PATCH 03/22] pretty little type hints --- tabulate/__init__.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index ca1e3e7d..1d4a23ba 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -14,7 +14,6 @@ from typing import ( Union, List, - Optional, ) try: @@ -35,25 +34,25 @@ def _is_file(f): # minimum extra space in headers -MIN_PADDING = 2 +MIN_PADDING: int = 2 # Whether or not to preserve leading/trailing whitespace in data. -PRESERVE_WHITESPACE = False +PRESERVE_WHITESPACE: bool = False -_DEFAULT_FLOATFMT = "g" -_DEFAULT_INTFMT = "" -_DEFAULT_MISSINGVAL = "" +_DEFAULT_FLOATFMT: str = "g" +_DEFAULT_INTFMT: str = "" +_DEFAULT_MISSINGVAL: str = "" # default align will be overwritten by "left", "center" or "decimal" # depending on the formatter -_DEFAULT_ALIGN = "default" +_DEFAULT_ALIGN: str = "default" # if True, enable wide-character (CJK) support -WIDE_CHARS_MODE = wcwidth is not None +WIDE_CHARS_MODE: bool = wcwidth is not None # Constant that can be used as part of passed rows to generate a separating line # It is purposely an unprintable character, very unlikely to be used in a table -SEPARATING_LINE = "\001" +SEPARATING_LINE: str = "\001" Line = namedtuple("Line", ["begin", "hline", "sep", "end"]) From 12fb2a38e8e6d153972bd92e7fd66fe7a885d1e2 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Sun, 5 Feb 2023 16:57:21 +0530 Subject: [PATCH 04/22] fromat: black --- tabulate/__init__.py | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index 1d4a23ba..1759a477 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1053,7 +1053,7 @@ def _choose_width_fn(has_invisible, enable_widechars, is_multiline): if has_invisible: line_width_fn = _visible_width elif enable_widechars: # optional wide-character support if available - line_width_fn = wcwidth.wcswidth # type: ignore + line_width_fn = wcwidth.wcswidth # type: ignore else: line_width_fn = len if is_multiline: @@ -1093,7 +1093,7 @@ def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline) if has_invisible: line_width_fn = _visible_width elif enable_widechars: # optional wide-character support if available - line_width_fn = wcwidth.wcswidth # type: ignore + line_width_fn = wcwidth.wcswidth # type: ignore else: line_width_fn = len if is_multiline: @@ -1146,7 +1146,13 @@ def _align_column( # enable wide-character width corrections s_lens = [[len(s) for s in re.split("[\r\n]", ms)] for ms in strings] visible_widths = [ - [maxwidth - (w - l) for w, l in zip(mw if isinstance(mw, list) else [mw], ml if isinstance(ml, list) else [ml])] + [ + maxwidth - (w - l) + for w, l in zip( + mw if isinstance(mw, list) else [mw], + ml if isinstance(ml, list) else [ml], + ) + ] for mw, ml in zip(s_widths, s_lens) ] # wcswidth and _visible_width don't count invisible characters; @@ -1429,18 +1435,24 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): ): # namedtuple if all(isinstance(row, tuple) and hasattr(row, "_fields") for row in rows): - headers = list(map(str, rows[0]._fields)) # type: ignore + headers = list(map(str, rows[0]._fields)) # type: ignore else: - raise ValueError("All objects in the rows list must be named tuples with the `_fields`` attribute") + raise ValueError( + "All objects in the rows list must be named tuples with the `_fields`` attribute" + ) elif len(rows) > 0 and hasattr(rows[0], "keys") and hasattr(rows[0], "values"): # dict-like object uniq_keys = set() # implements hashed lookup keys = [] # storage for set if headers == "firstrow": firstdict = rows[0] if len(rows) > 0 else {} - if type(tabular_data) != dict and not hasattr(tabular_data, 'keys'): - raise TypeError("Expected a dictionary-like object, got a {}".format(type(tabular_data))) - keys.extend(firstdict.keys()) # type: ignore + if type(tabular_data) != dict and not hasattr(tabular_data, "keys"): + raise TypeError( + "Expected a dictionary-like object, got a {}".format( + type(tabular_data) + ) + ) + keys.extend(firstdict.keys()) # type: ignore uniq_keys.update(keys) rows = rows[1:] for row in rows: @@ -1457,7 +1469,7 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): headers = list(map(str, headers)) elif headers == "firstrow": if len(rows) > 0: - headers = [firstdict.get(k, k) for k in keys] # type: ignore + headers = [firstdict.get(k, k) for k in keys] # type: ignore headers = list(map(str, headers)) else: headers = [] @@ -1554,9 +1566,7 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses: List[bool] = [T # Cast based on our internal type handling # Any future custom formatting of types (such as datetimes) # may need to be more explicit than just `str` of the object - casted_cell = ( - str(cell) if _isnumber(cell) else str(cell) - ) + casted_cell = str(cell) if _isnumber(cell) else str(cell) wrapped = [ "\n".join(wrapper.wrap(line)) for line in casted_cell.splitlines() @@ -2566,7 +2576,6 @@ def _wrap_chunks(self, chunks): chunks.reverse() while chunks: - # Start the list of chunks that will make up the current line. # cur_len is just the length of all the chunks in cur_line. cur_line = [] @@ -2719,7 +2728,7 @@ def _main(): print(usage) sys.exit(0) files = [sys.stdin] if not args else args - with (sys.stdout if outfile == "-" else open(outfile, "w")) as out: + with sys.stdout if outfile == "-" else open(outfile, "w") as out: for f in files: if f == "-": f = sys.stdin @@ -2737,7 +2746,7 @@ def _main(): else: if isinstance(f, io.TextIOBase): fobj = io.StringIO(f.read()) - + elif isinstance(f, (str, bytes)): with open(f) as fobj: _pprint_file( @@ -2750,7 +2759,7 @@ def _main(): file=out, colalign=colalign, ) - + else: raise TypeError(f"Unsupported file type: {type(f)}") From 31430f9444aece9f91f148c1660683f088460fc3 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 04:16:32 +0530 Subject: [PATCH 05/22] feat: add typings --- tabulate/__init__.py | 492 ++++++++++++++++++++++++++++++++----------- 1 file changed, 371 insertions(+), 121 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index 1759a477..a2e9a242 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -12,9 +12,20 @@ import dataclasses from typing import ( + Callable, + Tuple, Union, + TypeVar, List, + Dict, + Pattern, + Type, + Any, + Optional, + Sized, + Iterable, ) +from typing_extensions import Literal, TypedDict, Protocol try: import wcwidth # optional wide-character (CJK) support # type: ignore @@ -27,6 +38,7 @@ def _is_file(f): __all__ = ["tabulate", "tabulate_formats", "simple_separated_format"] + try: from .version import version as __version__ # noqa: F401 # type: ignore except ImportError: @@ -54,10 +66,49 @@ def _is_file(f): # It is purposely an unprintable character, very unlikely to be used in a table SEPARATING_LINE: str = "\001" -Line = namedtuple("Line", ["begin", "hline", "sep", "end"]) +T_ALIGNS = List[str] +T_COLWIDTHS = List[Union[int, str]] + +AST = TypeVar("AST", bound="AlignableString") + +ALIGNMENT = TypedDict( + "ALIGNMENT", { + "left": str, + "right": str, + "center": str, + "decimal": str, + } +) + +ColWidths = TypedDict( + "ColWidths", { + "colwidth": int, + } +) + +ColAligns = TypedDict( + "ColAlign", { + "colalign": str, + } +) + +Line = namedtuple( + "Line", [ + "begin", + "hline", + "sep", + "end", + ] +) -DataRow = namedtuple("DataRow", ["begin", "sep", "end"]) +DataRow = namedtuple( + "DataRow", [ + "begin", + "sep", + "end" + ] +) # A table structure is supposed to be: @@ -106,7 +157,40 @@ def _is_file(f): ) -def _is_separating_line(row): +class TableOptions(TypedDict): + colwidths_ignore: T_COLWIDTHS + colaligns_ignore: T_ALIGNS + + +class EscapeString(Protocol): + def __str__(self) -> str: + ... + + +class Convertible(Protocol): + def __call__(self, string: Union[str, bytes]) -> Any: + ... + + +class AlignableString(Protocol): + def __str__(self) -> str: + ... + + +class IndexType(Protocol): + def __len__(self) -> int: + ... + + def __iter__(self) -> Iterable: + ... + + +class HasLen(Protocol): + def __len__(self) -> int: + ... + + +def _is_separating_line(row: Union[list, str]) -> Literal[True, False]: row_type = type(row) is_sl = (row_type == list or row_type == str) and ( (len(row) >= 1 and row[0] == SEPARATING_LINE) @@ -115,7 +199,10 @@ def _is_separating_line(row): return is_sl -def _pipe_segment_with_colons(align, colwidth): +def _pipe_segment_with_colons( + align: str, + colwidth: int +) -> str: """Return a segment of a horizontal line with optional colons which indicate column's alignment (as in `pipe` output format).""" w = colwidth @@ -129,7 +216,10 @@ def _pipe_segment_with_colons(align, colwidth): return "-" * w -def _pipe_line_with_colons(colwidths, colaligns): +def _pipe_line_with_colons( + colwidths: List[int], + colaligns: List[str] +) -> str: """Return a horizontal line with optional colons to indicate column's alignment (as in `pipe` output format).""" if not colaligns: # e.g. printing an empty data frame (github issue #15) @@ -138,8 +228,13 @@ def _pipe_line_with_colons(colwidths, colaligns): return "|" + "|".join(segments) + "|" -def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns): - alignment = { +def _mediawiki_row_with_attrs( + separator: str, + cell_values: List[str], + colwidths: List[int], + colaligns: List[Literal["left", "right", "center", "decimal"]], +) -> str: + alignment: ALIGNMENT = { "left": "", "right": 'style="text-align: right;"| ', "center": 'style="text-align: center;"| ', @@ -154,20 +249,37 @@ def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns): return (separator + colsep.join(values_with_attrs)).rstrip() -def _textile_row_with_attrs(cell_values, colwidths, colaligns): +def _textile_row_with_attrs( + cell_values: List[str], + colwidths: List[int], + colaligns: List[ + Union[ + Literal["left"], + Literal["right"], + Literal["center"], + Literal["decimal"], + ] + ], +) -> str: cell_values[0] += " " - alignment = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."} + alignment: ALIGNMENT = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."} values = (alignment.get(a, "") + v for a, v in zip(colaligns, cell_values)) return "|" + "|".join(values) + "|" -def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore): +def _html_begin_table_without_header(options: TableOptions) -> str: # this table header will be suppressed if there is a header row return "\n" -def _html_row_with_attrs(celltag, unsafe, cell_values, colwidths, colaligns): - alignment = { +def _html_row_with_attrs( + celltag: str, + unsafe: bool, + cell_values: List[str], + colwidths: List[int], + colaligns: List[str], +) -> str: + alignment: ALIGNMENT = { "left": "", "right": ' style="text-align: right;"', "center": ' style="text-align: center;"', @@ -189,8 +301,14 @@ def _html_row_with_attrs(celltag, unsafe, cell_values, colwidths, colaligns): return rowhtml -def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=""): - alignment = { +def _moin_row_with_attrs( + celltag: str, + cell_values: List[str], + colwidths: List[int], + colaligns: List[Literal["left", "right", "center", "decimal"]], + header: str = "", +) -> str: + alignment: ALIGNMENT = { "left": "", "right": '', "center": '', @@ -203,7 +321,12 @@ def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=""): return "".join(values_with_attrs) + "||" -def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False, longtable=False): +def _latex_line_begin_tabular( + colwidths: List[int], + colaligns: List[Literal["left", "right", "center", "decimal"]], + booktabs: bool = False, + longtable: bool = False, +) -> str: alignment = {"left": "l", "right": "r", "center": "c", "decimal": "r"} tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns]) return "\n".join( @@ -216,13 +339,13 @@ def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False, longtable=Fa ) -def _asciidoc_row(is_header, *args): +def _asciidoc_row(is_header: bool, *args: Any): """handle header and data rows for asciidoc format""" - def make_header_line(is_header, colwidths, colaligns): + def make_header_line(is_header: bool, colwidths: List[int], colaligns: List[str]): # generate the column specifiers - alignment = {"left": "<", "right": ">", "center": "^", "decimal": ">"} + alignment: Dict[str, str] = {"left": "<", "right": ">", "center": "^", "decimal": ">"} # use the column widths generated by tabulate for the asciidoc column width specifiers asciidoc_alignments = zip( colwidths, [alignment[colalign] for colalign in colaligns] @@ -269,7 +392,7 @@ def make_header_line(is_header, colwidths, colaligns): ) -LATEX_ESCAPE_RULES = { +LATEX_ESCAPE_RULES: Dict[str, str] = { r"&": r"\&", r"%": r"\%", r"$": r"\$", @@ -285,7 +408,12 @@ def make_header_line(is_header, colwidths, colaligns): } -def _latex_row(cell_values, colwidths, colaligns, escrules=LATEX_ESCAPE_RULES): +def _latex_row( + cell_values: List[str], + colwidths: List[int], + colaligns: List[Literal['left', 'center', 'right']], + escrules: Dict[str, str] = LATEX_ESCAPE_RULES +): def escape_char(c): return escrules.get(c, c) @@ -294,8 +422,11 @@ def escape_char(c): return _build_simple_row(escaped_values, rowfmt) -def _rst_escape_first_column(rows, headers): - def escape_empty(val): +def _rst_escape_first_column( + rows: Union[List[List[Union[str, bytes]]], Any], + headers: List[Union[str, bytes]], +) -> Tuple[List[List[Union[str, bytes, Literal[".."]]]], List[Union[str, bytes, Literal[".."]]]]: + def escape_empty(val: Union[str, bytes]) -> Union[str, bytes, Literal[".."]]: if isinstance(val, (str, bytes)) and not val.strip(): return ".." else: @@ -313,7 +444,7 @@ def escape_empty(val): return new_rows, new_headers -_table_formats = { +_table_formats: Dict[str, TableFormat] = { "simple": TableFormat( lineabove=Line("", "-", " ", ""), linebelowheader=Line("", "-", " ", ""), @@ -682,12 +813,12 @@ def escape_empty(val): } -tabulate_formats = list(sorted(_table_formats.keys())) +tabulate_formats: List[str] = list(sorted(_table_formats.keys())) # The table formats for which multiline cells will be folded into subsequent # table rows. The key is the original format specified at the API. The value is # the format that will be used to represent the original format. -multiline_formats = { +multiline_formats: Dict[str, str] = { "plain": "plain", "simple": "simple", "grid": "grid", @@ -723,8 +854,8 @@ def escape_empty(val): # - tsv: TBD # - textile: Replace \n with
(must be well-formed XML) -_multiline_codes = re.compile(r"\r|\n|\r\n") -_multiline_codes_bytes = re.compile(b"\r|\n|\r\n") +_multiline_codes: Pattern[str] = re.compile(r"\r|\n|\r\n") +_multiline_codes_bytes: Pattern[bytes] = re.compile(b"\r|\n|\r\n") # Handle ANSI escape sequences for both control sequence introducer (CSI) and # operating system command (OSC). Both of these begin with 0x1b (or octal 033), @@ -749,12 +880,12 @@ def escape_empty(val): # params: 0..n optional key value pairs separated by ':' (e.g. foo=bar:baz=qux:abc=123) # URI: the actual URI with protocol scheme (e.g. https://, file://, ftp://) # ST: ESC followed by the '\' character (0x5c) -_esc = r"\x1b" -_csi = rf"{_esc}\[" -_osc = rf"{_esc}\]" -_st = rf"{_esc}\\" +_esc: str = r"\x1b" +_csi: str = rf"{_esc}\[" +_osc: str = rf"{_esc}\]" +_st: str = rf"{_esc}\\" -_ansi_escape_pat = rf""" +_ansi_escape_pat: str = rf""" ( # terminal colors, etc {_csi} # CSI @@ -772,16 +903,16 @@ def escape_empty(val): {_osc}8;;{_st} # "closing" OSC sequence ) """ -_ansi_codes = re.compile(_ansi_escape_pat, re.VERBOSE) -_ansi_codes_bytes = re.compile(_ansi_escape_pat.encode("utf8"), re.VERBOSE) -_ansi_color_reset_code = "\033[0m" +_ansi_codes: Pattern[str] = re.compile(_ansi_escape_pat, re.VERBOSE) +_ansi_codes_bytes: Pattern[bytes] = re.compile(_ansi_escape_pat.encode("utf8"), re.VERBOSE) +_ansi_color_reset_code: str = "\033[0m" -_float_with_thousands_separators = re.compile( +_float_with_thousands_separators: Pattern[str] = re.compile( r"^(([+-]?[0-9]{1,3})(?:,([0-9]{3}))*)?(?(1)\.[0-9]*|\.[0-9]+)?$" ) -def simple_separated_format(separator): +def simple_separated_format(separator: str): """Construct a simple TableFormat with columns separated by a separator. >>> tsv = simple_separated_format("\\t") ; \ @@ -834,7 +965,7 @@ def _isnumber_with_thousands_separator(string): return bool(re.match(_float_with_thousands_separators, string)) -def _isconvertible(conv, string): +def _isconvertible(conv: type, string: Any) -> bool: try: conv(string) return True @@ -842,7 +973,7 @@ def _isconvertible(conv, string): return False -def _isnumber(string): +def _isnumber(string: Any) -> bool: """ >>> _isnumber("123.45") True @@ -864,7 +995,7 @@ def _isnumber(string): return True -def _isint(string, inttype=int): +def _isint(string: Any, inttype: Type[int] = int) -> bool: """ >>> _isint("123") True @@ -883,7 +1014,7 @@ def _isint(string, inttype=int): ) -def _isbool(string): +def _isbool(string: Any) -> bool: """ >>> _isbool(True) True @@ -897,7 +1028,11 @@ def _isbool(string): ) -def _type(string, has_invisible=True, numparse=True): +def _type( + string: Any, + has_invisible: bool = True, + numparse: bool = True +) -> Any: """The least generic type (type(None), int, float, str, unicode). >>> _type(None) is type(None) @@ -932,7 +1067,7 @@ def _type(string, has_invisible=True, numparse=True): return str -def _afterpoint(string): +def _afterpoint(string: Any) -> Union[int, Literal[-1]]: """Symbols after a decimal point, -1 if the string lacks the decimal point. >>> _afterpoint("123.45") @@ -961,7 +1096,7 @@ def _afterpoint(string): return -1 # not a number -def _padleft(width, s): +def _padleft(width: int, s: Any): """Flush right. >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430' @@ -972,7 +1107,7 @@ def _padleft(width, s): return fmt.format(s) -def _padright(width, s): +def _padright(width: int, s: Any): """Flush left. >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 ' @@ -983,7 +1118,7 @@ def _padright(width, s): return fmt.format(s) -def _padboth(width, s): +def _padboth(width: int, s: Any): """Center string. >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 ' @@ -994,11 +1129,11 @@ def _padboth(width, s): return fmt.format(s) -def _padnone(ignore_width, s): +def _padnone(ignore_width: int, s: Any): return s -def _strip_ansi(s): +def _strip_ansi(s: Union[str, bytes]) -> Union[str, bytes]: r"""Remove ANSI escape sequences, both CSI (color codes, etc) and OSC hyperlinks. CSI sequences are simply removed from the output, while OSC hyperlinks are replaced @@ -1018,7 +1153,7 @@ def _strip_ansi(s): return _ansi_codes_bytes.sub(b"\4", s) -def _visible_width(s): +def _visible_width(s: Any) -> int: """Visible width of a printed string. ANSI color codes are removed. >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world") @@ -1036,19 +1171,26 @@ def _visible_width(s): return len_fn(str(s)) -def _is_multiline(s): +def _is_multiline(s: Union[str, bytes]) -> bool: if isinstance(s, str): return bool(re.search(_multiline_codes, s)) else: # a bytestring return bool(re.search(_multiline_codes_bytes, s)) -def _multiline_width(multiline_s, line_width_fn=len): +def _multiline_width( + multiline_s: str, + line_width_fn: Callable[[str], int] = len +) -> int: """Visible width of a potentially multiline content.""" return max(map(line_width_fn, re.split("[\r\n]", multiline_s))) -def _choose_width_fn(has_invisible, enable_widechars, is_multiline): +def _choose_width_fn( + has_invisible: bool, + enable_widechars: bool, + is_multiline: bool +): """Return a function to calculate visible cell width.""" if has_invisible: line_width_fn = _visible_width @@ -1063,7 +1205,11 @@ def _choose_width_fn(has_invisible, enable_widechars, is_multiline): return width_fn -def _align_column_choose_padfn(strings, alignment, has_invisible): +def _align_column_choose_padfn( + strings: List[str], + alignment: str, + has_invisible: bool +) -> Tuple[List[str], Callable]: if alignment == "right": if not PRESERVE_WHITESPACE: strings = [s.strip() for s in strings] @@ -1089,7 +1235,11 @@ def _align_column_choose_padfn(strings, alignment, has_invisible): return strings, padfn -def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline): +def _align_column_choose_width_fn( + has_invisible: bool, + enable_widechars: bool, + is_multiline: bool +): if has_invisible: line_width_fn = _visible_width elif enable_widechars: # optional wide-character support if available @@ -1103,12 +1253,15 @@ def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline) return width_fn -def _align_column_multiline_width(multiline_s, line_width_fn=len): +def _align_column_multiline_width( + multiline_s: str, + line_width_fn: Callable[[str], int] = len +) -> List[int]: """Visible width of a potentially multiline content.""" return list(map(line_width_fn, re.split("[\r\n]", multiline_s))) -def _flat_list(nested_list): +def _flat_list(nested_list: List[Union[List[int], int]]) -> List[int]: ret = [] for item in nested_list: if isinstance(item, list): @@ -1120,15 +1273,15 @@ def _flat_list(nested_list): def _align_column( - strings, - alignment, - minwidth=0, - has_invisible=True, - enable_widechars=False, - is_multiline=False, + strings: List[Union[AST, Any]], + alignment: str, + minwidth: int = 0, + has_invisible: bool = True, + enable_widechars: bool = False, + is_multiline: bool = False, ): """[string] -> [padded_string]""" - strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible) + strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible) # type: ignore width_fn = _align_column_choose_width_fn( has_invisible, enable_widechars, is_multiline ) @@ -1180,8 +1333,8 @@ def _align_column( return padded_strings -def _more_generic(type1, type2): - types = { +def _more_generic(type1: Type, type2: Type) -> Type: + types: Dict[type, int] = { type(None): 0, bool: 1, int: 2, @@ -1189,7 +1342,7 @@ def _more_generic(type1, type2): bytes: 4, str: 5, } - invtypes = { + invtypes: Dict[int, type] = { 5: str, 4: bytes, 3: float, @@ -1201,7 +1354,11 @@ def _more_generic(type1, type2): return invtypes[moregeneric] -def _column_type(strings, has_invisible=True, numparse=True): +def _column_type( + strings: Any, + has_invisible: bool = True, + numparse: bool = True +) -> Type: """The least generic type all column values are convertible to. >>> _column_type([True, False]) is bool @@ -1227,7 +1384,14 @@ def _column_type(strings, has_invisible=True, numparse=True): return reduce(_more_generic, types, bool) -def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): +def _format( + val: Any, + valtype: type, + floatfmt: str, + intfmt: str, + missingval: str = "", + has_invisible: bool = True +) -> str: """Format a value according to its type. Unicode is supported: @@ -1267,8 +1431,13 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): def _align_header( - header, alignment, width, visible_width, is_multiline=False, width_fn=None -): + header: Any, + alignment: Any, + width: int, + visible_width: int, + is_multiline: Optional[bool] = False, + width_fn: Optional[Callable[[str], int]] = None, +) -> str: "Pad string header to width chars given known visible_width of the header." if is_multiline: header_lines = re.split(_multiline_codes, header) @@ -1294,7 +1463,7 @@ def _align_header( return _padleft(width, header) -def _remove_separating_lines(rows): +def _remove_separating_lines(rows: Any) -> Tuple[List[str], Union[List[int], None]]: if type(rows) == list: separating_lines = [] sans_rows = [] @@ -1308,13 +1477,19 @@ def _remove_separating_lines(rows): return rows, None -def _reinsert_separating_lines(rows, separating_lines): +def _reinsert_separating_lines( + rows: Any, + separating_lines: Optional[List[int]], +) -> None: if separating_lines: for index in separating_lines: rows.insert(index, SEPARATING_LINE) -def _prepend_row_index(rows, index): +def _prepend_row_index( + rows: Any, + index: Union[IndexType, Any] +) -> List[List[Union[int, float, str]]]: """Add a left-most index column.""" if index is None or index is False: return rows @@ -1336,7 +1511,7 @@ def _prepend_row_index(rows, index): return rows -def _bool(val): +def _bool(val: Any) -> bool: "A wrapper around standard bool() which doesn't throw on NumPy arrays" try: return bool(val) @@ -1344,7 +1519,11 @@ def _bool(val): return False -def _normalize_tabular_data(tabular_data, headers, showindex="default"): +def _normalize_tabular_data( + tabular_data: Any, + headers: Any, + showindex: Union[str, int, bool] = "default" +) -> Any: """Transform a supported data type to a list of lists, and a list of headers. Supported tabular data types: @@ -1545,7 +1724,11 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): return rows, headers -def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses: List[bool] = [True]): +def _wrap_text_to_colwidths( + list_of_lists: Any, + colwidths: Any, + numparses: List[bool] = [True] +): if len(list_of_lists): num_cols = len(list_of_lists[0]) else: @@ -1580,7 +1763,11 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses: List[bool] = [T return result -def _to_str(s, encoding="utf8", errors="ignore"): +def _to_str( + s: Union[bytes, Any], + encoding: str = "utf8", + errors: str = "ignore", +) -> str: """ A type safe wrapper for converting a bytestring to str. This is essentially just a wrapper around .decode() intended for use with things like map(), but with some @@ -1606,21 +1793,21 @@ def _to_str(s, encoding="utf8", errors="ignore"): def tabulate( - tabular_data, - headers=(), - tablefmt="simple", - floatfmt=_DEFAULT_FLOATFMT, - intfmt=_DEFAULT_INTFMT, - numalign=_DEFAULT_ALIGN, - stralign=_DEFAULT_ALIGN, - missingval=_DEFAULT_MISSINGVAL, - showindex="default", - disable_numparse=False, - colalign=None, - maxcolwidths=None, - rowalign=None, - maxheadercolwidths=None, -): + tabular_data: Any, + headers: Any = (), + tablefmt: Any = "simple", + floatfmt: str = _DEFAULT_FLOATFMT, + intfmt: str = _DEFAULT_INTFMT, + numalign: str = _DEFAULT_ALIGN, + stralign: str = _DEFAULT_ALIGN, + missingval: str = _DEFAULT_MISSINGVAL, + showindex: Literal["default", True, False] = "default", + disable_numparse: bool = False, + colalign: Optional[List[str]] = None, + maxcolwidths: Any = None, + rowalign: Optional[List[str]] = None, + maxheadercolwidths: Any = None, +) -> Any: """Format a fixed width table for pretty printing. >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]])) @@ -2259,7 +2446,10 @@ def tabulate( ) -def _expand_numparse(disable_numparse, column_count): +def _expand_numparse( + disable_numparse: Union[bool, Iterable[int]], + column_count: int +) -> List[bool]: """ Return a list of bools of length `column_count` which indicates whether number parsing should be used on each column. @@ -2293,7 +2483,7 @@ def _expand_iterable(original: Union[Iterable, object], num_desired: int, defaul return [default] * num_desired -def _pad_row(cells, padding): +def _pad_row(cells: Any, padding: int) -> List[str]: if cells: pad = " " * padding padded_cells = [pad + cell + pad for cell in cells] @@ -2302,13 +2492,21 @@ def _pad_row(cells, padding): return cells -def _build_simple_row(padded_cells, rowfmt): +def _build_simple_row( + padded_cells: List[str], + rowfmt: Tuple[str, str, str], +) -> str: "Format row according to DataRow format without padding." begin, sep, end = rowfmt return (begin + sep.join(padded_cells) + end).rstrip() -def _build_row(padded_cells, colwidths, colaligns, rowfmt): +def _build_row( + padded_cells: List[str], + colwidths: List[int], + colaligns: List[str], + rowfmt: Any, +) -> Union[str, None]: "Return a string which represents a row of data cells." if not rowfmt: return None @@ -2318,13 +2516,25 @@ def _build_row(padded_cells, colwidths, colaligns, rowfmt): return _build_simple_row(padded_cells, rowfmt) -def _append_basic_row(lines, padded_cells, colwidths, colaligns, rowfmt, rowalign=None): +def _append_basic_row( + lines: List, + padded_cells: List[str], + colwidths: Any, + colaligns: Any, + rowfmt: str, + rowalign: Optional[str] = None, +) -> List[str]: # NOTE: rowalign is ignored and exists for api compatibility with _append_multiline_row lines.append(_build_row(padded_cells, colwidths, colaligns, rowfmt)) return lines -def _align_cell_veritically(text_lines, num_lines, column_width, row_alignment): +def _align_cell_veritically( + text_lines: Any, + num_lines: int, + column_width: int, + row_alignment: Union[Literal["bottom", "center", "top"], None] +) -> List[str]: delta_lines = num_lines - len(text_lines) blank = [" " * column_width] if row_alignment == "bottom": @@ -2338,7 +2548,13 @@ def _align_cell_veritically(text_lines, num_lines, column_width, row_alignment): def _append_multiline_row( - lines, padded_multiline_cells, padded_widths, colaligns, rowfmt, pad, rowalign=None + lines: Any, + padded_multiline_cells: List[str], + padded_widths: List[int], + colaligns: List[Literal["left", "center", "right"]], + rowfmt: str, + pad: int, + rowalign: Any = None, ): colwidths = [w - 2 * pad for w in padded_widths] cells_lines = [c.splitlines() for c in padded_multiline_cells] @@ -2359,7 +2575,11 @@ def _append_multiline_row( return lines -def _build_line(colwidths, colaligns, linefmt): +def _build_line( + colwidths: Any, + colaligns: Any, + linefmt: Any, +) -> Optional[str]: "Return a string which represents a horizontal line." if not linefmt: return None @@ -2371,7 +2591,12 @@ def _build_line(colwidths, colaligns, linefmt): return _build_simple_row(cells, (begin, sep, end)) -def _append_line(lines, colwidths, colaligns, linefmt): +def _append_line( + lines: Any, + colwidths: List[int], + colaligns: List[str], + linefmt: Any +) -> str: lines.append(_build_line(colwidths, colaligns, linefmt)) return lines @@ -2380,23 +2605,40 @@ class JupyterHTMLStr(str): """Wrap the string with a _repr_html_ method so that Jupyter displays the HTML table""" - def _repr_html_(self): + def _repr_html_(self) -> str: return self @property - def str(self): + def str(self) -> str: """add a .str property so that the raw string is still accessible""" return self - - -def _format_table(fmt, headers, rows, colwidths, colaligns, is_multiline, rowaligns): + + +class JupyterHTML(Protocol): + def _repr_html(self) -> str: + ... + +def display_in_jupyter(item: Type[JupyterHTML]) -> None: + """Wrapper function for Jupyter display.""" + ... # TODO: add something that'd return the html. + + +def _format_table( + fmt, + headers: Any, + rows: Any, + colwidths: Any, + colaligns: Any, + is_multiline: bool, + rowaligns: Any +): """Produce a plain-text representation of the table.""" - lines = [] + lines: List[str] = [] hidden = fmt.with_header_hide if (headers and fmt.with_header_hide) else [] pad = fmt.padding headerrow = fmt.headerrow - padded_widths = [(w + 2 * pad) for w in colwidths] + padded_widths: List[int] = [(w + 2 * pad) for w in colwidths] if is_multiline: pad_row = lambda row, _: row # noqa do it later, in _append_multiline_row append_row = partial(_append_multiline_row, pad=pad) @@ -2404,8 +2646,8 @@ def _format_table(fmt, headers, rows, colwidths, colaligns, is_multiline, rowali pad_row = _pad_row append_row = _append_basic_row - padded_headers = pad_row(headers, pad) - padded_rows = [pad_row(row, pad) for row in rows] + padded_headers: List[str] = pad_row(headers, pad) + padded_rows: List[List[str]] = [pad_row(row, pad) for row in rows] if fmt.lineabove and "lineabove" not in hidden: _append_line(lines, padded_widths, colaligns, fmt.lineabove) @@ -2468,13 +2710,13 @@ class _CustomTextWrap(textwrap.TextWrapper): and line appending logic. """ - def __init__(self, *args, **kwargs): - self._active_codes = [] - self.max_lines = None # For python2 compatibility + def __init__(self, *args: Any, **kwargs: Any): + self._active_codes: List[str] = [] + self.max_lines: Optional[int] = None # For python2 compatibility textwrap.TextWrapper.__init__(self, *args, **kwargs) @staticmethod - def _len(item): + def _len(item: str) -> Union[int, Literal[0]]: """Custom len that gets console column width for wide and non-wide characters as well as ignores color codes""" stripped = _strip_ansi(item) @@ -2483,7 +2725,7 @@ def _len(item): else: return len(stripped) - def _update_lines(self, lines, new_line): + def _update_lines(self, lines: List[str], new_line: str) -> None: """Adds a new line to the list of lines the text is being wrapped into This function will also track any ANSI color codes in this string as well as add any colors from previous lines order to preserve the same formatting @@ -2510,10 +2752,18 @@ def _update_lines(self, lines, new_line): lines.append(new_line) - def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): - """_handle_long_word(chunks : [string], - cur_line : [string], - cur_len : int, width : int) + def _handle_long_word( + self, + reversed_chunks: List[str], + cur_line: List[str], + cur_len: int, + width: int + ) -> None: + """_handle_long_word( + chunks : [string], + cur_line : [string], + cur_len : int, width : int + ) Handle a chunk of text (most likely a word, not whitespace) that is too long to fit in any line. """ @@ -2548,7 +2798,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # cur_len will be zero, so the next line will be entirely # devoted to the long word that we can't handle right now. - def _wrap_chunks(self, chunks): + def _wrap_chunks(self, chunks: List[str]) -> List[str]: """_wrap_chunks(chunks : [string]) -> [string] Wrap a sequence of text chunks and return a list of lines of length 'self.width' or less. (If 'break_long_words' is false, @@ -2659,7 +2909,7 @@ def _wrap_chunks(self, chunks): return lines -def _main(): +def _main() -> None: """\ Usage: tabulate [options] [FILE ...] From ed292dba47b25838a9fac9e389eb0dec712718e3 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 04:18:59 +0530 Subject: [PATCH 06/22] feat: add a new table format (fancy_dottedline) --- tabulate/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index a2e9a242..fdd326f5 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -605,6 +605,16 @@ def escape_empty(val: Union[str, bytes]) -> Union[str, bytes, Literal[".."]]: padding=1, with_header_hide=None, ), + "fancy_dottedline": TableFormat( + lineabove=Line("⋅", "⋯", "⋯", "⋅"), + linebelowheader=Line("⋮", "⋯", "⋯", "⋮"), + linebetweenrows=None, + linebelow=Line("⋅", "⋯", "⋯", "⋅"), + headerrow=DataRow("⋮", "⋮", "⋮"), + datarow=DataRow("⋮", "⋮", "⋮"), + padding=1, + with_header_hide=None, + ), "github": TableFormat( lineabove=Line("|", "-", "|", "|"), linebelowheader=Line("|", "-", "|", "|"), From c8561c3d241905202aa244eeb0b5f9b0625521b5 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 04:32:09 +0530 Subject: [PATCH 07/22] Update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index d64b99ab..26a6e612 100644 --- a/README.md +++ b/README.md @@ -400,6 +400,14 @@ corresponds to the `pipe` format without alignment colons: │ bacon │ 0 │ ╘════════╧═══════╛ +`fancy_dottedline` is the same as the `simple_grid` format. + + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="fancy_dottedline")) + ⋅⋯⋯⋯⋯⋯⋯⋯⋯⋯⋯⋯ + ⋮ spam ⋮ 41.9999 ⋮ + ⋮ eggs ⋮ 451 ⋮ + ⋅⋯⋯⋯⋯⋯⋯⋯⋯⋯⋯⋯ + `presto` is like tables formatted by Presto cli: ```pycon From 9c8db627db1e9f7e22c94779612a676120ca39cd Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 04:37:00 +0530 Subject: [PATCH 08/22] format: pydev --- tabulate/__init__.py | 282 ++++++++++++++++++++++--------------------- 1 file changed, 143 insertions(+), 139 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index fdd326f5..d2470d84 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -44,7 +44,6 @@ def _is_file(f): except ImportError: pass # running __init__.py as a script, AppVeyor pytests - # minimum extra space in headers MIN_PADDING: int = 2 @@ -58,7 +57,6 @@ def _is_file(f): # depending on the formatter _DEFAULT_ALIGN: str = "default" - # if True, enable wide-character (CJK) support WIDE_CHARS_MODE: bool = wcwidth is not None @@ -94,23 +92,21 @@ def _is_file(f): Line = namedtuple( "Line", [ - "begin", - "hline", - "sep", + "begin", + "hline", + "sep", "end", ] ) - DataRow = namedtuple( "DataRow", [ - "begin", - "sep", + "begin", + "sep", "end" ] ) - # A table structure is supposed to be: # # --- lineabove --------- @@ -163,21 +159,25 @@ class TableOptions(TypedDict): class EscapeString(Protocol): + def __str__(self) -> str: ... class Convertible(Protocol): + def __call__(self, string: Union[str, bytes]) -> Any: ... class AlignableString(Protocol): + def __str__(self) -> str: ... class IndexType(Protocol): + def __len__(self) -> int: ... @@ -186,6 +186,7 @@ def __iter__(self) -> Iterable: class HasLen(Protocol): + def __len__(self) -> int: ... @@ -200,7 +201,7 @@ def _is_separating_line(row: Union[list, str]) -> Literal[True, False]: def _pipe_segment_with_colons( - align: str, + align: str, colwidth: int ) -> str: """Return a segment of a horizontal line with optional colons which @@ -217,7 +218,7 @@ def _pipe_segment_with_colons( def _pipe_line_with_colons( - colwidths: List[int], + colwidths: List[int], colaligns: List[str] ) -> str: """Return a horizontal line with optional colons to indicate column's @@ -229,9 +230,9 @@ def _pipe_line_with_colons( def _mediawiki_row_with_attrs( - separator: str, - cell_values: List[str], - colwidths: List[int], + separator: str, + cell_values: List[str], + colwidths: List[int], colaligns: List[Literal["left", "right", "center", "decimal"]], ) -> str: alignment: ALIGNMENT = { @@ -250,13 +251,13 @@ def _mediawiki_row_with_attrs( def _textile_row_with_attrs( - cell_values: List[str], - colwidths: List[int], + cell_values: List[str], + colwidths: List[int], colaligns: List[ Union[ - Literal["left"], - Literal["right"], - Literal["center"], + Literal["left"], + Literal["right"], + Literal["center"], Literal["decimal"], ] ], @@ -273,10 +274,10 @@ def _html_begin_table_without_header(options: TableOptions) -> str: def _html_row_with_attrs( - celltag: str, - unsafe: bool, - cell_values: List[str], - colwidths: List[int], + celltag: str, + unsafe: bool, + cell_values: List[str], + colwidths: List[int], colaligns: List[str], ) -> str: alignment: ALIGNMENT = { @@ -302,11 +303,11 @@ def _html_row_with_attrs( def _moin_row_with_attrs( - celltag: str, - cell_values: List[str], - colwidths: List[int], - colaligns: List[Literal["left", "right", "center", "decimal"]], - header: str = "", + celltag: str, + cell_values: List[str], + colwidths: List[int], + colaligns: List[Literal["left", "right", "center", "decimal"]], + header: str="", ) -> str: alignment: ALIGNMENT = { "left": "", @@ -322,18 +323,18 @@ def _moin_row_with_attrs( def _latex_line_begin_tabular( - colwidths: List[int], - colaligns: List[Literal["left", "right", "center", "decimal"]], - booktabs: bool = False, - longtable: bool = False, + colwidths: List[int], + colaligns: List[Literal["left", "right", "center", "decimal"]], + booktabs: bool=False, + longtable: bool=False, ) -> str: alignment = {"left": "l", "right": "r", "center": "c", "decimal": "r"} tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns]) return "\n".join( [ ("\\begin{tabular}{" if not longtable else "\\begin{longtable}{") - + tabular_columns_fmt - + "}", + +tabular_columns_fmt + +"}", "\\toprule" if booktabs else "\\hline", ] ) @@ -388,7 +389,7 @@ def make_header_line(is_header: bool, colwidths: List[int], colaligns: List[str] else: raise ValueError( " _asciidoc_row() requires two (colwidths, colaligns) " - + "or three (cell_values, colwidths, colaligns) arguments) " + +"or three (cell_values, colwidths, colaligns) arguments) " ) @@ -409,11 +410,12 @@ def make_header_line(is_header: bool, colwidths: List[int], colaligns: List[str] def _latex_row( - cell_values: List[str], - colwidths: List[int], - colaligns: List[Literal['left', 'center', 'right']], - escrules: Dict[str, str] = LATEX_ESCAPE_RULES + cell_values: List[str], + colwidths: List[int], + colaligns: List[Literal['left', 'center', 'right']], + escrules: Dict[str, str]=LATEX_ESCAPE_RULES ): + def escape_char(c): return escrules.get(c, c) @@ -423,9 +425,10 @@ def escape_char(c): def _rst_escape_first_column( - rows: Union[List[List[Union[str, bytes]]], Any], + rows: Union[List[List[Union[str, bytes]]], Any], headers: List[Union[str, bytes]], ) -> Tuple[List[List[Union[str, bytes, Literal[".."]]]], List[Union[str, bytes, Literal[".."]]]]: + def escape_empty(val: Union[str, bytes]) -> Union[str, bytes, Literal[".."]]: if isinstance(val, (str, bytes)) and not val.strip(): return ".." @@ -822,7 +825,6 @@ def escape_empty(val: Union[str, bytes]) -> Union[str, bytes, Literal[".."]]: ), } - tabulate_formats: List[str] = list(sorted(_table_formats.keys())) # The table formats for which multiline cells will be folded into subsequent @@ -1005,7 +1007,7 @@ def _isnumber(string: Any) -> bool: return True -def _isint(string: Any, inttype: Type[int] = int) -> bool: +def _isint(string: Any, inttype: Type[int]=int) -> bool: """ >>> _isint("123") True @@ -1039,9 +1041,9 @@ def _isbool(string: Any) -> bool: def _type( - string: Any, - has_invisible: bool = True, - numparse: bool = True + string: Any, + has_invisible: bool=True, + numparse: bool=True ) -> Any: """The least generic type (type(None), int, float, str, unicode). @@ -1189,16 +1191,16 @@ def _is_multiline(s: Union[str, bytes]) -> bool: def _multiline_width( - multiline_s: str, - line_width_fn: Callable[[str], int] = len + multiline_s: str, + line_width_fn: Callable[[str], int]=len ) -> int: """Visible width of a potentially multiline content.""" return max(map(line_width_fn, re.split("[\r\n]", multiline_s))) def _choose_width_fn( - has_invisible: bool, - enable_widechars: bool, + has_invisible: bool, + enable_widechars: bool, is_multiline: bool ): """Return a function to calculate visible cell width.""" @@ -1216,8 +1218,8 @@ def _choose_width_fn( def _align_column_choose_padfn( - strings: List[str], - alignment: str, + strings: List[str], + alignment: str, has_invisible: bool ) -> Tuple[List[str], Callable]: if alignment == "right": @@ -1246,8 +1248,8 @@ def _align_column_choose_padfn( def _align_column_choose_width_fn( - has_invisible: bool, - enable_widechars: bool, + has_invisible: bool, + enable_widechars: bool, is_multiline: bool ): if has_invisible: @@ -1264,8 +1266,8 @@ def _align_column_choose_width_fn( def _align_column_multiline_width( - multiline_s: str, - line_width_fn: Callable[[str], int] = len + multiline_s: str, + line_width_fn: Callable[[str], int]=len ) -> List[int]: """Visible width of a potentially multiline content.""" return list(map(line_width_fn, re.split("[\r\n]", multiline_s))) @@ -1285,13 +1287,13 @@ def _flat_list(nested_list: List[Union[List[int], int]]) -> List[int]: def _align_column( strings: List[Union[AST, Any]], alignment: str, - minwidth: int = 0, - has_invisible: bool = True, - enable_widechars: bool = False, - is_multiline: bool = False, + minwidth: int=0, + has_invisible: bool=True, + enable_widechars: bool=False, + is_multiline: bool=False, ): """[string] -> [padded_string]""" - strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible) # type: ignore + strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible) # type: ignore width_fn = _align_column_choose_width_fn( has_invisible, enable_widechars, is_multiline ) @@ -1365,9 +1367,9 @@ def _more_generic(type1: Type, type2: Type) -> Type: def _column_type( - strings: Any, - has_invisible: bool = True, - numparse: bool = True + strings: Any, + has_invisible: bool=True, + numparse: bool=True ) -> Type: """The least generic type all column values are convertible to. @@ -1395,12 +1397,12 @@ def _column_type( def _format( - val: Any, - valtype: type, - floatfmt: str, - intfmt: str, - missingval: str = "", - has_invisible: bool = True + val: Any, + valtype: type, + floatfmt: str, + intfmt: str, + missingval: str="", + has_invisible: bool=True ) -> str: """Format a value according to its type. @@ -1441,12 +1443,12 @@ def _format( def _align_header( - header: Any, - alignment: Any, - width: int, - visible_width: int, - is_multiline: Optional[bool] = False, - width_fn: Optional[Callable[[str], int]] = None, + header: Any, + alignment: Any, + width: int, + visible_width: int, + is_multiline: Optional[bool]=False, + width_fn: Optional[Callable[[str], int]]=None, ) -> str: "Pad string header to width chars given known visible_width of the header." if is_multiline: @@ -1473,7 +1475,7 @@ def _align_header( return _padleft(width, header) -def _remove_separating_lines(rows: Any) -> Tuple[List[str], Union[List[int], None]]: +def _remove_separating_lines(rows: Any) -> Tuple[List[str], Union[List[int], None]]: if type(rows) == list: separating_lines = [] sans_rows = [] @@ -1488,7 +1490,7 @@ def _remove_separating_lines(rows: Any) -> Tuple[List[str], Union[List[int], No def _reinsert_separating_lines( - rows: Any, + rows: Any, separating_lines: Optional[List[int]], ) -> None: if separating_lines: @@ -1497,7 +1499,7 @@ def _reinsert_separating_lines( def _prepend_row_index( - rows: Any, + rows: Any, index: Union[IndexType, Any] ) -> List[List[Union[int, float, str]]]: """Add a left-most index column.""" @@ -1506,7 +1508,7 @@ def _prepend_row_index( if isinstance(index, Sized) and len(index) != len(rows): raise ValueError( "index must be as long as the number of data rows: " - + "len(index)={} len(rows)={}".format(len(index), len(rows)) + +"len(index)={} len(rows)={}".format(len(index), len(rows)) ) if not isinstance(index, Iterable): raise ValueError("Index must be iterable") @@ -1530,9 +1532,9 @@ def _bool(val: Any) -> bool: def _normalize_tabular_data( - tabular_data: Any, - headers: Any, - showindex: Union[str, int, bool] = "default" + tabular_data: Any, + headers: Any, + showindex: Union[str, int, bool]="default" ) -> Any: """Transform a supported data type to a list of lists, and a list of headers. @@ -1735,9 +1737,9 @@ def _normalize_tabular_data( def _wrap_text_to_colwidths( - list_of_lists: Any, - colwidths: Any, - numparses: List[bool] = [True] + list_of_lists: Any, + colwidths: Any, + numparses: List[bool]=[True] ): if len(list_of_lists): num_cols = len(list_of_lists[0]) @@ -1774,9 +1776,9 @@ def _wrap_text_to_colwidths( def _to_str( - s: Union[bytes, Any], - encoding: str = "utf8", - errors: str = "ignore", + s: Union[bytes, Any], + encoding: str="utf8", + errors: str="ignore", ) -> str: """ A type safe wrapper for converting a bytestring to str. This is essentially just @@ -1804,19 +1806,19 @@ def _to_str( def tabulate( tabular_data: Any, - headers: Any = (), - tablefmt: Any = "simple", - floatfmt: str = _DEFAULT_FLOATFMT, - intfmt: str = _DEFAULT_INTFMT, - numalign: str = _DEFAULT_ALIGN, - stralign: str = _DEFAULT_ALIGN, - missingval: str = _DEFAULT_MISSINGVAL, - showindex: Literal["default", True, False] = "default", - disable_numparse: bool = False, - colalign: Optional[List[str]] = None, - maxcolwidths: Any = None, - rowalign: Optional[List[str]] = None, - maxheadercolwidths: Any = None, + headers: Any=(), + tablefmt: Any="simple", + floatfmt: str=_DEFAULT_FLOATFMT, + intfmt: str=_DEFAULT_INTFMT, + numalign: str=_DEFAULT_ALIGN, + stralign: str=_DEFAULT_ALIGN, + missingval: str=_DEFAULT_MISSINGVAL, + showindex: Literal["default", True, False]="default", + disable_numparse: bool=False, + colalign: Optional[List[str]]=None, + maxcolwidths: Any=None, + rowalign: Optional[List[str]]=None, + maxheadercolwidths: Any=None, ) -> Any: """Format a fixed width table for pretty printing. @@ -2457,7 +2459,7 @@ def tabulate( def _expand_numparse( - disable_numparse: Union[bool, Iterable[int]], + disable_numparse: Union[bool, Iterable[int]], column_count: int ) -> List[bool]: """ @@ -2503,7 +2505,7 @@ def _pad_row(cells: Any, padding: int) -> List[str]: def _build_simple_row( - padded_cells: List[str], + padded_cells: List[str], rowfmt: Tuple[str, str, str], ) -> str: "Format row according to DataRow format without padding." @@ -2512,9 +2514,9 @@ def _build_simple_row( def _build_row( - padded_cells: List[str], - colwidths: List[int], - colaligns: List[str], + padded_cells: List[str], + colwidths: List[int], + colaligns: List[str], rowfmt: Any, ) -> Union[str, None]: "Return a string which represents a row of data cells." @@ -2527,12 +2529,12 @@ def _build_row( def _append_basic_row( - lines: List, - padded_cells: List[str], - colwidths: Any, - colaligns: Any, - rowfmt: str, - rowalign: Optional[str] = None, + lines: List, + padded_cells: List[str], + colwidths: Any, + colaligns: Any, + rowfmt: str, + rowalign: Optional[str]=None, ) -> List[str]: # NOTE: rowalign is ignored and exists for api compatibility with _append_multiline_row lines.append(_build_row(padded_cells, colwidths, colaligns, rowfmt)) @@ -2540,9 +2542,9 @@ def _append_basic_row( def _align_cell_veritically( - text_lines: Any, - num_lines: int, - column_width: int, + text_lines: Any, + num_lines: int, + column_width: int, row_alignment: Union[Literal["bottom", "center", "top"], None] ) -> List[str]: delta_lines = num_lines - len(text_lines) @@ -2558,13 +2560,13 @@ def _align_cell_veritically( def _append_multiline_row( - lines: Any, - padded_multiline_cells: List[str], - padded_widths: List[int], - colaligns: List[Literal["left", "center", "right"]], - rowfmt: str, - pad: int, - rowalign: Any = None, + lines: Any, + padded_multiline_cells: List[str], + padded_widths: List[int], + colaligns: List[Literal["left", "center", "right"]], + rowfmt: str, + pad: int, + rowalign: Any=None, ): colwidths = [w - 2 * pad for w in padded_widths] cells_lines = [c.splitlines() for c in padded_multiline_cells] @@ -2586,8 +2588,8 @@ def _append_multiline_row( def _build_line( - colwidths: Any, - colaligns: Any, + colwidths: Any, + colaligns: Any, linefmt: Any, ) -> Optional[str]: "Return a string which represents a horizontal line." @@ -2602,9 +2604,9 @@ def _build_line( def _append_line( - lines: Any, - colwidths: List[int], - colaligns: List[str], + lines: Any, + colwidths: List[int], + colaligns: List[str], linefmt: Any ) -> str: lines.append(_build_line(colwidths, colaligns, linefmt)) @@ -2625,21 +2627,23 @@ def str(self) -> str: class JupyterHTML(Protocol): + def _repr_html(self) -> str: ... + def display_in_jupyter(item: Type[JupyterHTML]) -> None: """Wrapper function for Jupyter display.""" - ... # TODO: add something that'd return the html. + ... # TODO: add something that'd return the html. def _format_table( - fmt, - headers: Any, - rows: Any, - colwidths: Any, - colaligns: Any, - is_multiline: bool, + fmt, + headers: Any, + rows: Any, + colwidths: Any, + colaligns: Any, + is_multiline: bool, rowaligns: Any ): """Produce a plain-text representation of the table.""" @@ -2743,7 +2747,7 @@ def _update_lines(self, lines: List[str], new_line: str) -> None: """ code_matches = [x for x in _ansi_codes.finditer(new_line)] color_codes = [ - code.string[code.span()[0] : code.span()[1]] for code in code_matches + code.string[code.span()[0]: code.span()[1]] for code in code_matches ] # Add color codes from earlier in the unwrapped line, and then track any new ones we add. @@ -2763,9 +2767,9 @@ def _update_lines(self, lines: List[str], new_line: str) -> None: lines.append(new_line) def _handle_long_word( - self, - reversed_chunks: List[str], - cur_line: List[str], + self, + reversed_chunks: List[str], + cur_line: List[str], cur_len: int, width: int ) -> None: @@ -2794,7 +2798,7 @@ def _handle_long_word( while self._len(chunk[:i]) <= space_left: i = i + 1 cur_line.append(chunk[: i - 1]) - reversed_chunks[-1] = chunk[i - 1 :] + reversed_chunks[-1] = chunk[i - 1:] # Otherwise, we have to preserve the long word intact. Only add # it to the current line if there's nothing already there -- From c520e42cdb02a9715952575339f3aca26175930c Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 14:20:19 +0530 Subject: [PATCH 09/22] improve error when passing too many entries --- tabulate/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index d2470d84..4a451f59 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -2419,8 +2419,15 @@ def tabulate( aligns = [numalign if ct in [int, float] else stralign for ct in coltypes] if colalign is not None: assert isinstance(colalign, Iterable) + if not isinstance(colalign, (list, tuple)): + raise ValueError("Expected list or tuple, got {}".format(type(colalign))) for idx, align in enumerate(colalign): - aligns[idx] = align + try: + aligns[idx] = align + except IndexError: + # This means the user has passed more values than needed in ``colalign`` + # Drop the extra alignment + pass minwidths = ( [width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols) ) From c8e29ea4af158c1a6c040540456ae13bd9f7bc27 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 14:38:11 +0530 Subject: [PATCH 10/22] format: black --- tabulate/__init__.py | 229 +++++++++++++++++++------------------------ 1 file changed, 99 insertions(+), 130 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index 4a451f59..9a8a5e43 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -70,42 +70,40 @@ def _is_file(f): AST = TypeVar("AST", bound="AlignableString") ALIGNMENT = TypedDict( - "ALIGNMENT", { + "ALIGNMENT", + { "left": str, "right": str, "center": str, "decimal": str, - } + }, ) ColWidths = TypedDict( - "ColWidths", { + "ColWidths", + { "colwidth": int, - } + }, ) ColAligns = TypedDict( - "ColAlign", { + "ColAlign", + { "colalign": str, - } + }, ) Line = namedtuple( - "Line", [ + "Line", + [ "begin", "hline", "sep", "end", - ] + ], ) -DataRow = namedtuple( - "DataRow", [ - "begin", - "sep", - "end" - ] -) +DataRow = namedtuple("DataRow", ["begin", "sep", "end"]) # A table structure is supposed to be: # @@ -156,40 +154,35 @@ def _is_file(f): class TableOptions(TypedDict): colwidths_ignore: T_COLWIDTHS colaligns_ignore: T_ALIGNS - - -class EscapeString(Protocol): + +class EscapeString(Protocol): def __str__(self) -> str: ... - - -class Convertible(Protocol): + +class Convertible(Protocol): def __call__(self, string: Union[str, bytes]) -> Any: ... - - -class AlignableString(Protocol): + +class AlignableString(Protocol): def __str__(self) -> str: ... - - -class IndexType(Protocol): + +class IndexType(Protocol): def __len__(self) -> int: ... - + def __iter__(self) -> Iterable: ... - - -class HasLen(Protocol): + +class HasLen(Protocol): def __len__(self) -> int: ... - + def _is_separating_line(row: Union[list, str]) -> Literal[True, False]: row_type = type(row) @@ -200,10 +193,7 @@ def _is_separating_line(row: Union[list, str]) -> Literal[True, False]: return is_sl -def _pipe_segment_with_colons( - align: str, - colwidth: int -) -> str: +def _pipe_segment_with_colons(align: str, colwidth: int) -> str: """Return a segment of a horizontal line with optional colons which indicate column's alignment (as in `pipe` output format).""" w = colwidth @@ -217,10 +207,7 @@ def _pipe_segment_with_colons( return "-" * w -def _pipe_line_with_colons( - colwidths: List[int], - colaligns: List[str] -) -> str: +def _pipe_line_with_colons(colwidths: List[int], colaligns: List[str]) -> str: """Return a horizontal line with optional colons to indicate column's alignment (as in `pipe` output format).""" if not colaligns: # e.g. printing an empty data frame (github issue #15) @@ -263,7 +250,12 @@ def _textile_row_with_attrs( ], ) -> str: cell_values[0] += " " - alignment: ALIGNMENT = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."} + alignment: ALIGNMENT = { + "left": "<.", + "right": ">.", + "center": "=.", + "decimal": ">.", + } values = (alignment.get(a, "") + v for a, v in zip(colaligns, cell_values)) return "|" + "|".join(values) + "|" @@ -307,7 +299,7 @@ def _moin_row_with_attrs( cell_values: List[str], colwidths: List[int], colaligns: List[Literal["left", "right", "center", "decimal"]], - header: str="", + header: str = "", ) -> str: alignment: ALIGNMENT = { "left": "", @@ -325,16 +317,16 @@ def _moin_row_with_attrs( def _latex_line_begin_tabular( colwidths: List[int], colaligns: List[Literal["left", "right", "center", "decimal"]], - booktabs: bool=False, - longtable: bool=False, + booktabs: bool = False, + longtable: bool = False, ) -> str: alignment = {"left": "l", "right": "r", "center": "c", "decimal": "r"} tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns]) return "\n".join( [ ("\\begin{tabular}{" if not longtable else "\\begin{longtable}{") - +tabular_columns_fmt - +"}", + + tabular_columns_fmt + + "}", "\\toprule" if booktabs else "\\hline", ] ) @@ -346,7 +338,12 @@ def _asciidoc_row(is_header: bool, *args: Any): def make_header_line(is_header: bool, colwidths: List[int], colaligns: List[str]): # generate the column specifiers - alignment: Dict[str, str] = {"left": "<", "right": ">", "center": "^", "decimal": ">"} + alignment: Dict[str, str] = { + "left": "<", + "right": ">", + "center": "^", + "decimal": ">", + } # use the column widths generated by tabulate for the asciidoc column width specifiers asciidoc_alignments = zip( colwidths, [alignment[colalign] for colalign in colaligns] @@ -389,7 +386,7 @@ def make_header_line(is_header: bool, colwidths: List[int], colaligns: List[str] else: raise ValueError( " _asciidoc_row() requires two (colwidths, colaligns) " - +"or three (cell_values, colwidths, colaligns) arguments) " + + "or three (cell_values, colwidths, colaligns) arguments) " ) @@ -412,10 +409,9 @@ def make_header_line(is_header: bool, colwidths: List[int], colaligns: List[str] def _latex_row( cell_values: List[str], colwidths: List[int], - colaligns: List[Literal['left', 'center', 'right']], - escrules: Dict[str, str]=LATEX_ESCAPE_RULES + colaligns: List[Literal["left", "center", "right"]], + escrules: Dict[str, str] = LATEX_ESCAPE_RULES, ): - def escape_char(c): return escrules.get(c, c) @@ -427,8 +423,9 @@ def escape_char(c): def _rst_escape_first_column( rows: Union[List[List[Union[str, bytes]]], Any], headers: List[Union[str, bytes]], -) -> Tuple[List[List[Union[str, bytes, Literal[".."]]]], List[Union[str, bytes, Literal[".."]]]]: - +) -> Tuple[ + List[List[Union[str, bytes, Literal[".."]]]], List[Union[str, bytes, Literal[".."]]] +]: def escape_empty(val: Union[str, bytes]) -> Union[str, bytes, Literal[".."]]: if isinstance(val, (str, bytes)) and not val.strip(): return ".." @@ -916,7 +913,9 @@ def escape_empty(val: Union[str, bytes]) -> Union[str, bytes, Literal[".."]]: ) """ _ansi_codes: Pattern[str] = re.compile(_ansi_escape_pat, re.VERBOSE) -_ansi_codes_bytes: Pattern[bytes] = re.compile(_ansi_escape_pat.encode("utf8"), re.VERBOSE) +_ansi_codes_bytes: Pattern[bytes] = re.compile( + _ansi_escape_pat.encode("utf8"), re.VERBOSE +) _ansi_color_reset_code: str = "\033[0m" _float_with_thousands_separators: Pattern[str] = re.compile( @@ -1007,7 +1006,7 @@ def _isnumber(string: Any) -> bool: return True -def _isint(string: Any, inttype: Type[int]=int) -> bool: +def _isint(string: Any, inttype: Type[int] = int) -> bool: """ >>> _isint("123") True @@ -1040,11 +1039,7 @@ def _isbool(string: Any) -> bool: ) -def _type( - string: Any, - has_invisible: bool=True, - numparse: bool=True -) -> Any: +def _type(string: Any, has_invisible: bool = True, numparse: bool = True) -> Any: """The least generic type (type(None), int, float, str, unicode). >>> _type(None) is type(None) @@ -1191,18 +1186,13 @@ def _is_multiline(s: Union[str, bytes]) -> bool: def _multiline_width( - multiline_s: str, - line_width_fn: Callable[[str], int]=len + multiline_s: str, line_width_fn: Callable[[str], int] = len ) -> int: """Visible width of a potentially multiline content.""" return max(map(line_width_fn, re.split("[\r\n]", multiline_s))) -def _choose_width_fn( - has_invisible: bool, - enable_widechars: bool, - is_multiline: bool -): +def _choose_width_fn(has_invisible: bool, enable_widechars: bool, is_multiline: bool): """Return a function to calculate visible cell width.""" if has_invisible: line_width_fn = _visible_width @@ -1218,9 +1208,7 @@ def _choose_width_fn( def _align_column_choose_padfn( - strings: List[str], - alignment: str, - has_invisible: bool + strings: List[str], alignment: str, has_invisible: bool ) -> Tuple[List[str], Callable]: if alignment == "right": if not PRESERVE_WHITESPACE: @@ -1248,9 +1236,7 @@ def _align_column_choose_padfn( def _align_column_choose_width_fn( - has_invisible: bool, - enable_widechars: bool, - is_multiline: bool + has_invisible: bool, enable_widechars: bool, is_multiline: bool ): if has_invisible: line_width_fn = _visible_width @@ -1266,8 +1252,7 @@ def _align_column_choose_width_fn( def _align_column_multiline_width( - multiline_s: str, - line_width_fn: Callable[[str], int]=len + multiline_s: str, line_width_fn: Callable[[str], int] = len ) -> List[int]: """Visible width of a potentially multiline content.""" return list(map(line_width_fn, re.split("[\r\n]", multiline_s))) @@ -1287,10 +1272,10 @@ def _flat_list(nested_list: List[Union[List[int], int]]) -> List[int]: def _align_column( strings: List[Union[AST, Any]], alignment: str, - minwidth: int=0, - has_invisible: bool=True, - enable_widechars: bool=False, - is_multiline: bool=False, + minwidth: int = 0, + has_invisible: bool = True, + enable_widechars: bool = False, + is_multiline: bool = False, ): """[string] -> [padded_string]""" strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible) # type: ignore @@ -1367,9 +1352,7 @@ def _more_generic(type1: Type, type2: Type) -> Type: def _column_type( - strings: Any, - has_invisible: bool=True, - numparse: bool=True + strings: Any, has_invisible: bool = True, numparse: bool = True ) -> Type: """The least generic type all column values are convertible to. @@ -1401,8 +1384,8 @@ def _format( valtype: type, floatfmt: str, intfmt: str, - missingval: str="", - has_invisible: bool=True + missingval: str = "", + has_invisible: bool = True, ) -> str: """Format a value according to its type. @@ -1447,8 +1430,8 @@ def _align_header( alignment: Any, width: int, visible_width: int, - is_multiline: Optional[bool]=False, - width_fn: Optional[Callable[[str], int]]=None, + is_multiline: Optional[bool] = False, + width_fn: Optional[Callable[[str], int]] = None, ) -> str: "Pad string header to width chars given known visible_width of the header." if is_multiline: @@ -1499,8 +1482,7 @@ def _reinsert_separating_lines( def _prepend_row_index( - rows: Any, - index: Union[IndexType, Any] + rows: Any, index: Union[IndexType, Any] ) -> List[List[Union[int, float, str]]]: """Add a left-most index column.""" if index is None or index is False: @@ -1508,7 +1490,7 @@ def _prepend_row_index( if isinstance(index, Sized) and len(index) != len(rows): raise ValueError( "index must be as long as the number of data rows: " - +"len(index)={} len(rows)={}".format(len(index), len(rows)) + + "len(index)={} len(rows)={}".format(len(index), len(rows)) ) if not isinstance(index, Iterable): raise ValueError("Index must be iterable") @@ -1532,9 +1514,7 @@ def _bool(val: Any) -> bool: def _normalize_tabular_data( - tabular_data: Any, - headers: Any, - showindex: Union[str, int, bool]="default" + tabular_data: Any, headers: Any, showindex: Union[str, int, bool] = "default" ) -> Any: """Transform a supported data type to a list of lists, and a list of headers. @@ -1737,9 +1717,7 @@ def _normalize_tabular_data( def _wrap_text_to_colwidths( - list_of_lists: Any, - colwidths: Any, - numparses: List[bool]=[True] + list_of_lists: Any, colwidths: Any, numparses: List[bool] = [True] ): if len(list_of_lists): num_cols = len(list_of_lists[0]) @@ -1777,8 +1755,8 @@ def _wrap_text_to_colwidths( def _to_str( s: Union[bytes, Any], - encoding: str="utf8", - errors: str="ignore", + encoding: str = "utf8", + errors: str = "ignore", ) -> str: """ A type safe wrapper for converting a bytestring to str. This is essentially just @@ -1806,19 +1784,19 @@ def _to_str( def tabulate( tabular_data: Any, - headers: Any=(), - tablefmt: Any="simple", - floatfmt: str=_DEFAULT_FLOATFMT, - intfmt: str=_DEFAULT_INTFMT, - numalign: str=_DEFAULT_ALIGN, - stralign: str=_DEFAULT_ALIGN, - missingval: str=_DEFAULT_MISSINGVAL, - showindex: Literal["default", True, False]="default", - disable_numparse: bool=False, - colalign: Optional[List[str]]=None, - maxcolwidths: Any=None, - rowalign: Optional[List[str]]=None, - maxheadercolwidths: Any=None, + headers: Any = (), + tablefmt: Any = "simple", + floatfmt: str = _DEFAULT_FLOATFMT, + intfmt: str = _DEFAULT_INTFMT, + numalign: str = _DEFAULT_ALIGN, + stralign: str = _DEFAULT_ALIGN, + missingval: str = _DEFAULT_MISSINGVAL, + showindex: Literal["default", True, False] = "default", + disable_numparse: bool = False, + colalign: Any = None, + maxcolwidths: Any = None, + rowalign: Any = None, + maxheadercolwidths: Any = None, ) -> Any: """Format a fixed width table for pretty printing. @@ -2466,8 +2444,7 @@ def tabulate( def _expand_numparse( - disable_numparse: Union[bool, Iterable[int]], - column_count: int + disable_numparse: Union[bool, Iterable[int]], column_count: int ) -> List[bool]: """ Return a list of bools of length `column_count` which indicates whether @@ -2541,7 +2518,7 @@ def _append_basic_row( colwidths: Any, colaligns: Any, rowfmt: str, - rowalign: Optional[str]=None, + rowalign: Optional[str] = None, ) -> List[str]: # NOTE: rowalign is ignored and exists for api compatibility with _append_multiline_row lines.append(_build_row(padded_cells, colwidths, colaligns, rowfmt)) @@ -2552,7 +2529,7 @@ def _align_cell_veritically( text_lines: Any, num_lines: int, column_width: int, - row_alignment: Union[Literal["bottom", "center", "top"], None] + row_alignment: Union[Literal["bottom", "center", "top"], None], ) -> List[str]: delta_lines = num_lines - len(text_lines) blank = [" " * column_width] @@ -2573,7 +2550,7 @@ def _append_multiline_row( colaligns: List[Literal["left", "center", "right"]], rowfmt: str, pad: int, - rowalign: Any=None, + rowalign: Any = None, ): colwidths = [w - 2 * pad for w in padded_widths] cells_lines = [c.splitlines() for c in padded_multiline_cells] @@ -2611,10 +2588,7 @@ def _build_line( def _append_line( - lines: Any, - colwidths: List[int], - colaligns: List[str], - linefmt: Any + lines: Any, colwidths: List[int], colaligns: List[str], linefmt: Any ) -> str: lines.append(_build_line(colwidths, colaligns, linefmt)) return lines @@ -2631,14 +2605,13 @@ def _repr_html_(self) -> str: def str(self) -> str: """add a .str property so that the raw string is still accessible""" return self - -class JupyterHTML(Protocol): +class JupyterHTML(Protocol): def _repr_html(self) -> str: ... - + def display_in_jupyter(item: Type[JupyterHTML]) -> None: """Wrapper function for Jupyter display.""" ... # TODO: add something that'd return the html. @@ -2651,7 +2624,7 @@ def _format_table( colwidths: Any, colaligns: Any, is_multiline: bool, - rowaligns: Any + rowaligns: Any, ): """Produce a plain-text representation of the table.""" lines: List[str] = [] @@ -2754,7 +2727,7 @@ def _update_lines(self, lines: List[str], new_line: str) -> None: """ code_matches = [x for x in _ansi_codes.finditer(new_line)] color_codes = [ - code.string[code.span()[0]: code.span()[1]] for code in code_matches + code.string[code.span()[0] : code.span()[1]] for code in code_matches ] # Add color codes from earlier in the unwrapped line, and then track any new ones we add. @@ -2774,11 +2747,7 @@ def _update_lines(self, lines: List[str], new_line: str) -> None: lines.append(new_line) def _handle_long_word( - self, - reversed_chunks: List[str], - cur_line: List[str], - cur_len: int, - width: int + self, reversed_chunks: List[str], cur_line: List[str], cur_len: int, width: int ) -> None: """_handle_long_word( chunks : [string], @@ -2805,7 +2774,7 @@ def _handle_long_word( while self._len(chunk[:i]) <= space_left: i = i + 1 cur_line.append(chunk[: i - 1]) - reversed_chunks[-1] = chunk[i - 1:] + reversed_chunks[-1] = chunk[i - 1 :] # Otherwise, we have to preserve the long word intact. Only add # it to the current line if there's nothing already there -- From f4c04fc6a7f7bebe05a13bc0dcb7903b339830a9 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 14:42:48 +0530 Subject: [PATCH 11/22] tests: more input tests --- test/test_input.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/test_input.py b/test/test_input.py index a178bd9d..c1d93e6c 100644 --- a/test/test_input.py +++ b/test/test_input.py @@ -528,3 +528,35 @@ def test_list_bytes(): ) result = tabulate(lb, headers=["bytes"]) assert_equal(expected, result) + +def test_colalign_with_bad_type(): + """ + Input: colalign which is not a list or tuple. + """ + table = [] + with raises(ValueError, match="colalign should be a list or tuple."): + tabulate(table, colalign="right") + +def test_more_colalign_than_headers(): + """ + Input: a list of headers and colalign with more colalign than headers. + """ + table = [ + ("sequare", 4), + ("triangle", 3), + ("circle", 1) + ] + headers = ["name", "sides"] + colalign = ["left", "right", "right"] + + tabulate(table, headers=headers, colalign=colalign) + +def test_colalign_with_empty_input(): + """ + Input: a list of headers and colalign with more colalign than headers. + """ + table = [] + headers = ["h1", "h2"] + colalign = ["left", "right"] + + tabulate(table, headers=headers, colalign=colalign) From 1f33e84dc2f48fb4a801457074c0d63c7cc7632a Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 14:52:26 +0530 Subject: [PATCH 12/22] tests: more output tests --- test/test_output.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/test_output.py b/test/test_output.py index 9043aed3..8a0f7193 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -2973,3 +2973,26 @@ def test_preserve_whitespace(): expected = "\n".join(["h1 h2 h3", "---- ---- ----", "foo bar foo"]) result = tabulate(test_table, table_headers) assert_equal(expected, result) + +def test_colalign_with_no_data(): + """ + Output: a table with empty data, but with colalign specified. + """ + test_table = [] + table_headers = ["h1", "h2"] + colalign = ["left", "right"] + expected = "\n".join(["h1 h2", "---- ----"]) + result = tabulate(test_table, headers=table_headers, colalign=colalign) + assert_equal(expected, result) + +def test_colalign_with_mixed_data(): + """ + Output: a table with mixed data and a specified colalign. + """ + table_headers = ["h1", "h2"] + test_table = [["", "abcdef"], [1, "abc"]] + expected = "\n".join( + ["h1 h2", "---- ------", " abcdef", "1 abc"] + ) + result = tabulate(test_table, headers=table_headers, colalign=("left", "right")) + assert_equal(expected, result) From cd0b806258010aa8aef12ac7914601139d60091e Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 16:03:31 +0530 Subject: [PATCH 13/22] fix: throw error when non-iterable data is given --- tabulate/__init__.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index 9a8a5e43..ea4f2280 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1554,6 +1554,12 @@ def _normalize_tabular_data( except ValueError: # numpy.ndarray, pandas.core.index.Index, ... is_headers2bool_broken = True # noqa headers = list(headers) + + error_message = ( + "\n\nBuilding a table using python-tabulate requires two-dimensional data " + "like a list of lists or similar." + "\nDid you forget a pair of extra [] or ',' in ()?" + ) index = None if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"): @@ -1561,9 +1567,12 @@ def _normalize_tabular_data( if hasattr(tabular_data.values, "__call__"): # likely a conventional dict keys = tabular_data.keys() - rows = list( - izip_longest(*tabular_data.values()) - ) # columns have to be transposed + try: + rows = list( + izip_longest(*tabular_data.values()) + ) # columns have to be transposed + except TypeError: # not iterable + raise TypeError(error_message) elif hasattr(tabular_data, "index"): # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0) keys = list(tabular_data) @@ -1586,7 +1595,10 @@ def _normalize_tabular_data( headers = list(map(str, keys)) # headers should be strings else: # it's a usual iterable of iterables, or a NumPy array, or an iterable of dataclasses - rows = list(tabular_data) + try: + rows = list(tabular_data) + except TypeError: # not iterable + raise TypeError(error_message) if headers == "keys" and not rows: # an empty table (issue #81) From 40be98db9f5143df6de6eff95fdaf219e86fec4a Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 16:16:01 +0530 Subject: [PATCH 14/22] add `typing_extensions` as deps --- tox.ini | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tox.ini b/tox.ini index c6260d21..ea2a182c 100644 --- a/tox.ini +++ b/tox.ini @@ -15,6 +15,7 @@ isolated_build = True commands = pytest -v --doctest-modules --ignore benchmark.py {posargs} deps = pytest + typing_extensions passenv = CURL_CA_BUNDLE REQUESTS_CA_BUNDLE @@ -30,6 +31,7 @@ basepython = python3.7 commands = pytest -v --doctest-modules --ignore benchmark.py {posargs} deps = pytest + typing_extensions [testenv:py37-extra] basepython = python3.7 @@ -39,12 +41,14 @@ deps = numpy pandas wcwidth + typing_extensions [testenv:py38] basepython = python3.8 commands = pytest -v --doctest-modules --ignore benchmark.py {posargs} deps = pytest + typing_extensions [testenv:py38-extra] basepython = python3.8 @@ -54,6 +58,7 @@ deps = numpy pandas wcwidth + typing_extensions [testenv:py39] @@ -61,6 +66,7 @@ basepython = python3.9 commands = pytest -v --doctest-modules --ignore benchmark.py {posargs} deps = pytest + typing_extensions [testenv:py39-extra] basepython = python3.9 @@ -70,6 +76,7 @@ deps = numpy pandas wcwidth + typing_extensions [testenv:py310] @@ -77,6 +84,7 @@ basepython = python3.10 commands = pytest -v --doctest-modules --ignore benchmark.py {posargs} deps = pytest + typing_extensions [testenv:py310-extra] basepython = python3.10 @@ -87,6 +95,7 @@ deps = numpy pandas wcwidth + typing_extensions [testenv:py311] @@ -94,6 +103,7 @@ basepython = python3.11 commands = pytest -v --doctest-modules --ignore benchmark.py {posargs} deps = pytest + typing_extensions [testenv:py311-extra] basepython = python3.11 @@ -104,6 +114,7 @@ deps = numpy pandas wcwidth + typing_extensions [flake8] From d957ac76c70024532563dc748af0d1d32ea21833 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 16:23:30 +0530 Subject: [PATCH 15/22] fix: sep line not working when request format pads --- tabulate/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index ea4f2280..d6856130 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -184,11 +184,15 @@ def __len__(self) -> int: ... +def _is_separating_line_values(value: str) -> bool: + return type(value) == str and value.strip() == SEPARATING_LINE + + def _is_separating_line(row: Union[list, str]) -> Literal[True, False]: row_type = type(row) is_sl = (row_type == list or row_type == str) and ( - (len(row) >= 1 and row[0] == SEPARATING_LINE) - or (len(row) >= 2 and row[1] == SEPARATING_LINE) + (len(row) >= 1 and _is_separating_line_values(row[0])) + or (len(row) >= 2 and _is_separating_line_values(row[1])) ) return is_sl From 8f710d88c8dc88d1499ba5c8dc3f171d0bad14e3 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 16:24:00 +0530 Subject: [PATCH 16/22] tests: add a test case for padded columns with sep --- test/test_output.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/test_output.py b/test/test_output.py index 8a0f7193..84432f9e 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -2879,6 +2879,27 @@ def test_list_of_lists_with_index_with_sep_line(): ) result = tabulate(dd, headers=["a", "b"], showindex=True) assert_equal(expected, result) + + +def test_with_padded_columns_with_sep_line(): + table = [ + ["1", "one"], # "1" as a str on purpose + [1_000, "one K"], + SEPARATING_LINE, + [1_000_000, "one M"], + ] + expected = "\n".join( + [ + "+---------+-------+", + "| 1 | one |", + "| 1000 | one K |", + "|---------+-------|", + "| 1000000 | one M |", + "+---------+-------+", + ] + ) + result = tabulate(table, tablefmt="psql") + assert_equal(expected, result) def test_list_of_lists_with_supplied_index(): From eade44ea413c607eefb44c53abb91536b09fa3b1 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 16:38:43 +0530 Subject: [PATCH 17/22] feat: break long words and on hyphens --- tabulate/__init__.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index d6856130..94149dc6 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -50,6 +50,11 @@ def _is_file(f): # Whether or not to preserve leading/trailing whitespace in data. PRESERVE_WHITESPACE: bool = False +# TextWrapper breaks words longer than 'width'. +_BREAK_LONG_WORDS: bool = True +# TextWrapper is breaking hyphenated words. +_BREAK_ON_HYPHENS: bool = True + _DEFAULT_FLOATFMT: str = "g" _DEFAULT_INTFMT: str = "" _DEFAULT_MISSINGVAL: str = "" @@ -1733,7 +1738,11 @@ def _normalize_tabular_data( def _wrap_text_to_colwidths( - list_of_lists: Any, colwidths: Any, numparses: List[bool] = [True] + list_of_lists: Any, + colwidths: Any, + numparses: List[bool] = [True], + break_long_words: bool = _BREAK_LONG_WORDS, + break_on_hyphens: bool = _BREAK_ON_HYPHENS, ): if len(list_of_lists): num_cols = len(list_of_lists[0]) @@ -1751,7 +1760,11 @@ def _wrap_text_to_colwidths( continue if width is not None: - wrapper = _CustomTextWrap(width=width) + wrapper = _CustomTextWrap( + width=width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + ) # Cast based on our internal type handling # Any future custom formatting of types (such as datetimes) # may need to be more explicit than just `str` of the object @@ -1813,6 +1826,8 @@ def tabulate( maxcolwidths: Any = None, rowalign: Any = None, maxheadercolwidths: Any = None, + break_long_words: bool = _BREAK_LONG_WORDS, + break_on_hyphens: bool = _BREAK_ON_HYPHENS, ) -> Any: """Format a fixed width table for pretty printing. @@ -2311,7 +2326,11 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) list_of_lists = _wrap_text_to_colwidths( - list_of_lists, maxcolwidths, numparses=numparses + list_of_lists, + maxcolwidths, + numparses=numparses, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, ) if maxheadercolwidths is not None: @@ -2325,7 +2344,11 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) headers = _wrap_text_to_colwidths( - [headers], maxheadercolwidths, numparses=numparses + [headers], + maxheadercolwidths, + numparses=numparses, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, )[0] # empty values in the first column of RST tables should be escaped (issue #82) From cb6b277cabfeb47bbd726c61573697294991cbe4 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 16:38:56 +0530 Subject: [PATCH 18/22] tests: update break --- test/test_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_api.py b/test/test_api.py index 046d7526..b70f2e0b 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -52,6 +52,8 @@ def test_tabulate_signature(): ("maxcolwidths", None), ("rowalign", None), ("maxheadercolwidths", None), + ("break_long_words", True), + ("break_on_hyphens", True), ] _check_signature(tabulate, expected_sig) From 55258aa4870ae4dd0a24216c30eca486a47ed951 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 16:39:06 +0530 Subject: [PATCH 19/22] tests: add break tests --- test/test_output.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/test_output.py b/test/test_output.py index 84432f9e..4b6a6382 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -3017,3 +3017,32 @@ def test_colalign_with_mixed_data(): ) result = tabulate(test_table, headers=table_headers, colalign=("left", "right")) assert_equal(expected, result) + +def test_break_long_words(): + "Output: Default table output, with breakwords true." + table_headers = ["h1", "h2", "h3"] + test_table = [[" foo1", " bar2 ", "foo3"]] + + # Table is not wrapped on 3 letters due to long word + expected = "h1 h2 h3\n---- ---- ----\nfoo1 bar2 foo3" + result = tabulate(test_table, table_headers, maxcolwidths=3, break_long_words=False) + assert_equal(expected, result) + + # Table max width is 3 letters + expected = "h1 h2 h3\n---- ---- ----\nf ba foo\noo1 r2 3" + result = tabulate(test_table, table_headers, maxcolwidths=3, break_long_words=True) + assert_equal(expected, result) + +def test_break_on_hyphens(): + "Output: Default table output, with break on hyphens true." + table_headers = ["h1", "h2", "h3"] + test_table = [[" foo-bar", " bar-bar ", "foo-foo"]] + # Table max width is 5, long lines breaks on hyphens + expected = "h1 h2 h3\n---- ---- -----\nfoo bar- foo-f\n-bar bar oo" + result = tabulate(test_table, table_headers, maxcolwidths=5, break_on_hyphens=False) + assert_equal(expected, result) + + # Table data is no longer breaks on hyphens + expected = "h1 h2 h3\n---- ---- ----\nfoo- bar- foo-\nbar bar foo" + result = tabulate(test_table, table_headers, maxcolwidths=5, break_on_hyphens=True) + assert_equal(expected, result) From 013ffb8098c0d09635b768ae01551a7162d90c09 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 17:38:11 +0530 Subject: [PATCH 20/22] Update README.md --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 26a6e612..c66d57ed 100644 --- a/README.md +++ b/README.md @@ -948,6 +948,30 @@ the lines being wrapped would probably be significantly longer than this. +------------+---------+ ``` +Text is preferably wrapped on whitespaces and right after the hyphens in hyphenated words. + +break_long_words (default: True) If true, then words longer than width will be broken in order to ensure that no lines are longer than width. +If it is false, long words will not be broken, and some lines may be longer than width. +(Long words will be put on a line by themselves, in order to minimize the amount by which width is exceeded.) + +break_on_hyphens (default: True) If true, wrapping will occur preferably on whitespaces and right after hyphens in compound words, as it is customary in English. +If false, only whitespaces will be considered as potentially good places for line breaks. + +```pycon +>>> print(tabulate([["John Smith", "Middle-Manager"]], headers=["Name", "Title"], tablefmt="grid", maxcolwidths=[None, 5], break_long_words=False)) ++------------+---------+ +| Name | Title | ++============+=========+ +| John Smith | Middle- | +| | Manager | ++------------+---------+ +>>> print(tabulate([["John Smith", "Middle-Manager"]], headers=["Name", "Title"], tablefmt="grid", maxcolwidths=[None, 5], break_long_words=False, break_on_hyphens=False)) ++------------+----------------+ +| Name | Title | ++============+================+ +| John Smith | Middle-Manager | ++------------+----------------+ +``` ### Adding Separating lines One might want to add one or more separating lines to highlight different sections in a table. From 31fd00feb7fab158e6a48044719f6fb220b2d2b5 Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 17:39:05 +0530 Subject: [PATCH 21/22] format: black --- tabulate/__init__.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index 94149dc6..e53ce14d 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1563,7 +1563,7 @@ def _normalize_tabular_data( except ValueError: # numpy.ndarray, pandas.core.index.Index, ... is_headers2bool_broken = True # noqa headers = list(headers) - + error_message = ( "\n\nBuilding a table using python-tabulate requires two-dimensional data " "like a list of lists or similar." @@ -1580,7 +1580,7 @@ def _normalize_tabular_data( rows = list( izip_longest(*tabular_data.values()) ) # columns have to be transposed - except TypeError: # not iterable + except TypeError: # not iterable raise TypeError(error_message) elif hasattr(tabular_data, "index"): # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0) @@ -1606,7 +1606,7 @@ def _normalize_tabular_data( else: # it's a usual iterable of iterables, or a NumPy array, or an iterable of dataclasses try: rows = list(tabular_data) - except TypeError: # not iterable + except TypeError: # not iterable raise TypeError(error_message) if headers == "keys" and not rows: @@ -1738,8 +1738,8 @@ def _normalize_tabular_data( def _wrap_text_to_colwidths( - list_of_lists: Any, - colwidths: Any, + list_of_lists: Any, + colwidths: Any, numparses: List[bool] = [True], break_long_words: bool = _BREAK_LONG_WORDS, break_on_hyphens: bool = _BREAK_ON_HYPHENS, @@ -2326,8 +2326,8 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) list_of_lists = _wrap_text_to_colwidths( - list_of_lists, - maxcolwidths, + list_of_lists, + maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens, @@ -2344,8 +2344,8 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) headers = _wrap_text_to_colwidths( - [headers], - maxheadercolwidths, + [headers], + maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens, From 2d11f1fe2ac6ae68337a3891d1f7f6498a82c17d Mon Sep 17 00:00:00 2001 From: Lemon Rose Date: Mon, 6 Feb 2023 17:47:30 +0530 Subject: [PATCH 22/22] feat: get rid of exceptions when converting values --- tabulate/__init__.py | 46 ++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index e53ce14d..68a71481 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1396,16 +1396,7 @@ def _format( missingval: str = "", has_invisible: bool = True, ) -> str: - """Format a value according to its type. - - Unicode is supported: - - >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \ - tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \ - good_result = '\\u0431\\u0443\\u043a\\u0432\\u0430 \\u0446\\u0438\\u0444\\u0440\\u0430\\n------- -------\\n\\u0430\\u0437 2\\n\\u0431\\u0443\\u043a\\u0438 4' ; \ - tabulate(tbl, headers=hrow) == good_result - True - + """Format a value according to the type of the column. """ # noqa if val is None: return missingval @@ -1419,19 +1410,32 @@ def _format( return str(val, "ascii") except (TypeError, UnicodeDecodeError): return str(val) + + # the rest is for bool and number values in bool and number column types + is_ansi_colored = has_invisible and isinstance(val, (str, bytes)) + # strip color if needed, reapply before return + if is_ansi_colored: + colored_val = val + val = raw_val = _strip_ansi(val) + + if _isbool(val): + val = val in (True, "True") + + if valtype is int: + if intfmt: + val = int(val) + val = format(val, intfmt) + elif valtype is float: - is_a_colored_number = has_invisible and isinstance(val, (str, bytes)) - if is_a_colored_number: - raw_val = _strip_ansi(val) - formatted_val = format(float(raw_val), floatfmt) - if isinstance(val, str): - return val.replace(str(raw_val), formatted_val) - else: - return val.decode().replace(str(raw_val), formatted_val) - else: - return format(float(val), floatfmt) + val = format(float(val), floatfmt) + else: - return f"{val}" + val = f"{val}" + + if is_ansi_colored: + val = colored_val.replace(raw_val, val) # type: ignore + + return val def _align_header(