diff --git a/great-docs.yml b/great-docs.yml index 792706f..81f75fb 100644 --- a/great-docs.yml +++ b/great-docs.yml @@ -313,3 +313,10 @@ reference: - GreatDocs.build - GreatDocs.preview - GreatDocs.check_links + + - title: Table Preview + desc: Generate styled HTML table previews from any tabular data source + contents: + - tbl_preview + - enable_tbl_preview + - disable_tbl_preview diff --git a/great_docs/_tbl_preview.py b/great_docs/_tbl_preview.py index 538f386..115b68e 100644 --- a/great_docs/_tbl_preview.py +++ b/great_docs/_tbl_preview.py @@ -482,24 +482,31 @@ def _compute_head_tail( n_head: int, n_tail: int, show_all: bool, + row_index_offset: int = 0, ) -> tuple[list[list], list[int], bool]: """Select head and tail rows, compute row numbers. + Parameters + ---------- + row_index_offset + Starting number for row indices (default 0). + Returns ------- tuple (display_rows, row_numbers, is_full_dataset) """ if show_all or n_head + n_tail >= total_rows: - row_numbers = list(range(1, total_rows + 1)) + row_numbers = list(range(row_index_offset, row_index_offset + total_rows)) return rows, row_numbers, True head_rows = rows[:n_head] tail_rows = rows[-n_tail:] if n_tail > 0 else [] display_rows = head_rows + tail_rows - head_nums = list(range(1, n_head + 1)) - tail_nums = list(range(total_rows - n_tail + 1, total_rows + 1)) if n_tail > 0 else [] + head_nums = list(range(row_index_offset, row_index_offset + n_head)) + tail_start = row_index_offset + total_rows - n_tail + tail_nums = list(range(tail_start, row_index_offset + total_rows)) if n_tail > 0 else [] row_numbers = head_nums + tail_nums return display_rows, row_numbers, False @@ -1108,46 +1115,199 @@ def tbl_preview( min_tbl_width: int = 500, caption: str | None = None, highlight_missing: bool = True, + row_index_offset: int = 0, id: str | None = None, ) -> TblPreview: - """Generate a beautiful table preview. + """ + Generate a self-contained HTML table preview from almost any tabular data source. 
+ + The `tbl_preview()` function gives you a quick, polished look at a dataset without pulling in + heavy rendering dependencies. Pass it a Polars DataFrame, a Pandas DataFrame, a PyArrow Table, + a file path to a CSV / TSV / JSONL / Parquet / Feather file, a column-oriented dictionary, or a + list of row dictionaries—and get back a styled HTML table that renders identically in notebooks, + Quarto documents, and static HTML pages. + + The preview shows a configurable number of rows from the top and bottom of the table, separated + by a blue divider line when the full dataset exceeds the requested row count. Each column header + displays the column name and, beneath it, a compact dtype label (e.g., `i64`, `str`, `f64`). A + header banner shows a colored badge identifying the data source type (Polars, Pandas, CSV, + Parquet, etc.) alongside row and column counts. Missing values (`None`, `NaN`, `NA`) are + highlighted in red so they stand out immediately. + + The output is a :class:`TblPreview` object with `_repr_html_()` support, so it displays + automatically in Jupyter notebooks and Quarto code cells. All CSS is scoped to a unique id, + and the table includes full dark-mode support. No JavaScript is required. Parameters ---------- data - The table data. Accepts a Polars DataFrame, Pandas DataFrame, PyArrow Table, file path (CSV, - TSV, JSONL, Parquet, Feather/Arrow IPC), column-oriented dict, or list of row dicts. + The table to preview. This can be a Polars DataFrame, a Pandas DataFrame, a PyArrow Table, a + file path (as a string or `pathlib.Path` object), a column-oriented dictionary, or a list of + row dictionaries. When providing a file path, the extension determines the loader: `.csv`, + `.tsv`, `.jsonl` (or `.ndjson`), `.parquet`, `.feather`, and `.arrow` (Arrow IPC) are all + supported. Read the *Supported Input Data Types* section for details on each accepted + format. columns - Subset of columns to display. `None` shows all columns. 
+ The columns to display in the preview, by default `None` (all columns are shown). This + can be a list of column name strings. If any name does not match a column in the table, a + `KeyError` is raised. This is useful for focusing on a subset of a wide dataset. n_head - Number of rows to show from the start of the table. + The number of rows to show from the start of the table. Set to `5` by default. When the + table has no more than `n_head + n_tail` rows, the full table is displayed without a + divider. n_tail - Number of rows to show from the end of the table. + The number of rows to show from the end of the table. Set to `5` by default. limit - Maximum allowed sum of `n_head` and `n_tail`. + The limit value for the sum of `n_head=` and `n_tail=` (the total number of rows shown). + If the sum of `n_head=` and `n_tail=` exceeds the limit, a `ValueError` is raised. The + default value is `50`. Increase this when you need to display more rows. show_all - If `True`, display the entire table (ignores `n_head`/`n_tail`). + Should the entire table be displayed? If `True`, all rows are shown regardless of the + `n_head=` and `n_tail=` settings. By default, this is `False`. show_row_numbers - Whether to show a row-number column on the left. + Should row numbers be shown? The numbers appear in a narrow gutter column on the left side + of the table, separated from the data columns by a subtle blue vertical line. By default, + this is set to `True`. show_dtypes - Whether to show dtype sublabels under column names. + Should data type labels be displayed beneath each column name? The labels use short + abbreviations (e.g., `i64` for 64-bit integer, `str` for string, `f64` for 64-bit float). By + default, this is set to `True`. show_dimensions - Whether to show the header banner with row/column counts. + Should the header banner be shown? The banner displays a colored badge identifying the data + source type alongside row and column counts in labeled pill badges. 
By default, this is set + to `True`. max_col_width - Maximum width of any column in pixels. + The maximum width of any single column in pixels. Column widths are computed automatically + to fit their content up to this ceiling, beyond which cell text is truncated with an + ellipsis. The default value is `250` pixels. min_tbl_width - Minimum total table width in pixels. + The minimum total width of the table in pixels. If the sum of the computed column widths is + less than this value, columns are proportionally widened to fill the available space. The + default value is `500` pixels. caption - Optional caption displayed below the header banner. + An optional caption string displayed below the header banner and above the column headers. + Useful for labeling a preview with a dataset name or description. By default, no caption is + shown. highlight_missing - Whether to highlight missing values (None/NaN/NA). + Should missing values (`None`, `NaN`, `NA`) be highlighted? When `True` (the default), + missing cells are displayed in red text on a light red background so they stand out at a + glance. + row_index_offset + The starting number for row indices. Defaults to `0`, matching the zero-based indexing + convention of Python, Polars, and Pandas. Set to `1` for one-based numbering (e.g., for + presentation to audiences unfamiliar with zero-based indexing). id - HTML id for the table container. Auto-generated if `None`. + An HTML `id` attribute for the outer `