Skip to content

Commit bf63d4a

Browse files
committed
cleanups
1 parent 309ef14 commit bf63d4a

9 files changed

Lines changed: 249 additions & 188 deletions

File tree

CLAUDE.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ uv run coverage html # Generate HTML report in htmlcov/
2323
uv run ruff check --fix . && uv run ruff format . && uv run mypy vgi/
2424
```
2525

26+
Before running `pytest`, you must run ruff's check and fix commands; otherwise, fixing problems
27+
takes longer:
28+
29+
```bash
30+
uv run ruff check --fix . && uv run ruff format .
31+
```
32+
33+
2634
## Project Overview
2735

2836
VGI (Vector Gateway Interface) provides an Apache Arrow-based protocol for connecting DuckDB to external programs. It enables user-defined functions to run in separate processes, communicating via stdin/stdout using Arrow IPC streaming.
@@ -74,7 +82,7 @@ vgi/
7482
__init__.py # Package exports
7583
function.py # Invocation, OutputSpec, Arguments, FunctionType
7684
scalar_function.py # ScalarFunction, ScalarFunctionGenerator
77-
table_function.py # TableFunctionGenerator, CardinalityInfo, Output
85+
table_function.py # TableFunctionGenerator, TableCardinality, Output
7886
table_in_out_function.py # TableInOutFunction, TableInOutGeneratorFunction
7987
metadata.py # Function metadata for introspection
8088
schema_utils.py # Schema builder helpers (schema, schema_like)

docs/generator-api.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ Use `TableFunctionGenerator` when you need to generate data without receiving in
7979
```python
8080
import pyarrow as pa
8181
from vgi import TableFunctionGenerator, Output, Arg
82-
from vgi.table_function import CardinalityInfo
82+
from vgi.table_function import TableCardinality
8383

8484
class MyTableFunction(TableFunctionGenerator):
8585
"""Generate data without input."""
@@ -95,9 +95,9 @@ class MyTableFunction(TableFunctionGenerator):
9595
def output_schema(self) -> pa.Schema:
9696
return pa.schema([("value", pa.int64())])
9797

98-
def cardinality(self) -> CardinalityInfo:
98+
def cardinality(self) -> TableCardinality:
9999
"""Optional: provide row count estimate."""
100-
return CardinalityInfo(estimate=self.count, max=self.count)
100+
return TableCardinality(estimate=self.count, max=self.count)
101101

102102
def process(self):
103103
"""Generate output batches."""

tests/table/test_function.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from tests.conftest import make_invocation, make_schema
1010
from vgi.function import Arguments
1111
from vgi.table_function import (
12-
CardinalityInfo,
12+
TableCardinality,
1313
Output,
1414
OutputGenerator,
1515
TableFunctionGenerator,
@@ -206,8 +206,8 @@ class CardinalityFunction(TableFunctionGenerator):
206206
def output_schema(self) -> pa.Schema:
207207
return make_schema([pa.field("x", pa.int64())])
208208

209-
def cardinality(self) -> CardinalityInfo:
210-
return CardinalityInfo(estimate=100, max=1000)
209+
def cardinality(self) -> TableCardinality:
210+
return TableCardinality(estimate=100, max=1000)
211211

212212
invocation = make_invocation()
213213
func = CardinalityFunction(invocation=invocation, logger=structlog.get_logger())

tests/test_protocol_classes.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
)
2020
from vgi.log import Level, Message
2121
from vgi.table_function import (
22-
CardinalityInfo,
22+
TableCardinality,
2323
OutputSpec,
2424
TableFunctionInitInput,
2525
)
@@ -477,39 +477,39 @@ def test_repr(self) -> None:
477477
assert "test message" in repr_str
478478

479479

480-
class TestCardinalityInfo:
481-
"""Tests for CardinalityInfo dataclass."""
480+
class TestTableCardinality:
481+
"""Tests for TableCardinality dataclass."""
482482

483483
def test_basic_creation(self) -> None:
484-
"""CardinalityInfo should store estimate and max values."""
485-
info = CardinalityInfo(estimate=100, max=1000)
484+
"""TableCardinality should store estimate and max values."""
485+
info = TableCardinality(estimate=100, max=1000)
486486
assert info.estimate == 100
487487
assert info.max == 1000
488488

489489
def test_null_values(self) -> None:
490-
"""CardinalityInfo should allow null estimate and max."""
491-
info = CardinalityInfo(estimate=None, max=None)
490+
"""TableCardinality should allow null estimate and max."""
491+
info = TableCardinality(estimate=None, max=None)
492492
assert info.estimate is None
493493
assert info.max is None
494494

495495
def test_partial_values(self) -> None:
496-
"""CardinalityInfo should allow partial information."""
497-
estimate_only = CardinalityInfo(estimate=50, max=None)
496+
"""TableCardinality should allow partial information."""
497+
estimate_only = TableCardinality(estimate=50, max=None)
498498
assert estimate_only.estimate == 50
499499
assert estimate_only.max is None
500500

501-
max_only = CardinalityInfo(estimate=None, max=100)
501+
max_only = TableCardinality(estimate=None, max=100)
502502
assert max_only.estimate is None
503503
assert max_only.max == 100
504504

505505
def test_exact_cardinality(self) -> None:
506-
"""CardinalityInfo with equal estimate and max indicates exact count."""
507-
exact = CardinalityInfo(estimate=1, max=1)
506+
"""TableCardinality with equal estimate and max indicates exact count."""
507+
exact = TableCardinality(estimate=1, max=1)
508508
assert exact.estimate == exact.max == 1
509509

510510
def test_frozen(self) -> None:
511-
"""CardinalityInfo should be immutable (frozen dataclass)."""
512-
info = CardinalityInfo(estimate=100, max=1000)
511+
"""TableCardinality should be immutable (frozen dataclass)."""
512+
info = TableCardinality(estimate=100, max=1000)
513513
with pytest.raises(AttributeError):
514514
info.estimate = 200 # type: ignore[misc]
515515

@@ -573,7 +573,7 @@ def test_serialization_with_cardinality(self) -> None:
573573
output_schema=make_schema([pa.field("col1", pa.int64())]),
574574
max_processes=4,
575575
invocation_id=b"test-id",
576-
cardinality=CardinalityInfo(estimate=100, max=1000),
576+
cardinality=TableCardinality(estimate=100, max=1000),
577577
)
578578

579579
serialized = spec.serialize()
@@ -598,7 +598,7 @@ def test_serialize_schema_includes_cardinality_fields(self) -> None:
598598
output_schema=make_schema([pa.field("col1", pa.int64())]),
599599
max_processes=1,
600600
invocation_id=b"test-id",
601-
cardinality=CardinalityInfo(estimate=50, max=100),
601+
cardinality=TableCardinality(estimate=50, max=100),
602602
)
603603

604604
schema = spec.serialize_schema()
@@ -611,7 +611,7 @@ def test_serialize_dict_includes_cardinality_values(self) -> None:
611611
output_schema=make_schema([pa.field("col1", pa.int64())]),
612612
max_processes=1,
613613
invocation_id=b"test-id",
614-
cardinality=CardinalityInfo(estimate=50, max=100),
614+
cardinality=TableCardinality(estimate=50, max=100),
615615
)
616616

617617
data = spec.serialize_dict()

vgi/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ class Meta:
107107
vgi.client - Client class for invoking functions on workers
108108
vgi.log - Level and Message for function diagnostics
109109
vgi.ipc_utils - RecordBatchState for distributed function state
110-
vgi.table_function - CardinalityInfo for row count hints
110+
vgi.table_function - TableCardinality for row count hints
111111
112112
CLASS HIERARCHY
113113
---------------
@@ -148,12 +148,12 @@ class Meta:
148148
functions_to_arrow,
149149
)
150150
from vgi.scalar_function import (
151+
RowCountMismatchError,
151152
ScalarFunction,
152153
ScalarFunctionGenerator,
153154
ScalarOutputGenerator,
154155
)
155156
from vgi.schema_utils import schema, schema_like
156-
from vgi.table_function import RowCountMismatchError
157157
from vgi.table_in_out_function import (
158158
Output,
159159
OutputGenerator,

vgi/examples/table.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from vgi.log import Level, Message
2424
from vgi.metadata import FunctionExample
2525
from vgi.table_function import (
26-
CardinalityInfo,
26+
TableCardinality,
2727
Output,
2828
OutputGenerator,
2929
TableFunctionGenerator,
@@ -90,9 +90,9 @@ def output_schema(self) -> pa.Schema:
9090
"""Return output schema with single integer column."""
9191
return pa.schema([pa.field("n", pa.int64())])
9292

93-
def cardinality(self) -> CardinalityInfo:
93+
def cardinality(self) -> TableCardinality:
9494
"""Return exact cardinality since we know the count."""
95-
return CardinalityInfo(estimate=self.count, max=self.count)
95+
return TableCardinality(estimate=self.count, max=self.count)
9696

9797
def process(self) -> OutputGenerator:
9898
"""Generate the sequence in batches."""
@@ -159,13 +159,13 @@ def output_schema(self) -> pa.Schema:
159159
"""Return output schema with single integer column."""
160160
return pa.schema([pa.field("value", pa.int64())])
161161

162-
def cardinality(self) -> CardinalityInfo:
162+
def cardinality(self) -> TableCardinality:
163163
"""Return cardinality based on range parameters."""
164164
if self.end <= self.start:
165165
count = 0
166166
else:
167167
count = (self.end - self.start + self.step - 1) // self.step
168-
return CardinalityInfo(estimate=count, max=count)
168+
return TableCardinality(estimate=count, max=count)
169169

170170
def process(self) -> OutputGenerator:
171171
"""Generate the range in batches."""
@@ -232,9 +232,9 @@ def output_schema(self) -> pa.Schema:
232232
"""Return output schema with single integer column."""
233233
return pa.schema([pa.field("value", pa.int64())])
234234

235-
def cardinality(self) -> CardinalityInfo:
235+
def cardinality(self) -> TableCardinality:
236236
"""Return cardinality of exactly one row."""
237-
return CardinalityInfo(estimate=1, max=1)
237+
return TableCardinality(estimate=1, max=1)
238238

239239
def process(self) -> OutputGenerator:
240240
"""Emit a single batch with one row."""
@@ -298,9 +298,9 @@ def output_schema(self) -> pa.Schema:
298298
]
299299
return pa.schema(fields)
300300

301-
def cardinality(self) -> CardinalityInfo:
301+
def cardinality(self) -> TableCardinality:
302302
"""Return cardinality estimate."""
303-
return CardinalityInfo(estimate=self.count, max=self.count)
303+
return TableCardinality(estimate=self.count, max=self.count)
304304

305305
def setup(self) -> None:
306306
"""Initialize random number generator with seed."""
@@ -465,13 +465,13 @@ def output_schema(self) -> pa.Schema:
465465
"""Return output schema with single integer column."""
466466
return pa.schema([pa.field("value", pa.int64())])
467467

468-
def cardinality(self) -> CardinalityInfo:
468+
def cardinality(self) -> TableCardinality:
469469
"""Return cardinality estimate.
470470
471471
Since work is distributed dynamically via queue, we can only provide
472472
the total count estimate, not per-worker estimates.
473473
"""
474-
return CardinalityInfo(estimate=self.count, max=self.count)
474+
return TableCardinality(estimate=self.count, max=self.count)
475475

476476
def perform_init(self, init_input: pa.RecordBatch) -> InitResult:
477477
"""Populate the work queue with range chunks."""
@@ -582,9 +582,9 @@ def output_schema(self) -> pa.Schema:
582582
"""Return the projected schema based on init_data."""
583583
return self.apply_projection(self.FULL_SCHEMA)
584584

585-
def cardinality(self) -> CardinalityInfo:
585+
def cardinality(self) -> TableCardinality:
586586
"""Return exact cardinality since we know the count."""
587-
return CardinalityInfo(estimate=self.count, max=self.count)
587+
return TableCardinality(estimate=self.count, max=self.count)
588588

589589
def _get_projected_column_indices(self) -> list[int]:
590590
"""Get the column indices to generate.

vgi/examples/table_in_out.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from vgi.ipc_utils import RecordBatchState
3535
from vgi.log import Level, Message
3636
from vgi.metadata import FunctionExample
37-
from vgi.table_function import CardinalityInfo
37+
from vgi.table_function import TableCardinality
3838
from vgi.table_in_out_function import (
3939
Output,
4040
OutputGenerator,
@@ -357,9 +357,9 @@ class Meta:
357357

358358
data: TableInput = Arg[TableInput](0, doc="Input table with numeric columns") # type: ignore[assignment]
359359

360-
def cardinality(self) -> CardinalityInfo | None:
360+
def cardinality(self) -> TableCardinality | None:
361361
"""Return cardinality estimate of exactly 1 row."""
362-
return CardinalityInfo(estimate=1, max=1)
362+
return TableCardinality(estimate=1, max=1)
363363

364364
def __init__(
365365
self, invocation: Invocation, logger: structlog.stdlib.BoundLogger
@@ -676,9 +676,9 @@ def __init__(
676676
super().__init__(invocation=invocation, logger=logger)
677677
self.sums: dict[str, pa.Scalar[Any]] = {}
678678

679-
def cardinality(self) -> CardinalityInfo | None:
679+
def cardinality(self) -> TableCardinality | None:
680680
"""Return cardinality estimate of exactly 1 row."""
681-
return CardinalityInfo(estimate=1, max=1)
681+
return TableCardinality(estimate=1, max=1)
682682

683683
@property
684684
def output_schema(self) -> pa.Schema:

0 commit comments

Comments
 (0)