From 501b72458bc3485def7ad7ef823f59e6aef397c8 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Mon, 5 Jan 2026 12:03:35 -0500 Subject: [PATCH 1/2] refactor: Use Mapping instead of dict in extract_argument_specs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes arg_types parameter type from dict[str, pa.DataType] to Mapping[str, pa.DataType] for more flexibility, accepting any mapping type. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 4 ++-- vgi/argument_spec.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index c6cf40b..b3e42f3 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -10,7 +10,7 @@ {"id":"vgi-python-67w","title":"Create example function using DuckDB settings","description":"Create an example function that demonstrates using DuckDB settings to determine its output.\n\nRequirements:\n- Function declares required_settings in Meta\n- Output schema depends on a setting value (e.g., include extra column based on setting)\n- Clear documentation showing the pattern\n\nExample ideas:\n1. TimezoneAwareFunction: Output includes timezone info based on 'timezone' setting\n2. VerboseOutput: Adds debug columns when 'debug_mode' setting is true\n3. NumericPrecision: Uses 'numeric_precision' to determine output type precision\n\nAdd to vgi/examples/ and register in ExampleWorker.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.503681-05:00","created_by":"rusty","updated_at":"2026-01-04T13:22:23.779895-05:00","closed_at":"2026-01-04T13:22:23.779895-05:00","close_reason":"Added SettingsAwareFunction example","dependencies":[{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-c2b","type":"blocks","created_at":"2026-01-04T13:06:13.865474-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-ivf","type":"blocks","created_at":"2026-01-04T13:06:13.890269-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-bqb","type":"blocks","created_at":"2026-01-04T13:06:13.912531-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-a99","type":"blocks","created_at":"2026-01-04T13:06:13.936552-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-j4t","type":"blocks","created_at":"2026-01-04T13:06:13.958494-05:00","created_by":"rusty"}]} {"id":"vgi-python-6kr","title":"Test RowCountMismatchError when output exceeds input rows","notes":"Coverage: 86% in vgi/scalar_function.py. Missing tests for:\n- Lines 134-142: Error message when output has MORE rows than input\n\nCurrent tests cover when output \u003c input but not output \u003e input.\nNeed a test that returns an array with more elements than input rows.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T22:15:26.097532-05:00","created_by":"rusty","updated_at":"2026-01-04T22:32:00.720462-05:00","closed_at":"2026-01-04T22:32:00.720462-05:00","close_reason":"Added test for RowCountMismatchError when output exceeds input. Coverage improved from 86% to 93%."} {"id":"vgi-python-6o0","title":"Consolidate _OutputComplete classes into shared module","description":"Three nearly identical _OutputComplete classes exist in scalar_function.py:168-197 (_ScalarOutputComplete), table_function.py:136-175 (_OutputComplete), and table_in_out_function.py:356-400 (_OutputComplete). All are frozen dataclasses with batch field, log_message field, and from_process_result() classmethod. Extract to shared module (e.g., vgi/protocol_types.py) with a single parameterized class.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:40.893139-05:00","created_by":"rusty","updated_at":"2026-01-04T21:18:34.529683-05:00","closed_at":"2026-01-04T21:18:34.529683-05:00","close_reason":"PR #5 created: https://github.com/Query-farm/vgi-python/pull/5"} -{"id":"vgi-python-790","title":"Add slots=True to ArgumentSpec dataclass","description":"ArgumentSpec is a frozen dataclass but doesn't use slots=True. Adding slots=True would reduce memory footprint and improve attribute access speed, which matters if many specs are created during introspection.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:20.675386-05:00","created_by":"rusty","updated_at":"2026-01-05T11:51:20.675386-05:00"} +{"id":"vgi-python-790","title":"Add slots=True to ArgumentSpec dataclass","description":"ArgumentSpec is a frozen dataclass but doesn't use slots=True. Adding slots=True would reduce memory footprint and improve attribute access speed, which matters if many specs are created during introspection.","status":"closed","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:20.675386-05:00","created_by":"rusty","updated_at":"2026-01-05T12:02:54.104187-05:00","closed_at":"2026-01-05T12:02:54.104187-05:00","close_reason":"Closed"} {"id":"vgi-python-79e","title":"Unify ProtocolInput classes with shared base","description":"ProtocolInput classes in scalar_function.py:151-166 and table_in_out_function.py:109-142 have similar structure with batch and metadata fields. The table_in_out version adds is_finalize logic. Create shared base ProtocolInput in protocol_types.py with table_in_out extending it.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.31917-05:00","created_by":"rusty","updated_at":"2026-01-04T21:53:26.965345-05:00","closed_at":"2026-01-04T21:53:26.965345-05:00","close_reason":"PR #9 created - unified ProtocolInput with shared base in protocol_types.py"} {"id":"vgi-python-8ra","title":"Implement Arrow-based argument specification serialization","description":"## Overview\n\nImplement serialization and deserialization of function argument specifications using Apache Arrow schemas. This enables functions to describe their argument signatures (types, positions, special markers) in a format that can be transmitted over IPC and understood by DuckDB for function registration.\n\n## Design\n\nUses a **single Arrow schema** where:\n- Positional arguments come first (field order = position index)\n- Named arguments follow (marked with `vgi_arg=named` metadata)\n- Special types (TableInput, AnyArrow, varargs) use field metadata markers\n\n## Key Components\n\n1. `ArgumentSpec` dataclass - represents one argument's specification\n2. `argument_specs_to_schema()` - convert specs to Arrow schema\n3. `schema_to_argument_specs()` - convert schema back to specs\n4. `extract_argument_specs()` - extract specs from function class Arg descriptors\n\n## Metadata Keys\n\n| Key | Value | Meaning |\n|-----|-------|---------|\n| `vgi_arg` | `named` | Named argument (not positional) |\n| `vgi_type` | `table` | Receives table input (Arg[TableInput]) |\n| `vgi_type` | `any` | Accepts any Arrow type (Arg[AnyArrow]) |\n| `vgi_varargs` | `true` | Collects remaining positional args |\n\n## References\n\n- Plan file: `.claude/plans/purrfect-foraging-nygaard.md`\n- Arguments module: `vgi/arguments.py`","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-01-05T11:18:01.05631-05:00","created_by":"rusty","updated_at":"2026-01-05T11:34:12.712096-05:00","closed_at":"2026-01-05T11:34:12.712096-05:00","close_reason":"Implemented Arrow-based argument specification serialization with tests and documentation"} {"id":"vgi-python-a99","title":"Add settings accessor to function base classes","description":"Add a property to access DuckDB settings values in function implementations.\n\nChanges needed:\n- Add 'settings: dict[str, str]' property to Function base class\n- Property should return self.invocation.duckdb_settings or empty dict\n- Add convenience method like 'get_setting(name, default=None)'\n- Update ScalarFunction, TableFunctionGenerator, TableInOutFunction\n\nExample usage in function:\ndef compute(self, batch):\n tz = self.get_setting('timezone', 'UTC')\n # or\n tz = self.settings.get('timezone', 'UTC')","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.221602-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.171991-05:00","closed_at":"2026-01-04T13:20:41.171991-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-a99","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.738212-05:00","created_by":"rusty"}]} @@ -36,7 +36,7 @@ {"id":"vgi-python-j8a","title":"Investigate named argument field name ambiguity","description":"For named arguments, position is set to field.name (the SQL key). But there's potential ambiguity between the Python attribute name and the named argument key if they could differ. Currently they're the same, but if ArgumentSpec.name (attribute) ever differs from ArgumentSpec.position (key), the schema only preserves one. Investigate whether this is a real concern or document the assumption that they're always equal.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:20.257539-05:00","created_by":"rusty","updated_at":"2026-01-05T11:51:20.257539-05:00"} {"id":"vgi-python-j9k","title":"Add protocol types for IPC stream writers in cli.py","notes":"Line 53: self._writer: Any = None\n\nCould define a Protocol type for the IPC stream writer interface:\n```python\nclass IPCWriter(Protocol):\n def write_batch(self, batch: pa.RecordBatch) -\u003e None: ...\n def close(self) -\u003e None: ...\n```\n\nPart of 14.17% imprecision in cli.py (34 Anys total).","status":"closed","priority":4,"issue_type":"task","created_at":"2026-01-04T22:19:50.31711-05:00","created_by":"rusty","updated_at":"2026-01-04T22:37:01.488788-05:00","closed_at":"2026-01-04T22:37:01.488788-05:00","close_reason":"Replaced _writer: Any with _writer: pq.ParquetWriter | None. Removes 1 Any and provides proper type information."} {"id":"vgi-python-jrf","title":"Add varargs parameter to Arg descriptor","description":"In vgi/arguments.py:\n- Add varargs: bool = False to Arg.__init__ and __slots__\n- Update _resolve() to collect positional[position:] when varargs=True\n- Validate at least 1 value provided\n- Update _validate() to validate each element in tuple\n- Add Arguments.get_varargs(start, type=None) method\n- Update __repr__ to show varargs flag","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T10:49:20.012964-05:00","created_by":"rusty","updated_at":"2026-01-05T10:55:22.479344-05:00","closed_at":"2026-01-05T10:55:22.479344-05:00","close_reason":"Implemented varargs parameter in Arg descriptor with get_varargs() method and _validate_single()"} -{"id":"vgi-python-k7x","title":"Use Mapping instead of dict in extract_argument_specs signature","description":"The arg_types parameter in extract_argument_specs() is typed as dict[str, pa.DataType]. Using Mapping[str, pa.DataType] from collections.abc would be more flexible, accepting any mapping type.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:21.021496-05:00","created_by":"rusty","updated_at":"2026-01-05T11:51:21.021496-05:00"} +{"id":"vgi-python-k7x","title":"Use Mapping instead of dict in extract_argument_specs signature","description":"The arg_types parameter in extract_argument_specs() is typed as dict[str, pa.DataType]. Using Mapping[str, pa.DataType] from collections.abc would be more flexible, accepting any mapping type.","status":"in_progress","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:21.021496-05:00","created_by":"rusty","updated_at":"2026-01-05T12:03:02.525562-05:00"} {"id":"vgi-python-kz4","title":"Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency","description":"Naming inconsistency: TableFunctionGenerator uses *Generator suffix, but TableInOutGeneratorFunction uses *GeneratorFunction suffix. Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency. Also consider renaming ScalarFunctionGenerator if needed.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.581028-05:00","created_by":"rusty","updated_at":"2026-01-04T21:43:58.141038-05:00","closed_at":"2026-01-04T21:43:58.141038-05:00","close_reason":"PR #7 created: https://github.com/Query-farm/vgi-python/pull/7"} {"id":"vgi-python-l1u","title":"Consider custom __repr__ for ArgumentSpec","description":"The default dataclass __repr__ includes the full Arrow type repr which can be verbose. Consider a custom __repr__ that's more concise for debugging, e.g., 'ArgumentSpec(name=\"count\", pos=0, type=int64)' instead of showing the full pa.DataType object.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:21.415976-05:00","created_by":"rusty","updated_at":"2026-01-05T11:51:21.415976-05:00"} {"id":"vgi-python-lec","title":"Add test coverage for testing.py helper edge cases","notes":"Coverage: 89% in vgi/testing.py. Missing tests for:\n- Lines 421-422, 450-451: StopIteration handling in _process_batch\n- Lines 468-472: FINISHED status during data phase\n- Lines 485-486, 502-503: _finalize edge cases\n\nLow priority since these are test helpers.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-04T22:15:34.006563-05:00","created_by":"rusty","updated_at":"2026-01-04T22:16:18.592782-05:00"} diff --git a/vgi/argument_spec.py b/vgi/argument_spec.py index 3133641..0ac8851 100644 --- a/vgi/argument_spec.py +++ b/vgi/argument_spec.py @@ -34,7 +34,7 @@ """ import warnings -from collections.abc import Sequence +from collections.abc import Mapping, Sequence from dataclasses import dataclass from typing import Any, get_type_hints @@ -257,7 +257,7 @@ def schema_to_argument_specs(schema: pa.Schema) -> list[ArgumentSpec]: def extract_argument_specs( cls: type, - arg_types: dict[str, pa.DataType], + arg_types: Mapping[str, pa.DataType], ) -> list[ArgumentSpec]: """Extract ArgumentSpecs from a function class with Arg descriptors. From 7f3b44e759270fef5e584ff8004eb99a4b3fda0c Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Mon, 5 Jan 2026 12:03:51 -0500 Subject: [PATCH 2/2] bd sync: 2026-01-05 12:03:51 --- .beads/issues.jsonl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index b3e42f3..73e3814 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -36,7 +36,7 @@ {"id":"vgi-python-j8a","title":"Investigate named argument field name ambiguity","description":"For named arguments, position is set to field.name (the SQL key). But there's potential ambiguity between the Python attribute name and the named argument key if they could differ. Currently they're the same, but if ArgumentSpec.name (attribute) ever differs from ArgumentSpec.position (key), the schema only preserves one. Investigate whether this is a real concern or document the assumption that they're always equal.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:20.257539-05:00","created_by":"rusty","updated_at":"2026-01-05T11:51:20.257539-05:00"} {"id":"vgi-python-j9k","title":"Add protocol types for IPC stream writers in cli.py","notes":"Line 53: self._writer: Any = None\n\nCould define a Protocol type for the IPC stream writer interface:\n```python\nclass IPCWriter(Protocol):\n def write_batch(self, batch: pa.RecordBatch) -\u003e None: ...\n def close(self) -\u003e None: ...\n```\n\nPart of 14.17% imprecision in cli.py (34 Anys total).","status":"closed","priority":4,"issue_type":"task","created_at":"2026-01-04T22:19:50.31711-05:00","created_by":"rusty","updated_at":"2026-01-04T22:37:01.488788-05:00","closed_at":"2026-01-04T22:37:01.488788-05:00","close_reason":"Replaced _writer: Any with _writer: pq.ParquetWriter | None. Removes 1 Any and provides proper type information."} {"id":"vgi-python-jrf","title":"Add varargs parameter to Arg descriptor","description":"In vgi/arguments.py:\n- Add varargs: bool = False to Arg.__init__ and __slots__\n- Update _resolve() to collect positional[position:] when varargs=True\n- Validate at least 1 value provided\n- Update _validate() to validate each element in tuple\n- Add Arguments.get_varargs(start, type=None) method\n- Update __repr__ to show varargs flag","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T10:49:20.012964-05:00","created_by":"rusty","updated_at":"2026-01-05T10:55:22.479344-05:00","closed_at":"2026-01-05T10:55:22.479344-05:00","close_reason":"Implemented varargs parameter in Arg descriptor with get_varargs() method and _validate_single()"} -{"id":"vgi-python-k7x","title":"Use Mapping instead of dict in extract_argument_specs signature","description":"The arg_types parameter in extract_argument_specs() is typed as dict[str, pa.DataType]. Using Mapping[str, pa.DataType] from collections.abc would be more flexible, accepting any mapping type.","status":"in_progress","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:21.021496-05:00","created_by":"rusty","updated_at":"2026-01-05T12:03:02.525562-05:00"} +{"id":"vgi-python-k7x","title":"Use Mapping instead of dict in extract_argument_specs signature","description":"The arg_types parameter in extract_argument_specs() is typed as dict[str, pa.DataType]. Using Mapping[str, pa.DataType] from collections.abc would be more flexible, accepting any mapping type.","status":"closed","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:21.021496-05:00","created_by":"rusty","updated_at":"2026-01-05T12:03:51.771301-05:00","closed_at":"2026-01-05T12:03:51.771301-05:00","close_reason":"Closed"} {"id":"vgi-python-kz4","title":"Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency","description":"Naming inconsistency: TableFunctionGenerator uses *Generator suffix, but TableInOutGeneratorFunction uses *GeneratorFunction suffix. Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency. Also consider renaming ScalarFunctionGenerator if needed.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.581028-05:00","created_by":"rusty","updated_at":"2026-01-04T21:43:58.141038-05:00","closed_at":"2026-01-04T21:43:58.141038-05:00","close_reason":"PR #7 created: https://github.com/Query-farm/vgi-python/pull/7"} {"id":"vgi-python-l1u","title":"Consider custom __repr__ for ArgumentSpec","description":"The default dataclass __repr__ includes the full Arrow type repr which can be verbose. Consider a custom __repr__ that's more concise for debugging, e.g., 'ArgumentSpec(name=\"count\", pos=0, type=int64)' instead of showing the full pa.DataType object.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:21.415976-05:00","created_by":"rusty","updated_at":"2026-01-05T11:51:21.415976-05:00"} {"id":"vgi-python-lec","title":"Add test coverage for testing.py helper edge cases","notes":"Coverage: 89% in vgi/testing.py. Missing tests for:\n- Lines 421-422, 450-451: StopIteration handling in _process_batch\n- Lines 468-472: FINISHED status during data phase\n- Lines 485-486, 502-503: _finalize edge cases\n\nLow priority since these are test helpers.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-04T22:15:34.006563-05:00","created_by":"rusty","updated_at":"2026-01-04T22:16:18.592782-05:00"}