From d92093c4e1ca04e9b3f19af6e6b84d2548d2d27c Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Mon, 5 Jan 2026 12:02:37 -0500 Subject: [PATCH 1/2] perf: Add slots=True to ArgumentSpec dataclass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduces memory footprint and improves attribute access speed for ArgumentSpec instances during function introspection. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 2 +- vgi/argument_spec.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index c6cf40b..36e6c42 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -10,7 +10,7 @@ {"id":"vgi-python-67w","title":"Create example function using DuckDB settings","description":"Create an example function that demonstrates using DuckDB settings to determine its output.\n\nRequirements:\n- Function declares required_settings in Meta\n- Output schema depends on a setting value (e.g., include extra column based on setting)\n- Clear documentation showing the pattern\n\nExample ideas:\n1. TimezoneAwareFunction: Output includes timezone info based on 'timezone' setting\n2. VerboseOutput: Adds debug columns when 'debug_mode' setting is true\n3. NumericPrecision: Uses 'numeric_precision' to determine output type precision\n\nAdd to vgi/examples/ and register in ExampleWorker.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.503681-05:00","created_by":"rusty","updated_at":"2026-01-04T13:22:23.779895-05:00","closed_at":"2026-01-04T13:22:23.779895-05:00","close_reason":"Added SettingsAwareFunction example","dependencies":[{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-c2b","type":"blocks","created_at":"2026-01-04T13:06:13.865474-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-ivf","type":"blocks","created_at":"2026-01-04T13:06:13.890269-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-bqb","type":"blocks","created_at":"2026-01-04T13:06:13.912531-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-a99","type":"blocks","created_at":"2026-01-04T13:06:13.936552-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-j4t","type":"blocks","created_at":"2026-01-04T13:06:13.958494-05:00","created_by":"rusty"}]} {"id":"vgi-python-6kr","title":"Test RowCountMismatchError when output exceeds input rows","notes":"Coverage: 86% in vgi/scalar_function.py. Missing tests for:\n- Lines 134-142: Error message when output has MORE rows than input\n\nCurrent tests cover when output \u003c input but not output \u003e input.\nNeed a test that returns an array with more elements than input rows.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T22:15:26.097532-05:00","created_by":"rusty","updated_at":"2026-01-04T22:32:00.720462-05:00","closed_at":"2026-01-04T22:32:00.720462-05:00","close_reason":"Added test for RowCountMismatchError when output exceeds input. Coverage improved from 86% to 93%."} {"id":"vgi-python-6o0","title":"Consolidate _OutputComplete classes into shared module","description":"Three nearly identical _OutputComplete classes exist in scalar_function.py:168-197 (_ScalarOutputComplete), table_function.py:136-175 (_OutputComplete), and table_in_out_function.py:356-400 (_OutputComplete). All are frozen dataclasses with batch field, log_message field, and from_process_result() classmethod. Extract to shared module (e.g., vgi/protocol_types.py) with a single parameterized class.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:40.893139-05:00","created_by":"rusty","updated_at":"2026-01-04T21:18:34.529683-05:00","closed_at":"2026-01-04T21:18:34.529683-05:00","close_reason":"PR #5 created: https://github.com/Query-farm/vgi-python/pull/5"} -{"id":"vgi-python-790","title":"Add slots=True to ArgumentSpec dataclass","description":"ArgumentSpec is a frozen dataclass but doesn't use slots=True. Adding slots=True would reduce memory footprint and improve attribute access speed, which matters if many specs are created during introspection.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:20.675386-05:00","created_by":"rusty","updated_at":"2026-01-05T11:51:20.675386-05:00"} +{"id":"vgi-python-790","title":"Add slots=True to ArgumentSpec dataclass","description":"ArgumentSpec is a frozen dataclass but doesn't use slots=True. Adding slots=True would reduce memory footprint and improve attribute access speed, which matters if many specs are created during introspection.","status":"in_progress","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:20.675386-05:00","created_by":"rusty","updated_at":"2026-01-05T12:02:16.137844-05:00"} {"id":"vgi-python-79e","title":"Unify ProtocolInput classes with shared base","description":"ProtocolInput classes in scalar_function.py:151-166 and table_in_out_function.py:109-142 have similar structure with batch and metadata fields. The table_in_out version adds is_finalize logic. Create shared base ProtocolInput in protocol_types.py with table_in_out extending it.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.31917-05:00","created_by":"rusty","updated_at":"2026-01-04T21:53:26.965345-05:00","closed_at":"2026-01-04T21:53:26.965345-05:00","close_reason":"PR #9 created - unified ProtocolInput with shared base in protocol_types.py"} {"id":"vgi-python-8ra","title":"Implement Arrow-based argument specification serialization","description":"## Overview\n\nImplement serialization and deserialization of function argument specifications using Apache Arrow schemas. This enables functions to describe their argument signatures (types, positions, special markers) in a format that can be transmitted over IPC and understood by DuckDB for function registration.\n\n## Design\n\nUses a **single Arrow schema** where:\n- Positional arguments come first (field order = position index)\n- Named arguments follow (marked with `vgi_arg=named` metadata)\n- Special types (TableInput, AnyArrow, varargs) use field metadata markers\n\n## Key Components\n\n1. `ArgumentSpec` dataclass - represents one argument's specification\n2. `argument_specs_to_schema()` - convert specs to Arrow schema\n3. `schema_to_argument_specs()` - convert schema back to specs\n4. `extract_argument_specs()` - extract specs from function class Arg descriptors\n\n## Metadata Keys\n\n| Key | Value | Meaning |\n|-----|-------|---------|\n| `vgi_arg` | `named` | Named argument (not positional) |\n| `vgi_type` | `table` | Receives table input (Arg[TableInput]) |\n| `vgi_type` | `any` | Accepts any Arrow type (Arg[AnyArrow]) |\n| `vgi_varargs` | `true` | Collects remaining positional args |\n\n## References\n\n- Plan file: `.claude/plans/purrfect-foraging-nygaard.md`\n- Arguments module: `vgi/arguments.py`","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-01-05T11:18:01.05631-05:00","created_by":"rusty","updated_at":"2026-01-05T11:34:12.712096-05:00","closed_at":"2026-01-05T11:34:12.712096-05:00","close_reason":"Implemented Arrow-based argument specification serialization with tests and documentation"} {"id":"vgi-python-a99","title":"Add settings accessor to function base classes","description":"Add a property to access DuckDB settings values in function implementations.\n\nChanges needed:\n- Add 'settings: dict[str, str]' property to Function base class\n- Property should return self.invocation.duckdb_settings or empty dict\n- Add convenience method like 'get_setting(name, default=None)'\n- Update ScalarFunction, TableFunctionGenerator, TableInOutFunction\n\nExample usage in function:\ndef compute(self, batch):\n tz = self.get_setting('timezone', 'UTC')\n # or\n tz = self.settings.get('timezone', 'UTC')","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.221602-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.171991-05:00","closed_at":"2026-01-04T13:20:41.171991-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-a99","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.738212-05:00","created_by":"rusty"}]} diff --git a/vgi/argument_spec.py b/vgi/argument_spec.py index 3133641..e5edad0 100644 --- a/vgi/argument_spec.py +++ b/vgi/argument_spec.py @@ -87,7 +87,7 @@ def _argument_spec_sort_key(spec: "ArgumentSpec") -> tuple[int, int | str]: # ============================================================================= -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class ArgumentSpec: """Specification for a single function argument. From ec15d9690bf0f5d2ec68f9d07626266d0ecf44a2 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Mon, 5 Jan 2026 12:02:54 -0500 Subject: [PATCH 2/2] bd sync: 2026-01-05 12:02:54 --- .beads/issues.jsonl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 36e6c42..0590853 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -10,7 +10,7 @@ {"id":"vgi-python-67w","title":"Create example function using DuckDB settings","description":"Create an example function that demonstrates using DuckDB settings to determine its output.\n\nRequirements:\n- Function declares required_settings in Meta\n- Output schema depends on a setting value (e.g., include extra column based on setting)\n- Clear documentation showing the pattern\n\nExample ideas:\n1. TimezoneAwareFunction: Output includes timezone info based on 'timezone' setting\n2. VerboseOutput: Adds debug columns when 'debug_mode' setting is true\n3. NumericPrecision: Uses 'numeric_precision' to determine output type precision\n\nAdd to vgi/examples/ and register in ExampleWorker.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.503681-05:00","created_by":"rusty","updated_at":"2026-01-04T13:22:23.779895-05:00","closed_at":"2026-01-04T13:22:23.779895-05:00","close_reason":"Added SettingsAwareFunction example","dependencies":[{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-c2b","type":"blocks","created_at":"2026-01-04T13:06:13.865474-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-ivf","type":"blocks","created_at":"2026-01-04T13:06:13.890269-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-bqb","type":"blocks","created_at":"2026-01-04T13:06:13.912531-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-a99","type":"blocks","created_at":"2026-01-04T13:06:13.936552-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-j4t","type":"blocks","created_at":"2026-01-04T13:06:13.958494-05:00","created_by":"rusty"}]} {"id":"vgi-python-6kr","title":"Test RowCountMismatchError when output exceeds input rows","notes":"Coverage: 86% in vgi/scalar_function.py. Missing tests for:\n- Lines 134-142: Error message when output has MORE rows than input\n\nCurrent tests cover when output \u003c input but not output \u003e input.\nNeed a test that returns an array with more elements than input rows.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T22:15:26.097532-05:00","created_by":"rusty","updated_at":"2026-01-04T22:32:00.720462-05:00","closed_at":"2026-01-04T22:32:00.720462-05:00","close_reason":"Added test for RowCountMismatchError when output exceeds input. Coverage improved from 86% to 93%."} {"id":"vgi-python-6o0","title":"Consolidate _OutputComplete classes into shared module","description":"Three nearly identical _OutputComplete classes exist in scalar_function.py:168-197 (_ScalarOutputComplete), table_function.py:136-175 (_OutputComplete), and table_in_out_function.py:356-400 (_OutputComplete). All are frozen dataclasses with batch field, log_message field, and from_process_result() classmethod. Extract to shared module (e.g., vgi/protocol_types.py) with a single parameterized class.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:40.893139-05:00","created_by":"rusty","updated_at":"2026-01-04T21:18:34.529683-05:00","closed_at":"2026-01-04T21:18:34.529683-05:00","close_reason":"PR #5 created: https://github.com/Query-farm/vgi-python/pull/5"} -{"id":"vgi-python-790","title":"Add slots=True to ArgumentSpec dataclass","description":"ArgumentSpec is a frozen dataclass but doesn't use slots=True. Adding slots=True would reduce memory footprint and improve attribute access speed, which matters if many specs are created during introspection.","status":"in_progress","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:20.675386-05:00","created_by":"rusty","updated_at":"2026-01-05T12:02:16.137844-05:00"} +{"id":"vgi-python-790","title":"Add slots=True to ArgumentSpec dataclass","description":"ArgumentSpec is a frozen dataclass but doesn't use slots=True. Adding slots=True would reduce memory footprint and improve attribute access speed, which matters if many specs are created during introspection.","status":"closed","priority":4,"issue_type":"task","created_at":"2026-01-05T11:51:20.675386-05:00","created_by":"rusty","updated_at":"2026-01-05T12:02:54.104187-05:00","closed_at":"2026-01-05T12:02:54.104187-05:00","close_reason":"Closed"} {"id":"vgi-python-79e","title":"Unify ProtocolInput classes with shared base","description":"ProtocolInput classes in scalar_function.py:151-166 and table_in_out_function.py:109-142 have similar structure with batch and metadata fields. The table_in_out version adds is_finalize logic. Create shared base ProtocolInput in protocol_types.py with table_in_out extending it.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.31917-05:00","created_by":"rusty","updated_at":"2026-01-04T21:53:26.965345-05:00","closed_at":"2026-01-04T21:53:26.965345-05:00","close_reason":"PR #9 created - unified ProtocolInput with shared base in protocol_types.py"} {"id":"vgi-python-8ra","title":"Implement Arrow-based argument specification serialization","description":"## Overview\n\nImplement serialization and deserialization of function argument specifications using Apache Arrow schemas. This enables functions to describe their argument signatures (types, positions, special markers) in a format that can be transmitted over IPC and understood by DuckDB for function registration.\n\n## Design\n\nUses a **single Arrow schema** where:\n- Positional arguments come first (field order = position index)\n- Named arguments follow (marked with `vgi_arg=named` metadata)\n- Special types (TableInput, AnyArrow, varargs) use field metadata markers\n\n## Key Components\n\n1. `ArgumentSpec` dataclass - represents one argument's specification\n2. `argument_specs_to_schema()` - convert specs to Arrow schema\n3. `schema_to_argument_specs()` - convert schema back to specs\n4. `extract_argument_specs()` - extract specs from function class Arg descriptors\n\n## Metadata Keys\n\n| Key | Value | Meaning |\n|-----|-------|---------|\n| `vgi_arg` | `named` | Named argument (not positional) |\n| `vgi_type` | `table` | Receives table input (Arg[TableInput]) |\n| `vgi_type` | `any` | Accepts any Arrow type (Arg[AnyArrow]) |\n| `vgi_varargs` | `true` | Collects remaining positional args |\n\n## References\n\n- Plan file: `.claude/plans/purrfect-foraging-nygaard.md`\n- Arguments module: `vgi/arguments.py`","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-01-05T11:18:01.05631-05:00","created_by":"rusty","updated_at":"2026-01-05T11:34:12.712096-05:00","closed_at":"2026-01-05T11:34:12.712096-05:00","close_reason":"Implemented Arrow-based argument specification serialization with tests and documentation"} {"id":"vgi-python-a99","title":"Add settings accessor to function base classes","description":"Add a property to access DuckDB settings values in function implementations.\n\nChanges needed:\n- Add 'settings: dict[str, str]' property to Function base class\n- Property should return self.invocation.duckdb_settings or empty dict\n- Add convenience method like 'get_setting(name, default=None)'\n- Update ScalarFunction, TableFunctionGenerator, TableInOutFunction\n\nExample usage in function:\ndef compute(self, batch):\n tz = self.get_setting('timezone', 'UTC')\n # or\n tz = self.settings.get('timezone', 'UTC')","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.221602-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.171991-05:00","closed_at":"2026-01-04T13:20:41.171991-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-a99","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.738212-05:00","created_by":"rusty"}]}