From f4a1b3610786362980e5fb1afffc7f799e330ba0 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Mon, 5 Jan 2026 19:40:37 -0500 Subject: [PATCH] Add InvocationType.CATALOG for catalog interface invocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend InvocationType enum to support catalog method invocations: - CATALOG = "catalog" for CatalogInterface method calls - Documents simplified protocol: invoke → stream (no bind→init→stream) - function_name field contains method name (e.g., 'catalog_attach') - Input batch has 1 row with columns matching method parameters 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 4 ++-- vgi/invocation.py | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 1480617..a12959a 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,4 +1,4 @@ -{"id":"vgi-python-085","title":"Add serialize/deserialize methods to catalog dataclasses","description":"Add Arrow IPC serialization directly to the dataclasses in vgi/catalog/catalog_interface.py.\n\nAdd to each dataclass:\n- serialize() -\u003e bytes method\n- @classmethod deserialize(batch: pa.RecordBatch) -\u003e Self method\n- Arrow schema class variable for each type\n\nDataclasses to update:\n- CatalogAttachResult\n- SchemaInfo \n- TableInfo\n- ViewInfo\n- FunctionInfo\n- ScanFunctionResult\n\nSerialization rules from plan:\n- Single-row batches for scalar returns\n- Multi-row batches for streaming (Iterable returns)\n- None = 0-row/0-column batch\n- Column names match field names exactly\n- SerializedSchema fields use pa.binary()\n- tags fields use pa.map_(pa.string(), pa.string())\n\nAlso create vgi/catalog/__init__.py with package exports.","status":"open","priority":1,"issue_type":"task","created_at":"2026-01-05T19:26:40.362177-05:00","created_by":"rusty","updated_at":"2026-01-05T19:26:40.362177-05:00"} +{"id":"vgi-python-085","title":"Add serialize/deserialize methods to catalog dataclasses","description":"Add Arrow IPC serialization directly to the dataclasses in vgi/catalog/catalog_interface.py.\n\nAdd to each dataclass:\n- serialize() -\u003e bytes method\n- @classmethod deserialize(batch: pa.RecordBatch) -\u003e Self method\n- Arrow schema class variable for each type\n\nDataclasses to update:\n- CatalogAttachResult\n- SchemaInfo \n- TableInfo\n- ViewInfo\n- FunctionInfo\n- ScanFunctionResult\n\nSerialization rules from plan:\n- Single-row batches for scalar returns\n- Multi-row batches for streaming (Iterable returns)\n- None = 0-row/0-column batch\n- Column names match field names exactly\n- SerializedSchema fields use pa.binary()\n- tags fields use pa.map_(pa.string(), pa.string())\n\nAlso create vgi/catalog/__init__.py with package exports.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-05T19:26:40.362177-05:00","created_by":"rusty","updated_at":"2026-01-05T19:39:06.385062-05:00","closed_at":"2026-01-05T19:39:06.385062-05:00","close_reason":"PR #24 created with serialize/deserialize methods"} {"id":"vgi-python-0fe","title":"Add is_varargs to ParameterInfo and metadata extraction","description":"In vgi/metadata.py:\n- Add is_varargs: bool = False to ParameterInfo\n- Update to_dict() and from_dict()\n- Add is_varargs field to _PARAMETER_STRUCT for Arrow serialization\n- Extract varargs flag in extract_parameters()\n- Add _validate_varargs() with rules:\n - Only one varargs parameter allowed\n - Must be positional (not named)\n - Must be last positional (before TableInput if present)\n - Cannot have default value","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T10:49:20.141375-05:00","created_by":"rusty","updated_at":"2026-01-05T10:58:21.242603-05:00","closed_at":"2026-01-05T10:58:21.242603-05:00","close_reason":"Added is_varargs to ParameterInfo, _PARAMETER_STRUCT, extract_parameters(), and _validate_varargs()","dependencies":[{"issue_id":"vgi-python-0fe","depends_on_id":"vgi-python-jrf","type":"blocks","created_at":"2026-01-05T10:49:26.421664-05:00","created_by":"rusty"}]} {"id":"vgi-python-0hr","title":"Remove redundant InitInputType class attribute","description":"InitInputType class attribute duplicates the generic type parameter: 'class ScalarFunctionGenerator(Function[FunctionInitInput])' already specifies the type, but 'InitInputType = FunctionInitInput' repeats it. Investigate using get_type_hints or __orig_bases__ to infer the type and remove the redundant attribute.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.780529-05:00","created_by":"rusty","updated_at":"2026-01-04T22:00:40.221423-05:00","closed_at":"2026-01-04T22:00:40.221423-05:00","close_reason":"PR #10 created - uses _get_init_input_type() to infer type from generic parameter"} {"id":"vgi-python-1s5","title":"Move distributed state management to optional mixin","description":"The Function base class in function.py includes ~200 lines for distributed state management (store_state, collect_states, enqueue_work, dequeue_work, work queue storage). Not all functions need this. Extract to DistributedStateMixin that functions can opt into, keeping Function base class simpler for basic use cases.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:53.606614-05:00","created_by":"rusty","updated_at":"2026-01-04T21:22:09.772825-05:00","closed_at":"2026-01-04T21:22:09.772825-05:00","close_reason":"Analysis complete: extraction not recommended. The distributed state methods are tightly coupled with execution_identifier and storage, which are used by core initialization methods. Extraction would require moving initialize_global_state/load_global_state to the mixin, breaking the protocol and requiring multiple inheritance. Current API is already opt-in (just don't call the methods) and well-documented."} @@ -68,7 +68,7 @@ {"id":"vgi-python-odi","title":"Change max_processes from method to property in Function hierarchy","description":"Refactor max_processes from a method to a property across the Function class hierarchy (Function, ScalarFunction, TableFunctionGenerator, TableInOutFunction, etc.). This makes the API more consistent since max_processes is effectively a constant per function class and properties are more idiomatic for such values.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T11:25:29.750648-05:00","created_by":"rusty","updated_at":"2026-01-04T11:50:57.566545-05:00","closed_at":"2026-01-04T11:50:57.566545-05:00","close_reason":"Closed"} {"id":"vgi-python-p91","title":"Move exception classes from function.py to own file","description":"Move InitIdentifierError and SchemaValidationError from vgi/function.py to a new vgi/exceptions.py file. Update imports in function.py and any other files that reference these exceptions.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T09:12:28.058227-05:00","created_by":"rusty","updated_at":"2026-01-04T09:17:52.477661-05:00","closed_at":"2026-01-04T09:17:52.477661-05:00","close_reason":"Closed"} {"id":"vgi-python-pnm","title":"Create vgi/catalog/read_only_catalog.py - ReadOnlyCatalogInterface","description":"Create ReadOnlyCatalogInterface that prevents all DDL operations.\n\nFiles to create:\n- vgi/catalog/read_only_catalog.py\n\nReadOnlyCatalogInterface(CatalogInterface):\n- Override all DDL methods to raise ReadOnlyError\n- catalog_create, catalog_drop\n- schema_create, schema_drop\n- All table_* DDL methods\n- All view_* DDL methods\n- Transaction methods (optional - could allow read-only transactions)\n\nProperties:\n- supports_transactions = False (class attribute)\n- catalog_version_frozen = True (class attribute)\n\nCreate ReadOnlyError exception class in vgi/exceptions.py.\n\nInclude tests that verify all DDL operations raise ReadOnlyError.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T19:17:30.998165-05:00","created_by":"rusty","updated_at":"2026-01-05T19:21:50.075345-05:00","closed_at":"2026-01-05T19:21:50.075345-05:00","close_reason":"User requested closure","dependencies":[{"issue_id":"vgi-python-pnm","depends_on_id":"vgi-python-ik9","type":"blocks","created_at":"2026-01-05T19:18:36.574236-05:00","created_by":"rusty"}]} -{"id":"vgi-python-po3","title":"Add InvocationType.CATALOG to protocol","description":"Extend InvocationType enum to support catalog invocations.\n\nFile: vgi/invocation.py\n\nChanges:\n1. Add CATALOG = 'catalog' to InvocationType enum\n2. Update docstring to document the new type\n\nThe CATALOG invocation type indicates:\n- function_name field contains a CatalogInterface method name (e.g., 'catalog_attach', 'schemas', 'table_get')\n- Simplified protocol: invoke → stream (no bind→init→stream phases)\n- Input batch has exactly 1 row with column names matching method parameters\n\nEnsure existing serialization/deserialization handles the new value.","status":"open","priority":1,"issue_type":"task","created_at":"2026-01-05T19:26:40.477214-05:00","created_by":"rusty","updated_at":"2026-01-05T19:26:40.477214-05:00"} +{"id":"vgi-python-po3","title":"Add InvocationType.CATALOG to protocol","description":"Extend InvocationType enum to support catalog invocations.\n\nFile: vgi/invocation.py\n\nChanges:\n1. Add CATALOG = 'catalog' to InvocationType enum\n2. Update docstring to document the new type\n\nThe CATALOG invocation type indicates:\n- function_name field contains a CatalogInterface method name (e.g., 'catalog_attach', 'schemas', 'table_get')\n- Simplified protocol: invoke → stream (no bind→init→stream phases)\n- Input batch has exactly 1 row with column names matching method parameters\n\nEnsure existing serialization/deserialization handles the new value.","status":"in_progress","priority":1,"issue_type":"task","created_at":"2026-01-05T19:26:40.477214-05:00","created_by":"rusty","updated_at":"2026-01-05T19:39:29.693569-05:00"} {"id":"vgi-python-q1w","title":"Implement optional CatalogStorage with SQLite default","description":"Create optional storage layer for catalog attach_id and transaction_id persistence.\n\nFile: vgi/catalog/storage.py\n\nCatalogStorage protocol:\n- attach_put(attach_id, catalog_name, options) -\u003e None\n- attach_get(attach_id) -\u003e tuple[str, dict] | None\n- attach_delete(attach_id) -\u003e None\n- attach_list() -\u003e list[AttachId]\n- transaction_put(transaction_id, attach_id, state) -\u003e None\n- transaction_get(transaction_id) -\u003e tuple[AttachId, bytes] | None\n- transaction_delete(transaction_id) -\u003e None\n\nCatalogStorageSqlite implementation:\n- Default location: ~/.state/vgi/vgi_catalog.db\n- WAL mode for concurrent access\n- Similar pattern to FunctionStorageSqlite\n\nUsage:\n- CatalogInterface subclasses can optionally use storage\n- Simple catalogs can ignore (return empty attach_id bytes)\n- Catalogs needing persistence override storage attribute\n\nAdd storage class attribute to CatalogInterface with None default.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-05T19:27:15.084387-05:00","created_by":"rusty","updated_at":"2026-01-05T19:27:15.084387-05:00"} {"id":"vgi-python-qud","title":"Test FunctionStorageSqlite: global_delete, global_exists, queue_clear","notes":"Coverage: 83% in vgi/function_storage.py. Missing tests for:\n- Line 266: KeyError path in global_get (key not found)\n- Lines 273-278: global_delete method\n- Lines 282-290: global_exists method \n- Line 337: queue_push with empty list\n- Lines 376-385: queue_clear method\n\nThese storage operations need direct unit tests to ensure correctness.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T22:15:25.982124-05:00","created_by":"rusty","updated_at":"2026-01-04T22:30:05.625934-05:00","closed_at":"2026-01-04T22:30:05.625934-05:00","close_reason":"Added comprehensive tests for FunctionStorageSqlite. Coverage improved from 83% to 98%."} {"id":"vgi-python-r3t","title":"Consolidate test client infrastructure in testing.py","description":"testing.py has three test client classes (FunctionTestClient, TableFunctionTestClient, ScalarFunctionTestClient) with shared infrastructure patterns. Extend _BaseTestClient pattern to reduce code duplication. Consider using a single unified client with method dispatch based on function type.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.913912-05:00","created_by":"rusty","updated_at":"2026-01-04T22:02:51.368907-05:00","closed_at":"2026-01-04T22:02:51.368907-05:00","close_reason":"Not warranted - _BaseTestClient already provides shared infrastructure (context manager, log capture, logging). The three clients handle genuinely different protocols (TableInOut with finalize, TableFunction with no input, Scalar with different protocol). Unifying would add type detection complexity without real benefit."} diff --git a/vgi/invocation.py b/vgi/invocation.py index 5978015..1828274 100644 --- a/vgi/invocation.py +++ b/vgi/invocation.py @@ -4,7 +4,7 @@ in the VGI protocol. Classes: - InvocationType: Enum distinguishing scalar vs table invocation types. + InvocationType: Enum distinguishing scalar, table, and catalog invocation types. InitResult: Result from global initialization phase. Invocation: Complete function invocation request. @@ -46,11 +46,17 @@ class InvocationType(Enum): SCALAR: Scalar function that transforms input batches to single-column output. TABLE: Table function (either generator or table-in-out) that produces multi-column output. + CATALOG: Catalog interface method invocation. The function_name field + contains the CatalogInterface method name (e.g., 'catalog_attach', + 'schemas', 'table_get'). Uses simplified protocol: invoke → stream + (no bind→init→stream phases). Input batch has exactly 1 row with + column names matching method parameters. """ SCALAR = "scalar" TABLE = "table" + CATALOG = "catalog" @dataclass(frozen=True, slots=True)