From b99ee015cb9cebf92a4763dc53f57565c6f77aef Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Sun, 4 Jan 2026 21:49:29 -0500 Subject: [PATCH] refactor: extract exception handling to Function base class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move _should_terminate and add _create_error_output helper to the Function base class, eliminating duplication across ScalarFunctionGenerator, TableFunctionGenerator, and TableInOutGeneratorFunction. - Add _should_terminate() to Function (was duplicated 3x) - Add _create_error_output() helper to Function - Update _process_with_exception_handling in all 3 classes to use helper - Net reduction of 9 lines of code 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 4 ++-- vgi/function.py | 18 ++++++++++++++++++ vgi/scalar_function.py | 13 ++----------- vgi/table_function.py | 13 ++----------- vgi/table_in_out_function.py | 13 ++----------- 5 files changed, 26 insertions(+), 35 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 72ce516..d3fd059 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -8,7 +8,7 @@ {"id":"vgi-python-79e","title":"Unify ProtocolInput classes with shared base","description":"ProtocolInput classes in scalar_function.py:151-166 and table_in_out_function.py:109-142 have similar structure with batch and metadata fields. The table_in_out version adds is_finalize logic. Create shared base ProtocolInput in protocol_types.py with table_in_out extending it.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.31917-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.240397-05:00"} {"id":"vgi-python-a99","title":"Add settings accessor to function base classes","description":"Add a property to access DuckDB settings values in function implementations.\n\nChanges needed:\n- Add 'settings: dict[str, str]' property to Function base class\n- Property should return self.invocation.duckdb_settings or empty dict\n- Add convenience method like 'get_setting(name, default=None)'\n- Update ScalarFunction, TableFunctionGenerator, TableInOutFunction\n\nExample usage in function:\ndef compute(self, batch):\n tz = self.get_setting('timezone', 'UTC')\n # or\n tz = self.settings.get('timezone', 'UTC')","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.221602-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.171991-05:00","closed_at":"2026-01-04T13:20:41.171991-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-a99","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.738212-05:00","created_by":"rusty"}]} {"id":"vgi-python-aad","title":"Design: DuckDB settings/pragmas access for VGI functions","description":"Design how VGI functions can declare required DuckDB settings/pragmas in their Meta class, and how these settings values should be passed during the bind phase.\n\nKey design decisions:\n1. How to declare required settings in function Meta (e.g., required_settings = ['timezone', 'threads'])\n2. How to add settings to Invocation dataclass\n3. How settings values should be accessed in function code\n4. Serialization format for settings in Arrow IPC\n\nRecommendation: Add 'duckdb_settings: dict[str, str] | None' to Invocation and 'required_settings: list[str]' to Meta class.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-04T13:05:47.619105-05:00","created_by":"rusty","updated_at":"2026-01-04T13:11:13.197139-05:00","closed_at":"2026-01-04T13:11:13.197139-05:00","close_reason":"Design document created at docs/design-duckdb-settings.md"} -{"id":"vgi-python-bi8","title":"Extract common _process_with_exception_handling into mixin","description":"The _process_with_exception_handling and _process_and_validate methods are duplicated across scalar_function.py:296-346, table_function.py:386-438, and table_in_out_function.py:586-642. All follow same pattern: try _process_and_validate, catch exceptions, return OutputComplete with error message. Extract to ProcessingMixin that all function types inherit from.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:41.02111-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:15.947758-05:00","dependencies":[{"issue_id":"vgi-python-bi8","depends_on_id":"vgi-python-6o0","type":"blocks","created_at":"2026-01-04T20:07:49.181408-05:00","created_by":"rusty"}]} +{"id":"vgi-python-bi8","title":"Extract common _process_with_exception_handling into mixin","description":"The _process_with_exception_handling and _process_and_validate methods are duplicated across scalar_function.py:296-346, table_function.py:386-438, and table_in_out_function.py:586-642. All follow same pattern: try _process_and_validate, catch exceptions, return OutputComplete with error message. Extract to ProcessingMixin that all function types inherit from.","status":"in_progress","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:41.02111-05:00","created_by":"rusty","updated_at":"2026-01-04T21:44:11.818421-05:00","dependencies":[{"issue_id":"vgi-python-bi8","depends_on_id":"vgi-python-6o0","type":"blocks","created_at":"2026-01-04T20:07:49.181408-05:00","created_by":"rusty"}]} {"id":"vgi-python-bku","title":"Change cardinality() method to property for consistency with output_schema","description":"Inconsistent access patterns: output_schema is a property but cardinality() is a method. Both return immutable data. Change cardinality() to a property for API consistency. Located in table_function.py:304-314.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.211782-05:00","created_by":"rusty","updated_at":"2026-01-04T21:33:10.617152-05:00","closed_at":"2026-01-04T21:33:10.617152-05:00","close_reason":"PR #6 created: https://github.com/Query-farm/vgi-python/pull/6"} {"id":"vgi-python-bqb","title":"Update worker to handle DuckDB settings during bind","description":"Update vgi/worker.py to process DuckDB settings from Invocation during the bind phase.\n\nChanges needed:\n- Read settings from invocation.duckdb_settings\n- Validate that all required_settings (from Meta) are present in invocation\n- Pass settings to function instance for access\n- Log settings usage for debugging\n\nThe worker should validate settings early in bind to fail fast if required settings are missing.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.04037-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.17079-05:00","closed_at":"2026-01-04T13:20:41.17079-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-bqb","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.714281-05:00","created_by":"rusty"}]} {"id":"vgi-python-c2b","title":"Add duckdb_settings field to Invocation class","description":"Update vgi/invocation.py to add a duckdb_settings field to the Invocation dataclass.\n\nChanges needed:\n- Add 'duckdb_settings: dict[str, str] | None = None' field to Invocation\n- Update serialize() to include settings in Arrow IPC batch\n- Update deserialize() to read settings from Arrow IPC batch\n- Handle None case (no settings requested)\n\nSerialization: Use a struct field with string key-value pairs or a map type.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:47.765077-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.167817-05:00","closed_at":"2026-01-04T13:20:41.167817-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-c2b","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.664038-05:00","created_by":"rusty"}]} @@ -16,7 +16,7 @@ {"id":"vgi-python-e9q","title":"Unify ProtocolOutput classes with shared base","description":"ProtocolOutput classes in table_function.py:177-224 and table_in_out_function.py:144-207 share similar metadata() method and from_process_result() classmethod. The table_in_out version adds status field. Create shared base with table_in_out extending it for status support.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.45014-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.371419-05:00"} {"id":"vgi-python-ivf","title":"Add required_settings to function Meta class","description":"Update function metadata to support declaring required DuckDB settings.\n\nChanges needed:\n- Add 'required_settings: list[str]' to FunctionMeta in vgi/metadata.py\n- Update Meta class resolution in vgi/function.py\n- Add validation that required_settings is a list of strings\n- Make it available via get_metadata() for introspection\n\nExample usage:\nclass MyFunction(TableInOutFunction):\n class Meta:\n required_settings = ['timezone', 'threads']","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:47.903747-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.169516-05:00","closed_at":"2026-01-04T13:20:41.169516-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-ivf","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.690253-05:00","created_by":"rusty"}]} {"id":"vgi-python-j4t","title":"Update client to pass DuckDB settings in Invocation","description":"Update vgi/client/client.py to support passing DuckDB settings.\n\nChanges needed:\n- Add 'duckdb_settings: dict[str, str] | None = None' parameter to relevant methods\n- Include settings in Invocation creation\n- Add helper to query function's required_settings from metadata\n\nThe client needs to know what settings to pass. Options:\n1. Client queries worker for function metadata first\n2. Settings passed explicitly by caller\n3. Client introspects function class if available locally","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.358656-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.173178-05:00","closed_at":"2026-01-04T13:20:41.173178-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-j4t","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.761572-05:00","created_by":"rusty"}]} -{"id":"vgi-python-kz4","title":"Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency","description":"Naming inconsistency: TableFunctionGenerator uses *Generator suffix, but TableInOutGeneratorFunction uses *GeneratorFunction suffix. Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency. Also consider renaming ScalarFunctionGenerator if needed.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.581028-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.506499-05:00"} +{"id":"vgi-python-kz4","title":"Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency","description":"Naming inconsistency: TableFunctionGenerator uses *Generator suffix, but TableInOutGeneratorFunction uses *GeneratorFunction suffix. Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency. Also consider renaming ScalarFunctionGenerator if needed.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.581028-05:00","created_by":"rusty","updated_at":"2026-01-04T21:43:58.141038-05:00","closed_at":"2026-01-04T21:43:58.141038-05:00","close_reason":"PR #7 created: https://github.com/Query-farm/vgi-python/pull/7"} {"id":"vgi-python-odi","title":"Change max_processes from method to property in Function hierarchy","description":"Refactor max_processes from a method to a property across the Function class hierarchy (Function, ScalarFunction, TableFunctionGenerator, TableInOutFunction, etc.). This makes the API more consistent since max_processes is effectively a constant per function class and properties are more idiomatic for such values.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T11:25:29.750648-05:00","created_by":"rusty","updated_at":"2026-01-04T11:50:57.566545-05:00","closed_at":"2026-01-04T11:50:57.566545-05:00","close_reason":"Closed"} {"id":"vgi-python-p91","title":"Move exception classes from function.py to own file","description":"Move InitIdentifierError and SchemaValidationError from vgi/function.py to a new vgi/exceptions.py file. Update imports in function.py and any other files that reference these exceptions.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T09:12:28.058227-05:00","created_by":"rusty","updated_at":"2026-01-04T09:17:52.477661-05:00","closed_at":"2026-01-04T09:17:52.477661-05:00","close_reason":"Closed"} {"id":"vgi-python-r3t","title":"Consolidate test client infrastructure in testing.py","description":"testing.py has three test client classes (FunctionTestClient, TableFunctionTestClient, ScalarFunctionTestClient) with shared infrastructure patterns. Extend _BaseTestClient pattern to reduce code duplication. Consider using a single unified client with method dispatch based on function type.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.913912-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:38.132591-05:00"} diff --git a/vgi/function.py b/vgi/function.py index e174eef..19d7621 100644 --- a/vgi/function.py +++ b/vgi/function.py @@ -42,10 +42,12 @@ import structlog import vgi.ipc_utils +import vgi.log from vgi.exceptions import ExecutionIdentifierError, SchemaValidationError from vgi.function_storage import FunctionStorage, FunctionStorageSqlite from vgi.invocation import InitResult, Invocation from vgi.metadata import MetadataMixin, ResolvedMetadata +from vgi.output_complete import OutputComplete __all__ = [ "Function", @@ -584,6 +586,22 @@ def _validate_output_schema(self, batch: pa.RecordBatch) -> None: context=f"output from {type(self).__name__}", ) + @final + def _should_terminate(self, result: OutputComplete) -> bool: + """Check if processing should terminate due to an exception.""" + return ( + result.log_message is not None + and result.log_message.level == vgi.log.Level.EXCEPTION + ) + + @final + def _create_error_output(self, exception: Exception) -> OutputComplete: + """Create an OutputComplete with error message from exception.""" + return OutputComplete( + batch=self.empty_output_batch, + log_message=vgi.log.Message.from_exception(exception), + ) + @property def input_schema(self) -> pa.Schema: """Return the input schema from the invocation. diff --git a/vgi/scalar_function.py b/vgi/scalar_function.py index c101d47..2ee9f2c 100644 --- a/vgi/scalar_function.py +++ b/vgi/scalar_function.py @@ -309,18 +309,9 @@ def _process_with_exception_handling( try: return self._process_and_validate(generator, input_batch) except Exception as e: - return OutputComplete( - batch=self.empty_output_batch, - log_message=vgi.log.Message.from_exception(e), - ) + return self._create_error_output(e) - @final - def _should_terminate(self, result: OutputComplete) -> bool: - """Check if processing should terminate due to an exception.""" - return ( - result.log_message is not None - and result.log_message.level == vgi.log.Level.EXCEPTION - ) + # _should_terminate inherited from Function @abstractmethod def process(self, batch: pa.RecordBatch) -> ScalarOutputGenerator: diff --git a/vgi/table_function.py b/vgi/table_function.py index c742726..6415b29 100644 --- a/vgi/table_function.py +++ b/vgi/table_function.py @@ -385,18 +385,9 @@ def _process_with_exception_handling( except StopIteration: raise except Exception as e: - return OutputComplete( - batch=self.empty_output_batch, - log_message=vgi.log.Message.from_exception(e), - ) + return self._create_error_output(e) - @final - def _should_terminate(self, result: OutputComplete) -> bool: - """Check if processing should terminate due to an exception.""" - return ( - result.log_message is not None - and result.log_message.level == vgi.log.Level.EXCEPTION - ) + # _should_terminate inherited from Function def process(self) -> OutputGenerator: """Process batches during the DATA phase. diff --git a/vgi/table_in_out_function.py b/vgi/table_in_out_function.py index a3d8378..0781281 100644 --- a/vgi/table_in_out_function.py +++ b/vgi/table_in_out_function.py @@ -583,18 +583,9 @@ def _process_with_exception_handling( try: return self._process_and_validate(generator, batch) except Exception as e: - return OutputComplete( - batch=self.empty_output_batch, - log_message=vgi.log.Message.from_exception(e), - ) + return self._create_error_output(e) - @final - def _should_terminate(self, result: OutputComplete) -> bool: - """Check if processing should terminate due to an exception.""" - return ( - result.log_message is not None - and result.log_message.level == vgi.log.Level.EXCEPTION - ) + # _should_terminate inherited from Function def process(self, batch: pa.RecordBatch) -> OutputGenerator: """Process input batches during the DATA phase.