From e8c390fd9a5ece9480b8b475c6589b3d3d788be6 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Mon, 5 Jan 2026 19:38:47 -0500 Subject: [PATCH] Add serialize/deserialize methods to catalog dataclasses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Arrow IPC serialization support to all catalog dataclasses: - CatalogAttachResult - SchemaInfo - TableInfo - ViewInfo - FunctionInfo - ScanFunctionResult Each dataclass now has: - ARROW_SCHEMA class variable defining the Arrow schema - serialize() method returning bytes - deserialize() classmethod reconstructing from RecordBatch Also creates vgi/catalog/__init__.py with public exports. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 2 +- vgi/catalog/__init__.py | 65 ++++++ vgi/catalog/catalog_interface.py | 361 +++++++++++++++++++++++++++---- 3 files changed, 389 insertions(+), 39 deletions(-) create mode 100644 vgi/catalog/__init__.py diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 1480617..d72632d 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,4 +1,4 @@ -{"id":"vgi-python-085","title":"Add serialize/deserialize methods to catalog dataclasses","description":"Add Arrow IPC serialization directly to the dataclasses in vgi/catalog/catalog_interface.py.\n\nAdd to each dataclass:\n- serialize() -\u003e bytes method\n- @classmethod deserialize(batch: pa.RecordBatch) -\u003e Self method\n- Arrow schema class variable for each type\n\nDataclasses to update:\n- CatalogAttachResult\n- SchemaInfo \n- TableInfo\n- ViewInfo\n- FunctionInfo\n- ScanFunctionResult\n\nSerialization rules from plan:\n- Single-row batches for scalar returns\n- Multi-row batches for streaming (Iterable returns)\n- None = 0-row/0-column batch\n- Column names match field names exactly\n- SerializedSchema fields use pa.binary()\n- tags fields use pa.map_(pa.string(), pa.string())\n\nAlso create 
vgi/catalog/__init__.py with package exports.","status":"open","priority":1,"issue_type":"task","created_at":"2026-01-05T19:26:40.362177-05:00","created_by":"rusty","updated_at":"2026-01-05T19:26:40.362177-05:00"} +{"id":"vgi-python-085","title":"Add serialize/deserialize methods to catalog dataclasses","description":"Add Arrow IPC serialization directly to the dataclasses in vgi/catalog/catalog_interface.py.\n\nAdd to each dataclass:\n- serialize() -\u003e bytes method\n- @classmethod deserialize(batch: pa.RecordBatch) -\u003e Self method\n- Arrow schema class variable for each type\n\nDataclasses to update:\n- CatalogAttachResult\n- SchemaInfo \n- TableInfo\n- ViewInfo\n- FunctionInfo\n- ScanFunctionResult\n\nSerialization rules from plan:\n- Single-row batches for scalar returns\n- Multi-row batches for streaming (Iterable returns)\n- None = 0-row/0-column batch\n- Column names match field names exactly\n- SerializedSchema fields use pa.binary()\n- tags fields use pa.map_(pa.string(), pa.string())\n\nAlso create vgi/catalog/__init__.py with package exports.","status":"in_progress","priority":1,"issue_type":"task","created_at":"2026-01-05T19:26:40.362177-05:00","created_by":"rusty","updated_at":"2026-01-05T19:29:02.815022-05:00"} {"id":"vgi-python-0fe","title":"Add is_varargs to ParameterInfo and metadata extraction","description":"In vgi/metadata.py:\n- Add is_varargs: bool = False to ParameterInfo\n- Update to_dict() and from_dict()\n- Add is_varargs field to _PARAMETER_STRUCT for Arrow serialization\n- Extract varargs flag in extract_parameters()\n- Add _validate_varargs() with rules:\n - Only one varargs parameter allowed\n - Must be positional (not named)\n - Must be last positional (before TableInput if present)\n - Cannot have default 
value","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T10:49:20.141375-05:00","created_by":"rusty","updated_at":"2026-01-05T10:58:21.242603-05:00","closed_at":"2026-01-05T10:58:21.242603-05:00","close_reason":"Added is_varargs to ParameterInfo, _PARAMETER_STRUCT, extract_parameters(), and _validate_varargs()","dependencies":[{"issue_id":"vgi-python-0fe","depends_on_id":"vgi-python-jrf","type":"blocks","created_at":"2026-01-05T10:49:26.421664-05:00","created_by":"rusty"}]} {"id":"vgi-python-0hr","title":"Remove redundant InitInputType class attribute","description":"InitInputType class attribute duplicates the generic type parameter: 'class ScalarFunctionGenerator(Function[FunctionInitInput])' already specifies the type, but 'InitInputType = FunctionInitInput' repeats it. Investigate using get_type_hints or __orig_bases__ to infer the type and remove the redundant attribute.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.780529-05:00","created_by":"rusty","updated_at":"2026-01-04T22:00:40.221423-05:00","closed_at":"2026-01-04T22:00:40.221423-05:00","close_reason":"PR #10 created - uses _get_init_input_type() to infer type from generic parameter"} {"id":"vgi-python-1s5","title":"Move distributed state management to optional mixin","description":"The Function base class in function.py includes ~200 lines for distributed state management (store_state, collect_states, enqueue_work, dequeue_work, work queue storage). Not all functions need this. Extract to DistributedStateMixin that functions can opt into, keeping Function base class simpler for basic use cases.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:53.606614-05:00","created_by":"rusty","updated_at":"2026-01-04T21:22:09.772825-05:00","closed_at":"2026-01-04T21:22:09.772825-05:00","close_reason":"Analysis complete: extraction not recommended. 
The distributed state methods are tightly coupled with execution_identifier and storage, which are used by core initialization methods. Extraction would require moving initialize_global_state/load_global_state to the mixin, breaking the protocol and requiring multiple inheritance. Current API is already opt-in (just don't call the methods) and well-documented."} diff --git a/vgi/catalog/__init__.py b/vgi/catalog/__init__.py new file mode 100644 index 0000000..35b0f41 --- /dev/null +++ b/vgi/catalog/__init__.py @@ -0,0 +1,65 @@ +"""VGI Catalog Interface for exposing catalogs, schemas, tables, and views. + +This module provides the abstract base class and data types for implementing +catalog interfaces in VGI workers, enabling DuckDB ATTACH support. + +Example: + from vgi.catalog import AttachId, CatalogAttachResult, CatalogInterface, SchemaInfo + + class MyCatalog(CatalogInterface): + def catalogs(self) -> Iterable[str]: + return ["my_catalog"] + + def catalog_attach(self, *, name: str, options: dict) -> CatalogAttachResult: + return CatalogAttachResult( + attach_id=AttachId(b"my-id"), + supports_transactions=False, + supports_time_travel=False, + catalog_version_frozen=True, + catalog_version=1, + ) + # ... 
implement other abstract methods + +""" + +from vgi.catalog.catalog_interface import ( + AttachId, + CatalogAttachResult, + CatalogInterface, + CatalogObject, + CatalogSchemaObject, + FunctionInfo, + FunctionType, + OnConflict, + ReadOnlyCatalogInterface, + ScanFunctionResult, + SchemaInfo, + SerializedSchema, + SqlExpression, + TableInfo, + TransactionId, + ViewInfo, +) + +__all__ = [ + # Type aliases + "AttachId", + "TransactionId", + "SerializedSchema", + "SqlExpression", + # Enums + "FunctionType", + "OnConflict", + # Data classes + "CatalogAttachResult", + "CatalogObject", + "CatalogSchemaObject", + "SchemaInfo", + "TableInfo", + "ViewInfo", + "FunctionInfo", + "ScanFunctionResult", + # Interfaces + "CatalogInterface", + "ReadOnlyCatalogInterface", +] diff --git a/vgi/catalog/catalog_interface.py b/vgi/catalog/catalog_interface.py index dce8e21..6e8d673 100644 --- a/vgi/catalog/catalog_interface.py +++ b/vgi/catalog/catalog_interface.py @@ -1,11 +1,18 @@ +"""VGI Catalog Interface for exposing catalogs, schemas, tables, and views. + +This module provides the abstract base class and data types for implementing +catalog interfaces in VGI workers, enabling DuckDB ATTACH support. +""" + from abc import ABC, abstractmethod from collections.abc import Iterable from dataclasses import dataclass -from typing import Any, NewType from enum import Enum +from typing import Any, ClassVar, NewType, Self import pyarrow as pa +import vgi.ipc_utils # Type aliases for improved code clarity and type checking. # At runtime, these are equivalent to their underlying types. @@ -17,6 +24,8 @@ @dataclass(frozen=True) class CatalogAttachResult: + """Result from attaching to a catalog.""" + # The unique id for the attached catalog. attach_id: AttachId # Indicate if the worker supports transactions or not. @@ -32,6 +41,54 @@ class CatalogAttachResult: # or other objects change. 
catalog_version: int + ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema( + [ + pa.field("attach_id", pa.binary(), nullable=False), + pa.field("supports_transactions", pa.bool_(), nullable=False), + pa.field("supports_time_travel", pa.bool_(), nullable=False), + pa.field("catalog_version_frozen", pa.bool_(), nullable=False), + pa.field("catalog_version", pa.int64(), nullable=False), + ] # type: ignore[arg-type] + ) + + def serialize(self) -> bytes: + """Serialize to Arrow IPC bytes.""" + batch = pa.RecordBatch.from_pylist( + [ + { + "attach_id": self.attach_id, + "supports_transactions": self.supports_transactions, + "supports_time_travel": self.supports_time_travel, + "catalog_version_frozen": self.catalog_version_frozen, + "catalog_version": self.catalog_version, + } + ], + schema=self.ARROW_SCHEMA, + ) + return vgi.ipc_utils.serialize_record_batch(batch) + + @classmethod + def deserialize(cls, batch: pa.RecordBatch) -> Self: + """Deserialize from Arrow RecordBatch.""" + row = vgi.ipc_utils.validate_single_row_batch( + batch, + cls.__name__, + required_fields=[ + "attach_id", + "supports_transactions", + "supports_time_travel", + "catalog_version_frozen", + "catalog_version", + ], + ) + return cls( + attach_id=AttachId(row["attach_id"]), + supports_transactions=row["supports_transactions"], + supports_time_travel=row["supports_time_travel"], + catalog_version_frozen=row["catalog_version_frozen"], + catalog_version=row["catalog_version"], + ) + @dataclass(frozen=True) class CatalogObject: @@ -63,6 +120,48 @@ class SchemaInfo(CatalogObject): # Is this the default schema of the catalog is_default: bool + ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema( + [ + pa.field("attach_id", pa.binary(), nullable=False), + pa.field("name", pa.string(), nullable=False), + pa.field("is_default", pa.bool_(), nullable=False), + pa.field("comment", pa.string(), nullable=True), + pa.field("tags", pa.map_(pa.string(), pa.string()), nullable=False), + ] # type: ignore[arg-type] + ) + + def 
serialize(self) -> bytes: + """Serialize to Arrow IPC bytes.""" + batch = pa.RecordBatch.from_pylist( + [ + { + "attach_id": self.attach_id, + "name": self.name, + "is_default": self.is_default, + "comment": self.comment, + "tags": self.tags, + } + ], + schema=self.ARROW_SCHEMA, + ) + return vgi.ipc_utils.serialize_record_batch(batch) + + @classmethod + def deserialize(cls, batch: pa.RecordBatch) -> Self: + """Deserialize from Arrow RecordBatch.""" + row = vgi.ipc_utils.validate_single_row_batch( + batch, + cls.__name__, + required_fields=["attach_id", "name", "is_default", "tags"], + ) + return cls( + attach_id=AttachId(row["attach_id"]), + name=row["name"], + is_default=row["is_default"], + comment=row.get("comment"), + tags=dict(row["tags"]) if row["tags"] else {}, + ) + @dataclass(frozen=True) class TableInfo(CatalogSchemaObject): @@ -76,6 +175,67 @@ class TableInfo(CatalogSchemaObject): unique_constraints: list[list[int]] check_constraints: list[str] + ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema( + [ + pa.field("name", pa.string(), nullable=False), + pa.field("schema_name", pa.string(), nullable=False), + pa.field("columns", pa.binary(), nullable=False), + pa.field("not_null_constraints", pa.list_(pa.int32()), nullable=False), + pa.field( + "unique_constraints", pa.list_(pa.list_(pa.int32())), nullable=False + ), + pa.field("check_constraints", pa.list_(pa.string()), nullable=False), + pa.field("comment", pa.string(), nullable=True), + pa.field("tags", pa.map_(pa.string(), pa.string()), nullable=False), + ] # type: ignore[arg-type] + ) + + def serialize(self) -> bytes: + """Serialize to Arrow IPC bytes.""" + batch = pa.RecordBatch.from_pylist( + [ + { + "name": self.name, + "schema_name": self.schema_name, + "columns": self.columns, + "not_null_constraints": self.not_null_constraints, + "unique_constraints": self.unique_constraints, + "check_constraints": self.check_constraints, + "comment": self.comment, + "tags": self.tags, + } + ], + 
schema=self.ARROW_SCHEMA, + ) + return vgi.ipc_utils.serialize_record_batch(batch) + + @classmethod + def deserialize(cls, batch: pa.RecordBatch) -> Self: + """Deserialize from Arrow RecordBatch.""" + row = vgi.ipc_utils.validate_single_row_batch( + batch, + cls.__name__, + required_fields=[ + "name", + "schema_name", + "columns", + "not_null_constraints", + "unique_constraints", + "check_constraints", + "tags", + ], + ) + return cls( + name=row["name"], + schema_name=row["schema_name"], + columns=SerializedSchema(row["columns"]), + not_null_constraints=list(row["not_null_constraints"]), + unique_constraints=[list(c) for c in row["unique_constraints"]], + check_constraints=list(row["check_constraints"]), + comment=row.get("comment"), + tags=dict(row["tags"]) if row["tags"] else {}, + ) + @dataclass(frozen=True) class ViewInfo(CatalogSchemaObject): @@ -84,6 +244,48 @@ class ViewInfo(CatalogSchemaObject): # The definition of the view which is a SQL query string. definition: str + ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema( + [ + pa.field("name", pa.string(), nullable=False), + pa.field("schema_name", pa.string(), nullable=False), + pa.field("definition", pa.string(), nullable=False), + pa.field("comment", pa.string(), nullable=True), + pa.field("tags", pa.map_(pa.string(), pa.string()), nullable=False), + ] + ) + + def serialize(self) -> bytes: + """Serialize to Arrow IPC bytes.""" + batch = pa.RecordBatch.from_pylist( + [ + { + "name": self.name, + "schema_name": self.schema_name, + "definition": self.definition, + "comment": self.comment, + "tags": self.tags, + } + ], + schema=self.ARROW_SCHEMA, + ) + return vgi.ipc_utils.serialize_record_batch(batch) + + @classmethod + def deserialize(cls, batch: pa.RecordBatch) -> Self: + """Deserialize from Arrow RecordBatch.""" + row = vgi.ipc_utils.validate_single_row_batch( + batch, + cls.__name__, + required_fields=["name", "schema_name", "definition", "tags"], + ) + return cls( + name=row["name"], + 
schema_name=row["schema_name"], + definition=row["definition"], + comment=row.get("comment"), + tags=dict(row["tags"]) if row["tags"] else {}, + ) + class FunctionType(Enum): """The type of function in a schema.""" @@ -120,9 +322,66 @@ class FunctionInfo(CatalogSchemaObject): # schema.serialize().to_pybytes() output_schema: SerializedSchema + ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema( + [ + pa.field("name", pa.string(), nullable=False), + pa.field("schema_name", pa.string(), nullable=False), + pa.field("function_type", pa.string(), nullable=False), + pa.field("arguments", pa.binary(), nullable=False), + pa.field("output_schema", pa.binary(), nullable=False), + pa.field("comment", pa.string(), nullable=True), + pa.field("tags", pa.map_(pa.string(), pa.string()), nullable=False), + ] # type: ignore[arg-type] + ) + + def serialize(self) -> bytes: + """Serialize to Arrow IPC bytes.""" + batch = pa.RecordBatch.from_pylist( + [ + { + "name": self.name, + "schema_name": self.schema_name, + "function_type": self.function_type.value, + "arguments": self.arguments, + "output_schema": self.output_schema, + "comment": self.comment, + "tags": self.tags, + } + ], + schema=self.ARROW_SCHEMA, + ) + return vgi.ipc_utils.serialize_record_batch(batch) + + @classmethod + def deserialize(cls, batch: pa.RecordBatch) -> Self: + """Deserialize from Arrow RecordBatch.""" + row = vgi.ipc_utils.validate_single_row_batch( + batch, + cls.__name__, + required_fields=[ + "name", + "schema_name", + "function_type", + "arguments", + "output_schema", + "tags", + ], + ) + return cls( + name=row["name"], + schema_name=row["schema_name"], + function_type=FunctionType(row["function_type"]), + arguments=SerializedSchema(row["arguments"]), + output_schema=SerializedSchema(row["output_schema"]), + comment=row.get("comment"), + tags=dict(row["tags"]) if row["tags"] else {}, + ) + @dataclass(frozen=True) class ScanFunctionResult: + """Result from getting a table scan function.""" + # The name of the VGI 
table function to call to scan data from the table, # when duckdb attempts to scan a table it will change that call into a # call to VGI to call this named table function. @@ -135,6 +394,42 @@ class ScanFunctionResult: # phase again. invocation_id: bytes | None + ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema( + [ + pa.field("function_name", pa.string(), nullable=False), + pa.field("max_processes", pa.int32(), nullable=False), + pa.field("invocation_id", pa.binary(), nullable=True), + ] # type: ignore[arg-type] + ) + + def serialize(self) -> bytes: + """Serialize to Arrow IPC bytes.""" + batch = pa.RecordBatch.from_pylist( + [ + { + "function_name": self.function_name, + "max_processes": self.max_processes, + "invocation_id": self.invocation_id, + } + ], + schema=self.ARROW_SCHEMA, + ) + return vgi.ipc_utils.serialize_record_batch(batch) + + @classmethod + def deserialize(cls, batch: pa.RecordBatch) -> Self: + """Deserialize from Arrow RecordBatch.""" + row = vgi.ipc_utils.validate_single_row_batch( + batch, + cls.__name__, + required_fields=["function_name", "max_processes"], + ) + return cls( + function_name=row["function_name"], + max_processes=row["max_processes"], + invocation_id=row.get("invocation_id"), + ) + class CatalogInterface(ABC): """Provides an interface to manage catalogs, schemas, tables, and views for VGI. @@ -149,8 +444,8 @@ class CatalogInterface(ABC): API limitations: - Functions are not able to be created or dropped. - Tags are not able to be updated on catalog objects. - - Comments and tags are not able to be updated or created on schemas (SchemaInfo). - - Constraints are not able to be added or dropped on tables (with the exception of not null constraints). + - Comments and tags are not updatable on schemas (SchemaInfo). + - Constraints cannot be added/dropped (except NOT NULL). A VGI worker will offer a single implementation of this interface to clients to manage their catalogs. 
@@ -158,9 +453,9 @@ class CatalogInterface(ABC): @property def interface_feature_flags(self) -> set[str]: - """Get the set of feature flags supported by this CatalogInterface implementation. + """Get the feature flags supported by this CatalogInterface. - Feature flags are used to indicate optional capabilities of the implementation. + Feature flags indicate optional capabilities of the implementation. The default implementation returns an empty set. """ return set() @@ -207,7 +502,7 @@ def catalog_transaction_begin(self, *, attach_id: AttachId) -> TransactionId | N def catalog_transaction_commit( self, *, attach_id: AttachId, transaction_id: TransactionId ) -> None: - """Commit the transaction with the given transaction_id for the given attachment. + """Commit the transaction for the given attachment. If the transaction cannot be committed, an exception should be raised. """ @@ -216,7 +511,7 @@ def catalog_transaction_commit( def catalog_transaction_rollback( self, *, attach_id: AttachId, transaction_id: TransactionId ) -> None: - """Rollback the transaction with the given transaction_id for the given attachment. + """Rollback the transaction for the given attachment. If the transaction cannot be rolled back, an exception should be raised. """ @@ -236,10 +531,9 @@ def catalog_detach(self, *, attach_id: AttachId) -> None: """Detach from the catalog with the given attach_id. Any open transactions should be rolled back. - The default implementation does nothing. """ - pass + return # Default no-op def catalog_version( self, *, attach_id: AttachId, transaction_id: TransactionId | None @@ -260,7 +554,7 @@ def schemas( ) -> Iterable[SchemaInfo]: """Get a list of schemas for the given attach_id and transaction_id. - The default implementation returns a schema called "main" with no comment or tags. + The default returns a schema called "main" with no comment or tags. 
""" return iter( [ @@ -304,10 +598,8 @@ def schema_contents( """Get the contents of the schema with the given name. Schemas can contain tables, views, and various types of functions. - - The default implementation returns everything registered to the Worker. """ - # FIXME: write this implementation for the worker. + raise NotImplementedError("Schema contents not implemented.") @abstractmethod def schema_get( @@ -398,8 +690,6 @@ def table_rename( """Rename the table with the given name to the new name.""" raise NotImplementedError("Table rename not implemented.") - # Add a column to a table, the name is serialized, but the column_type is the Arrow data type - # of the column to add. def table_column_add( self, *, @@ -407,10 +697,8 @@ def table_column_add( transaction_id: TransactionId | None, schema_name: str, name: str, - # column should be Arrow schema with a single field representing the column to add. - # the name and type are taken from that field. it is serialized as bytes using: - # schema.serialize().to_pybytes() - # the schema can only have one field, if it has more than one field an error should be raised. + # Arrow schema with single field for column to add. 
+ # Serialized via schema.serialize().to_pybytes() column_definition: SerializedSchema, ignore_not_found: bool, if_column_not_exists: bool, @@ -458,7 +746,7 @@ def table_column_default_set( expression: SqlExpression, ignore_not_found: bool, ) -> None: - """Set the default expression for the column in the table with the given name.""" + """Set the default expression for the column.""" raise NotImplementedError("Table column default set not implemented.") def table_column_default_drop( @@ -471,7 +759,7 @@ def table_column_default_drop( column_name: str, ignore_not_found: bool, ) -> None: - """Drop the default expression for the column in the table with the given name.""" + """Drop the default expression for the column.""" raise NotImplementedError("Table column default drop not implemented.") def table_column_type_change( @@ -481,11 +769,8 @@ def table_column_type_change( transaction_id: TransactionId | None, schema_name: str, name: str, - # This is an Arrow schema with a single field representing the column to change - # the type of. The new type is taken from the single field in this schema. - # it is serialized as bytes using: - # schema.serialize().to_pybytes() - # The schema can only have one field + # Arrow schema with single field for the new column type. 
+ # Serialized via schema.serialize().to_pybytes() column_definition: SerializedSchema, expression: SqlExpression | None, ignore_not_found: bool, @@ -506,7 +791,7 @@ def table_not_null_drop( column_name: str, ignore_not_found: bool, ) -> None: - """Drop the NOT NULL constraint from the column in the table with the given name.""" + """Drop the NOT NULL constraint from the column.""" raise NotImplementedError("Table NOT NULL drop not implemented.") def table_not_null_set( @@ -519,7 +804,7 @@ def table_not_null_set( column_name: str, ignore_not_found: bool, ) -> None: - """Set the NOT NULL constraint on the column in the table with the given name.""" + """Set the NOT NULL constraint on the column.""" raise NotImplementedError("Table NOT NULL set not implemented.") def table_scan_function_get( @@ -529,20 +814,14 @@ def table_scan_function_get( transaction_id: TransactionId | None, schema_name: str, name: str, - # These fields are used for iceberg style time travel. - # provided later on. + # Time travel fields (iceberg style) at_unit: str | None, at_value: str | None, ) -> ScanFunctionResult: - """Get the ScanFunctionResult for scanning the table with the given name. - - Get the ScanFunctionResult of the table function to call to read data from a particular table. - This is necessary since this method may yield the bind data identifier for the scan function. + """Get the ScanFunctionResult for scanning the table. - The at_unit and at_value will be passed by DuckDB, basically there is a bind function called - in the duckdb process and the additional parameters will be sent to the CatalogInterface, - the projection pushdown and (later on predicate pushdown) will be done in the init phase of the call - to the actual VGI function to scan the table. + Returns information about the VGI table function to call when scanning + this table. The at_unit and at_value support time travel queries. 
""" raise NotImplementedError("Table scan function get not implemented.") @@ -613,5 +892,11 @@ def view_comment_set( class ReadOnlyCatalogInterface(CatalogInterface): + """A read-only catalog interface that does not support DDL operations. + + This is a convenience base class for catalogs that only support reading + metadata and data, not creating or modifying objects. + """ + supports_transactions = False catalog_version_frozen = True