diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 873290a..928192d 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -27,7 +27,7 @@ {"id":"vgi-python-c2b","title":"Add duckdb_settings field to Invocation class","description":"Update vgi/invocation.py to add a duckdb_settings field to the Invocation dataclass.\n\nChanges needed:\n- Add 'duckdb_settings: dict[str, str] | None = None' field to Invocation\n- Update serialize() to include settings in Arrow IPC batch\n- Update deserialize() to read settings from Arrow IPC batch\n- Handle None case (no settings requested)\n\nSerialization: Use a struct field with string key-value pairs or a map type.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:47.765077-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.167817-05:00","closed_at":"2026-01-04T13:20:41.167817-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-c2b","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.664038-05:00","created_by":"rusty"}]} {"id":"vgi-python-cd0","title":"Create vgi/argument_spec.py module","description":"## Overview\n\nCreate the core module implementing Arrow-based argument specification serialization.\n\n## File Location\n\n`vgi/argument_spec.py`\n\n## Constants to Define\n\n```python\n# Metadata keys (all bytes for Arrow compatibility)\nVGI_ARG_KEY = b\"vgi_arg\"\nVGI_ARG_NAMED = b\"named\"\n\nVGI_TYPE_KEY = b\"vgi_type\"\nVGI_TYPE_TABLE = b\"table\"\nVGI_TYPE_ANY = b\"any\"\n\nVGI_VARARGS_KEY = b\"vgi_varargs\"\nVGI_VARARGS_TRUE = b\"true\"\n```\n\n## ArgumentSpec Dataclass\n\n```python\n@dataclass(frozen=True)\nclass ArgumentSpec:\n \"\"\"Specification for a single function argument.\"\"\"\n name: str # Python attribute name\n position: int | str # int for positional index, str for named key\n arrow_type: pa.DataType # Arrow type (pa.null() for special types)\n is_table_input: bool = False # Arg[TableInput]\n is_any_type: bool = False # Arg[AnyArrow]\n is_varargs: bool = False # varargs=True\n```\n\n## Functions to Implement\n\n### argument_specs_to_schema(specs: Sequence[ArgumentSpec]) -\u003e pa.Schema\n\nConvert ArgumentSpecs to a single Arrow schema:\n1. Sort specs: positional first (by index), then named\n2. For each spec, create a pa.field with:\n - name = spec.name\n - type = spec.arrow_type (or pa.null() for table/any)\n - metadata = appropriate markers based on flags\n3. Return pa.schema(fields)\n\n### schema_to_argument_specs(schema: pa.Schema) -\u003e list[ArgumentSpec]\n\nConvert schema back to ArgumentSpecs:\n1. Iterate through schema fields in order\n2. Track position index (increments for non-named args)\n3. Check field metadata for markers:\n - `vgi_arg=named` -\u003e position is field name string\n - `vgi_type=table` -\u003e is_table_input=True\n - `vgi_type=any` -\u003e is_any_type=True\n - `vgi_varargs=true` -\u003e is_varargs=True\n4. Return list of ArgumentSpec\n\n### extract_argument_specs(cls: type, arg_types: dict[str, pa.DataType]) -\u003e list[ArgumentSpec]\n\nExtract specs from a function class with Arg descriptors:\n1. Walk class MRO to find all Arg descriptors (like extract_parameters in metadata.py)\n2. For each Arg descriptor:\n - Get name from attribute name\n - Get position from arg.position\n - Get arrow_type from arg_types dict\n - Check type hints for TableInput/AnyArrow\n - Check arg.varargs flag\n3. Sort and return list\n\n## Dependencies\n\n- Import `Arg`, `TableInput`, `AnyArrow` from `vgi.arguments`\n- Reference `extract_parameters()` pattern in `vgi/metadata.py`","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T11:18:32.777241-05:00","created_by":"rusty","updated_at":"2026-01-05T11:28:07.227452-05:00","closed_at":"2026-01-05T11:28:07.227452-05:00","close_reason":"Created vgi/argument_spec.py with ArgumentSpec dataclass and serialization functions","dependencies":[{"issue_id":"vgi-python-cd0","depends_on_id":"vgi-python-8ra","type":"blocks","created_at":"2026-01-05T11:19:30.743936-05:00","created_by":"rusty"}]} {"id":"vgi-python-ckg","title":"Add AnyValue sentinel class to vgi/arguments.py","description":"Add AnyValue class similar to TableInput, export in __all__","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T10:41:41.392694-05:00","created_by":"rusty","updated_at":"2026-01-05T11:05:38.37392-05:00","closed_at":"2026-01-05T11:05:38.37392-05:00","close_reason":"Added AnyArrow sentinel class to arguments.py","dependencies":[{"issue_id":"vgi-python-ckg","depends_on_id":"vgi-python-awm","type":"blocks","created_at":"2026-01-05T10:41:52.658405-05:00","created_by":"rusty"}]} -{"id":"vgi-python-coi","title":"Update extract_argument_specs() to remove arg_types parameter","description":"In vgi/argument_spec.py:\n1. Remove arg_types parameter from function signature\n2. Update arrow_type resolution logic:\n - Use arg.arrow_type if explicitly set\n - Infer from Python type hint using PYTHON_TO_ARROW\n - Handle TableInput/AnyArrow → pa.null()\n - Warn and default to pa.null() for unknown types\n3. Import PYTHON_TO_ARROW from vgi.arguments","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-05T15:44:38.141157-05:00","created_by":"rusty","updated_at":"2026-01-05T15:44:38.141157-05:00","dependencies":[{"issue_id":"vgi-python-coi","depends_on_id":"vgi-python-cvj","type":"blocks","created_at":"2026-01-05T15:45:13.831745-05:00","created_by":"rusty"},{"issue_id":"vgi-python-coi","depends_on_id":"vgi-python-dv0","type":"blocks","created_at":"2026-01-05T15:45:13.864608-05:00","created_by":"rusty"}]} +{"id":"vgi-python-coi","title":"Update extract_argument_specs() to remove arg_types parameter","description":"In vgi/argument_spec.py:\n1. Remove arg_types parameter from function signature\n2. Update arrow_type resolution logic:\n - Use arg.arrow_type if explicitly set\n - Infer from Python type hint using PYTHON_TO_ARROW\n - Handle TableInput/AnyArrow → pa.null()\n - Warn and default to pa.null() for unknown types\n3. Import PYTHON_TO_ARROW from vgi.arguments","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T15:44:38.141157-05:00","created_by":"rusty","updated_at":"2026-01-05T15:56:24.271544-05:00","closed_at":"2026-01-05T15:56:24.271544-05:00","close_reason":"PR #20 created","dependencies":[{"issue_id":"vgi-python-coi","depends_on_id":"vgi-python-cvj","type":"blocks","created_at":"2026-01-05T15:45:13.831745-05:00","created_by":"rusty"},{"issue_id":"vgi-python-coi","depends_on_id":"vgi-python-dv0","type":"blocks","created_at":"2026-01-05T15:45:13.864608-05:00","created_by":"rusty"}]} {"id":"vgi-python-cvj","title":"Add PYTHON_TO_ARROW type mapping to vgi/arguments.py","description":"Add the Python→Arrow type mapping dict after imports:\n```python\nPYTHON_TO_ARROW: dict[type, pa.DataType] = {\n int: pa.int64(),\n str: pa.utf8(),\n float: pa.float64(),\n bool: pa.bool_(),\n bytes: pa.binary(),\n}\n```\nExport in __all__.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T15:44:37.900421-05:00","created_by":"rusty","updated_at":"2026-01-05T15:48:42.422086-05:00","closed_at":"2026-01-05T15:48:42.422086-05:00","close_reason":"PR #18 created"} {"id":"vgi-python-d73","title":"Create docs/argument-serialization.md","description":"## Overview\n\nCreate LLM-friendly documentation explaining the argument specification serialization format. This document should enable future implementors (human or AI) to understand how function argument signatures are serialized to Arrow schemas.\n\n## File Location\n\n`docs/argument-serialization.md`\n\n## Document Structure\n\n### Title and Purpose\n\nExplain that this document describes how VGI function argument specifications are serialized to Apache Arrow schemas for IPC transmission and DuckDB function registration.\n\n### Quick Reference\n\nA concise summary table showing:\n- Metadata keys and their meanings\n- Special type representations\n\n### Schema Format\n\nExplain the single-schema design:\n1. All arguments are fields in one Arrow schema\n2. Positional arguments come first, in order (field index = position index)\n3. Named arguments follow, marked with metadata\n4. Field name = Python attribute name (or argument key for named)\n5. Field type = exact Arrow type\n\n### Metadata Keys Reference\n\nComplete table of all metadata keys:\n\n| Key | Value | Description |\n|-----|-------|-------------|\n| `vgi_arg` | `named` | Field is a named argument, not positional. The field name is the argument key. |\n| `vgi_type` | `table` | Argument receives streaming table input (Arg[TableInput]). Arrow type is pa.null(). |\n| `vgi_type` | `any` | Argument accepts any Arrow type (Arg[AnyArrow]). Arrow type is pa.null(). |\n| `vgi_varargs` | `true` | Argument collects all remaining positional args. Arrow type is the element type. |\n\n### Special Type Handling\n\nExplain how special argument types are represented:\n\n#### TableInput\n- Arrow type: `pa.null()`\n- Metadata: `{b\"vgi_type\": b\"table\"}`\n- Meaning: This position receives streaming RecordBatches, not a scalar value\n\n#### AnyArrow\n- Arrow type: `pa.null()`\n- Metadata: `{b\"vgi_type\": b\"any\"}`\n- Meaning: Accepts any valid Arrow scalar type at runtime\n\n#### Varargs\n- Arrow type: The element type (e.g., `pa.int64()` for `Arg[int](..., varargs=True)`)\n- Metadata: `{b\"vgi_varargs\": b\"true\"}`\n- Meaning: Collects all remaining positional arguments from this position onwards\n\n### Examples\n\n#### Example 1: Simple Function\n\n```python\nclass MyFunction(TableInOutFunction):\n count = Arg[int](0) # Positional 0\n name = Arg[str](1) # Positional 1\n verbose = Arg[bool](\"verbose\") # Named\n\n# Serializes to:\nschema = pa.schema([\n pa.field(\"count\", pa.int64()),\n pa.field(\"name\", pa.utf8()),\n pa.field(\"verbose\", pa.bool_(), metadata={b\"vgi_arg\": b\"named\"}),\n])\n```\n\n#### Example 2: Function with Table Input\n\n```python\nclass TransformFunction(TableInOutFunction):\n multiplier = Arg[float](0)\n data = Arg[TableInput](1)\n\n# Serializes to:\nschema = pa.schema([\n pa.field(\"multiplier\", pa.float64()),\n pa.field(\"data\", pa.null(), metadata={b\"vgi_type\": b\"table\"}),\n])\n```\n\n#### Example 3: Function with Varargs\n\n```python\nclass SumFunction(TableInOutFunction):\n columns = Arg[str](0, varargs=True)\n\n# Serializes to:\nschema = pa.schema([\n pa.field(\"columns\", pa.utf8(), metadata={b\"vgi_varargs\": b\"true\"}),\n])\n```\n\n#### Example 4: Complex Function\n\n```python\nclass ComplexFunction(TableInOutFunction):\n count = Arg[int](0)\n data = Arg[TableInput](1)\n extra = Arg[float](2, varargs=True)\n format = Arg[str](\"format\")\n threshold = Arg[AnyArrow](\"threshold\")\n\n# Serializes to:\nschema = pa.schema([\n pa.field(\"count\", pa.int64()),\n pa.field(\"data\", pa.null(), metadata={b\"vgi_type\": b\"table\"}),\n pa.field(\"extra\", pa.float64(), metadata={b\"vgi_varargs\": b\"true\"}),\n pa.field(\"format\", pa.utf8(), metadata={b\"vgi_arg\": b\"named\"}),\n pa.field(\"threshold\", pa.null(), metadata={b\"vgi_arg\": b\"named\", b\"vgi_type\": b\"any\"}),\n])\n```\n\n### Serialization Code\n\nShow how to serialize and deserialize:\n\n```python\n# Serialize to bytes\nschema_bytes = schema.serialize().to_pybytes()\n\n# Deserialize from bytes\nschema = pa.ipc.read_schema(pa.py_buffer(schema_bytes))\n```\n\n### Parsing Algorithm\n\nExplain how to parse a schema back to argument specs:\n\n1. Initialize position_index = 0\n2. For each field in schema:\n a. Check if field has `vgi_arg=named` metadata\n b. If named: position = field.name (string)\n c. If positional: position = position_index, then increment position_index\n d. Check for `vgi_type` metadata (table or any)\n e. Check for `vgi_varargs` metadata\n f. Create ArgumentSpec with extracted info\n\n### Not Included\n\nExplicitly state what is NOT serialized:\n- Default values\n- Validation constraints (ge, le, choices, pattern)\n- Documentation strings\n\nThese are Python-side concerns handled by the Arg descriptor at runtime.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T11:19:17.488877-05:00","created_by":"rusty","updated_at":"2026-01-05T11:33:29.168007-05:00","closed_at":"2026-01-05T11:33:29.168007-05:00","close_reason":"Created comprehensive LLM-friendly documentation","dependencies":[{"issue_id":"vgi-python-d73","depends_on_id":"vgi-python-8ra","type":"blocks","created_at":"2026-01-05T11:19:30.820384-05:00","created_by":"rusty"}]} {"id":"vgi-python-dv0","title":"Add arrow_type parameter to Arg class","description":"In vgi/arguments.py:\n1. Add 'arrow_type' to __slots__\n2. Add parameter: arrow_type: pa.DataType | None = None\n3. Store: self.arrow_type = arrow_type\n4. Update __repr__ to include arrow_type if set","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T15:44:38.020395-05:00","created_by":"rusty","updated_at":"2026-01-05T15:50:51.513273-05:00","closed_at":"2026-01-05T15:50:51.513273-05:00","close_reason":"PR #19 created","dependencies":[{"issue_id":"vgi-python-dv0","depends_on_id":"vgi-python-cvj","type":"blocks","created_at":"2026-01-05T15:45:13.696822-05:00","created_by":"rusty"}]} diff --git a/tests/test_argument_spec.py b/tests/test_argument_spec.py index ce63b0d..d1af9f2 100644 --- a/tests/test_argument_spec.py +++ b/tests/test_argument_spec.py @@ -336,11 +336,10 @@ def test_extract_from_simple_function(self) -> None: """Extract specs from function with basic Arg descriptors.""" class SimpleFunction(TableInOutFunction): - count = Arg[int](0) - name = Arg[str](1) + count: int = Arg[int](0) # type: ignore[assignment] + name: str = Arg[str](1) # type: ignore[assignment] - arg_types: dict[str, pa.DataType] = {"count": pa.int64(), "name": pa.utf8()} - specs = extract_argument_specs(SimpleFunction, arg_types) + specs = extract_argument_specs(SimpleFunction) assert len(specs) == 2 assert specs[0].name == "count" @@ -354,18 +353,16 @@ def test_extract_table_input(self) -> None: """Extract specs should detect Arg[TableInput].""" class FunctionWithTable(TableInOutFunction): - multiplier = Arg[float](0) + multiplier: float = Arg[float](0) # type: ignore[assignment] data: TableInput = Arg[TableInput](1) # type: ignore[assignment] - arg_types: dict[str, pa.DataType] = { - "multiplier": pa.float64(), - "data": pa.null(), - } - specs = extract_argument_specs(FunctionWithTable, arg_types) + specs = extract_argument_specs(FunctionWithTable) assert len(specs) == 2 + assert specs[0].arrow_type == pa.float64() assert specs[1].name == "data" assert specs[1].is_table_input is True + assert specs[1].arrow_type == pa.null() def test_extract_any_arrow(self) -> None: """Extract specs should detect Arg[AnyArrow].""" @@ -373,78 +370,77 @@ def test_extract_any_arrow(self) -> None: class FunctionWithAny(TableInOutFunction): value: AnyArrow = Arg[AnyArrow](0) # type: ignore[assignment] - arg_types: dict[str, pa.DataType] = {"value": pa.null()} - specs = extract_argument_specs(FunctionWithAny, arg_types) + specs = extract_argument_specs(FunctionWithAny) assert len(specs) == 1 assert specs[0].is_any_type is True + assert specs[0].arrow_type == pa.null() def test_extract_varargs(self) -> None: """Extract specs should detect varargs=True.""" class FunctionWithVarargs(TableInOutFunction): - columns = Arg[str](0, varargs=True) + columns: str = Arg[str](0, varargs=True) # type: ignore[assignment] - arg_types: dict[str, pa.DataType] = {"columns": pa.utf8()} - specs = extract_argument_specs(FunctionWithVarargs, arg_types) + specs = extract_argument_specs(FunctionWithVarargs) assert len(specs) == 1 assert specs[0].is_varargs is True + assert specs[0].arrow_type == pa.utf8() def test_extract_named_arguments(self) -> None: """Extract specs should handle named arguments.""" class FunctionWithNamed(TableInOutFunction): - count = Arg[int](0) - format = Arg[str]("format") + count: int = Arg[int](0) # type: ignore[assignment] + format: str = Arg[str]("format") # type: ignore[assignment] - arg_types: dict[str, pa.DataType] = {"count": pa.int64(), "format": pa.utf8()} - specs = extract_argument_specs(FunctionWithNamed, arg_types) + specs = extract_argument_specs(FunctionWithNamed) assert len(specs) == 2 assert specs[0].position == 0 + assert specs[0].arrow_type == pa.int64() assert specs[1].position == "format" + assert specs[1].arrow_type == pa.utf8() def test_extract_mixed_arguments(self) -> None: """Extract specs should handle mixed positional and named args.""" class ComplexFunction(TableInOutFunction): - count = Arg[int](0) + count: int = Arg[int](0) # type: ignore[assignment] data: TableInput = Arg[TableInput](1) # type: ignore[assignment] - extra = Arg[float](2, varargs=True) - format = Arg[str]("format") + extra: float = Arg[float](2, varargs=True) # type: ignore[assignment] + format: str = Arg[str]("format") # type: ignore[assignment] threshold: AnyArrow = Arg[AnyArrow]("threshold") # type: ignore[assignment] - arg_types: dict[str, pa.DataType] = { - "count": pa.int64(), - "data": pa.null(), - "extra": pa.float64(), - "format": pa.utf8(), - "threshold": pa.null(), - } - specs = extract_argument_specs(ComplexFunction, arg_types) + specs = extract_argument_specs(ComplexFunction) assert len(specs) == 5 # Positional first assert specs[0].name == "count" assert specs[0].position == 0 + assert specs[0].arrow_type == pa.int64() assert specs[1].name == "data" assert specs[1].position == 1 assert specs[1].is_table_input is True + assert specs[1].arrow_type == pa.null() assert specs[2].name == "extra" assert specs[2].position == 2 assert specs[2].is_varargs is True + assert specs[2].arrow_type == pa.float64() # Named after assert specs[3].name == "format" assert specs[3].position == "format" + assert specs[3].arrow_type == pa.utf8() assert specs[4].name == "threshold" assert specs[4].position == "threshold" assert specs[4].is_any_type is True + assert specs[4].arrow_type == pa.null() class TestArgumentSpecToSchemaValidation: @@ -491,32 +487,54 @@ def test_contiguous_indices_no_warning(self) -> None: class TestExtractArgumentSpecsValidation: """Test validation in extract_argument_specs.""" - def test_missing_arg_type_warns(self) -> None: - """Missing arg_types entry should issue a warning.""" + def test_missing_type_hint_warns(self) -> None: + """Missing type hint and no arrow_type should issue a warning.""" class FunctionWithArg(TableInOutFunction): - count = Arg[int](0) + count = Arg[int](0) # No type annotation, no arrow_type - # Provide empty arg_types - should warn - with pytest.warns(UserWarning, match="Missing type for argument 'count'"): - specs = extract_argument_specs(FunctionWithArg, {}) + # Should warn about missing type + with pytest.warns(UserWarning, match="Cannot determine Arrow type"): + specs = extract_argument_specs(FunctionWithArg) assert len(specs) == 1 assert specs[0].arrow_type == pa.null() - def test_partial_arg_types_warns_for_missing(self) -> None: - """Only missing args should trigger warnings.""" + def test_explicit_arrow_type_no_warning(self) -> None: + """Explicit arrow_type should not trigger warning.""" - class FunctionWithTwoArgs(TableInOutFunction): - count = Arg[int](0) - name = Arg[str](1) + class FunctionWithArrowType(TableInOutFunction): + count = Arg[int](0, arrow_type=pa.int32()) # Explicit type - # Provide type for only 'count' - with pytest.warns(UserWarning, match="Missing type for argument 'name'"): - specs = extract_argument_specs(FunctionWithTwoArgs, {"count": pa.int64()}) + import warnings as w + + with w.catch_warnings(record=True) as caught: + w.simplefilter("always") + specs = extract_argument_specs(FunctionWithArrowType) + type_warnings = [ + x for x in caught if "Cannot determine Arrow type" in str(x.message) + ] + assert len(type_warnings) == 0 + + assert specs[0].arrow_type == pa.int32() + + def test_type_hint_no_warning(self) -> None: + """Type hint should be used to infer Arrow type without warning.""" + + class FunctionWithTypeHint(TableInOutFunction): + count: int = Arg[int](0) # type: ignore[assignment] + + import warnings as w + + with w.catch_warnings(record=True) as caught: + w.simplefilter("always") + specs = extract_argument_specs(FunctionWithTypeHint) + type_warnings = [ + x for x in caught if "Cannot determine Arrow type" in str(x.message) + ] + assert len(type_warnings) == 0 assert specs[0].arrow_type == pa.int64() - assert specs[1].arrow_type == pa.null() class TestEdgeCases: diff --git a/vgi/argument_spec.py b/vgi/argument_spec.py index aacc12b..b95d595 100644 --- a/vgi/argument_spec.py +++ b/vgi/argument_spec.py @@ -34,13 +34,13 @@ """ import warnings -from collections.abc import Mapping, Sequence +from collections.abc import Sequence from dataclasses import dataclass from typing import Any, get_type_hints import pyarrow as pa -from vgi.arguments import AnyArrow, Arg, TableInput +from vgi.arguments import PYTHON_TO_ARROW, AnyArrow, Arg, TableInput __all__ = [ "ArgumentSpec", @@ -257,17 +257,17 @@ def schema_to_argument_specs(schema: pa.Schema) -> list[ArgumentSpec]: def extract_argument_specs( cls: type, - arg_types: Mapping[str, pa.DataType], ) -> list[ArgumentSpec]: """Extract ArgumentSpecs from a function class with Arg descriptors. Walks the class hierarchy to find all Arg descriptors and creates - ArgumentSpec objects with the provided Arrow types. + ArgumentSpec objects with Arrow types determined by: + 1. Explicit arrow_type on Arg (highest priority) + 2. Type annotation with PYTHON_TO_ARROW mapping + 3. Default to pa.null() with warning for unknown types Args: cls: Function class with Arg descriptors. - arg_types: Mapping from argument attribute names to their Arrow types. - For TableInput and AnyArrow arguments, use pa.null(). Returns: List of ArgumentSpec objects, sorted by position (positional first, @@ -275,17 +275,16 @@ def extract_argument_specs( Example: class MyFunction(TableInOutFunction): - count = Arg[int](0) - format = Arg[str]("format") + count: int = Arg[int](0, arrow_type=pa.int64()) + format: str = Arg[str]("format") # Inferred from type hint - arg_types = {"count": pa.int64(), "format": pa.utf8()} - specs = extract_argument_specs(MyFunction, arg_types) + specs = extract_argument_specs(MyFunction) """ specs: list[ArgumentSpec] = [] seen_names: set[str] = set() - # Get type hints for detecting TableInput/AnyArrow + # Get type hints for type inference and detecting TableInput/AnyArrow try: hints = get_type_hints(cls) except (NameError, AttributeError): @@ -306,22 +305,30 @@ class MyFunction(TableInOutFunction): seen_names.add(attr_name) arg: Arg[Any] = attr_value - # Get Arrow type from provided mapping + # Check type hint for special types + hint = hints.get(attr_name) + is_table_input = hint is TableInput + is_any_type = hint is AnyArrow + + # Determine Arrow type using priority order: + # 1. Explicit arrow_type on Arg + # 2. Type hint with PYTHON_TO_ARROW mapping + # 3. Default to pa.null() with warning arrow_type: pa.DataType - if attr_name not in arg_types: + if arg.arrow_type is not None: + arrow_type = arg.arrow_type + elif is_table_input or is_any_type: + arrow_type = pa.null() + elif hint is not None and hint in PYTHON_TO_ARROW: + arrow_type = PYTHON_TO_ARROW[hint] + else: warnings.warn( - f"Missing type for argument '{attr_name}' in arg_types " - f"mapping; defaulting to pa.null(). This may indicate a bug.", + f"Cannot determine Arrow type for argument '{attr_name}'. " + f"Add explicit arrow_type to Arg or add type annotation. " + f"Defaulting to pa.null().", stacklevel=2, ) arrow_type = pa.null() - else: - arrow_type = arg_types[attr_name] - - # Check type hint for special types - hint = hints.get(attr_name) - is_table_input = hint is TableInput - is_any_type = hint is AnyArrow # Check varargs flag is_varargs = arg.varargs