Skip to content

Commit 989aaa1

Browse files
rustyconover and claude committed
Expand FunctionInfo with metadata fields and tests
Add new metadata fields to the FunctionInfo dataclass:

- stability, null_handling for behavior metadata
- examples, categories for documentation
- projection_pushdown, filter_pushdown, order_preservation, max_workers for table functions
- order_dependent, distinct_dependent for aggregate functions
- required_settings for settings configuration

Update ARROW_SCHEMA, serialize(), deserialize(), and _function_to_info() to support the new fields with backward compatibility.

Add comprehensive tests for new fields, serialization roundtrip, enum handling, and backward compatibility with legacy data.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b67783e commit 989aaa1

2 files changed

Lines changed: 434 additions & 6 deletions

File tree

tests/catalog/test_catalog_interface.py

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
AttachId,
1010
CatalogAttachResult,
1111
CatalogInterface,
12+
FunctionInfo,
1213
OnConflict,
1314
SchemaInfo,
1415
SerializedSchema,
@@ -423,3 +424,333 @@ def test_class_attributes(self) -> None:
423424
"""ReadOnlyCatalogInterface has correct class attributes."""
424425
assert ReadOnlyCatalogInterface.supports_transactions is False
425426
assert ReadOnlyCatalogInterface.catalog_version_frozen is True
427+
428+
429+
class TestFunctionInfoNewFields:
    """Exercise the metadata fields added to FunctionInfo and their serialization."""

    def _get_empty_schema_bytes(self) -> SerializedSchema:
        """Return the serialized form of an Arrow schema that has no fields."""
        import pyarrow as pa

        ipc_buffer = pa.schema([]).serialize()
        return SerializedSchema(ipc_buffer.to_pybytes())

    def _roundtrip(self, info: FunctionInfo) -> FunctionInfo:
        """Serialize ``info`` and rebuild a FunctionInfo from the resulting batch."""
        from vgi.ipc_utils import deserialize_record_batch

        batch = deserialize_record_batch(info.serialize())
        return FunctionInfo.deserialize(batch)

    def test_default_values(self) -> None:
        """A FunctionInfo built from required fields only exposes the expected defaults."""
        from vgi.catalog import FunctionType
        from vgi.catalog.catalog_interface import (
            DistinctDependence,
            FunctionStability,
            NullHandling,
            OrderDependence,
            OrderPreservation,
        )

        empty = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=empty,
            output_schema=empty,
            comment=None,
            tags={},
        )

        # Fields compared by equality, grouped by the area they describe.
        expected_defaults = {
            # Behavior
            "stability": FunctionStability.CONSISTENT,
            "null_handling": NullHandling.DEFAULT,
            # Documentation
            "examples": [],
            "categories": [],
            # Table function capabilities
            "order_preservation": OrderPreservation.PRESERVES_ORDER,
            # Aggregate function fields
            "order_dependent": OrderDependence.NOT_ORDER_DEPENDENT,
            "distinct_dependent": DistinctDependence.NOT_DISTINCT_DEPENDENT,
            # Settings
            "required_settings": [],
        }
        for attr, expected in expected_defaults.items():
            assert getattr(info, attr) == expected, attr

        # Singleton-valued defaults are checked by identity.
        assert info.projection_pushdown is True
        assert info.filter_pushdown is False
        assert info.max_workers is None

    def test_serialization_roundtrip_with_all_fields(self) -> None:
        """Every field, old and new, survives serialize() followed by deserialize()."""
        from vgi.catalog import FunctionType
        from vgi.catalog.catalog_interface import (
            DistinctDependence,
            FunctionStability,
            NullHandling,
            OrderDependence,
            OrderPreservation,
        )
        from vgi.ipc_utils import deserialize_record_batch

        empty = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=empty,
            output_schema=empty,
            comment=None,
            tags={},
            stability=FunctionStability.VOLATILE,
            null_handling=NullHandling.SPECIAL,
            examples=["SELECT test_func(1)", "SELECT test_func(2)"],
            categories=["math", "utility"],
            projection_pushdown=False,
            filter_pushdown=True,
            order_preservation=OrderPreservation.NO_ORDER_GUARANTEE,
            max_workers=4,
            order_dependent=OrderDependence.ORDER_DEPENDENT,
            distinct_dependent=DistinctDependence.DISTINCT_DEPENDENT,
            required_settings=["vgi_debug", "vgi_verbose"],
        )

        # Serialize once and check the wire type before decoding it again.
        payload = info.serialize()
        assert isinstance(payload, bytes)
        restored = FunctionInfo.deserialize(deserialize_record_batch(payload))

        compared_fields = (
            # Pre-existing fields
            "name",
            "schema_name",
            "function_type",
            "arguments",
            "output_schema",
            "comment",
            "tags",
            # New fields
            "stability",
            "null_handling",
            "examples",
            "categories",
            "projection_pushdown",
            "filter_pushdown",
            "order_preservation",
            "max_workers",
            "order_dependent",
            "distinct_dependent",
            "required_settings",
        )
        for attr in compared_fields:
            assert getattr(restored, attr) == getattr(info, attr), attr

    def test_enum_serialization(self) -> None:
        """Enum fields are written as strings and read back as enum members."""
        from vgi.catalog import FunctionType
        from vgi.catalog.catalog_interface import (
            DistinctDependence,
            FunctionStability,
            NullHandling,
            OrderDependence,
            OrderPreservation,
        )
        from vgi.ipc_utils import deserialize_record_batch

        empty = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=empty,
            output_schema=empty,
            comment=None,
            tags={},
            stability=FunctionStability.CONSISTENT_WITHIN_QUERY,
            null_handling=NullHandling.SPECIAL,
            order_preservation=OrderPreservation.NO_ORDER_GUARANTEE,
            order_dependent=OrderDependence.ORDER_DEPENDENT,
            distinct_dependent=DistinctDependence.DISTINCT_DEPENDENT,
        )

        batch = deserialize_record_batch(info.serialize())

        # Inspect the raw Arrow columns: each enum must appear as a plain string.
        columns = batch.to_pydict()
        expected_strings = {
            "stability": "CONSISTENT_WITHIN_QUERY",
            "null_handling": "SPECIAL",
            "order_preservation": "NO_ORDER_GUARANTEE",
            "order_dependent": "ORDER_DEPENDENT",
            "distinct_dependent": "DISTINCT_DEPENDENT",
        }
        for column, text in expected_strings.items():
            assert columns[column][0] == text, column

        # Decoding the same batch must yield the original enum members.
        restored = FunctionInfo.deserialize(batch)
        expected_members = {
            "stability": FunctionStability.CONSISTENT_WITHIN_QUERY,
            "null_handling": NullHandling.SPECIAL,
            "order_preservation": OrderPreservation.NO_ORDER_GUARANTEE,
            "order_dependent": OrderDependence.ORDER_DEPENDENT,
            "distinct_dependent": DistinctDependence.DISTINCT_DEPENDENT,
        }
        for attr, member in expected_members.items():
            assert getattr(restored, attr) == member, attr

    def test_backward_compatibility_without_new_fields(self) -> None:
        """A batch that lacks the new columns (legacy data) deserializes with defaults."""
        import pyarrow as pa

        from vgi.catalog import FunctionInfo, FunctionType
        from vgi.catalog.catalog_interface import (
            DistinctDependence,
            FunctionStability,
            NullHandling,
            OrderDependence,
            OrderPreservation,
        )

        no_fields_bytes = pa.schema([]).serialize().to_pybytes()

        # Schema restricted to the columns that existed before the new fields.
        old_columns: list[pa.Field[pa.DataType]] = [
            pa.field("name", pa.string(), nullable=False),
            pa.field("schema_name", pa.string(), nullable=False),
            pa.field("function_type", pa.string(), nullable=False),
            pa.field("arguments", pa.binary(), nullable=False),
            pa.field("output_schema", pa.binary(), nullable=False),
            pa.field("comment", pa.string(), nullable=True),
            pa.field("tags", pa.map_(pa.string(), pa.string()), nullable=False),
        ]
        old_schema = pa.schema(old_columns)

        legacy_row = {
            "name": "legacy_func",
            "schema_name": "main",
            "function_type": "scalar",
            "arguments": no_fields_bytes,
            "output_schema": no_fields_bytes,
            "comment": "A legacy function",
            "tags": {"version": "1.0"},
        }
        legacy_batch = pa.RecordBatch.from_pylist([legacy_row], schema=old_schema)

        # Missing columns are expected to fall back to the field defaults.
        restored = FunctionInfo.deserialize(legacy_batch)

        # Core fields come straight from the legacy row.
        assert restored.name == "legacy_func"
        assert restored.schema_name == "main"
        assert restored.function_type == FunctionType.SCALAR
        assert restored.comment == "A legacy function"
        assert restored.tags == {"version": "1.0"}

        # New fields take their default values.
        expected_defaults = {
            "stability": FunctionStability.CONSISTENT,
            "null_handling": NullHandling.DEFAULT,
            "examples": [],
            "categories": [],
            "order_preservation": OrderPreservation.PRESERVES_ORDER,
            "order_dependent": OrderDependence.NOT_ORDER_DEPENDENT,
            "distinct_dependent": DistinctDependence.NOT_DISTINCT_DEPENDENT,
            "required_settings": [],
        }
        for attr, expected in expected_defaults.items():
            assert getattr(restored, attr) == expected, attr
        assert restored.projection_pushdown is True
        assert restored.filter_pushdown is False
        assert restored.max_workers is None

    def test_max_workers_nullable(self) -> None:
        """max_workers round-trips both as None and as an integer."""
        from vgi.catalog import FunctionType

        empty = self._get_empty_schema_bytes()

        # Case 1: no worker limit.
        unlimited = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=empty,
            output_schema=empty,
            comment=None,
            tags={},
            max_workers=None,
        )
        assert unlimited.max_workers is None
        assert self._roundtrip(unlimited).max_workers is None

        # Case 2: explicit integer limit.
        limited = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=empty,
            output_schema=empty,
            comment=None,
            tags={},
            max_workers=8,
        )
        assert limited.max_workers == 8
        assert self._roundtrip(limited).max_workers == 8

    def test_list_fields_serialization(self) -> None:
        """Populated list fields keep their contents and order across a round trip."""
        from vgi.catalog import FunctionType

        want_examples = ["SELECT f(1)", "SELECT f(2)", "SELECT f(3)"]
        want_categories = ["a", "b"]
        want_settings = ["setting1"]

        empty = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=empty,
            output_schema=empty,
            comment=None,
            tags={},
            # Copies keep the expected values independent of the object under test.
            examples=list(want_examples),
            categories=list(want_categories),
            required_settings=list(want_settings),
        )

        restored = self._roundtrip(info)

        assert restored.examples == want_examples
        assert restored.categories == want_categories
        assert restored.required_settings == want_settings

    def test_empty_list_fields(self) -> None:
        """Explicitly empty list fields come back as empty lists."""
        from vgi.catalog import FunctionType

        empty = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=empty,
            output_schema=empty,
            comment=None,
            tags={},
            examples=[],
            categories=[],
            required_settings=[],
        )

        restored = self._roundtrip(info)

        for attr in ("examples", "categories", "required_settings"):
            assert getattr(restored, attr) == [], attr

0 commit comments

Comments
 (0)