|
9 | 9 | AttachId, |
10 | 10 | CatalogAttachResult, |
11 | 11 | CatalogInterface, |
| 12 | + FunctionInfo, |
12 | 13 | OnConflict, |
13 | 14 | SchemaInfo, |
14 | 15 | SerializedSchema, |
@@ -423,3 +424,333 @@ def test_class_attributes(self) -> None: |
423 | 424 | """ReadOnlyCatalogInterface has correct class attributes.""" |
424 | 425 | assert ReadOnlyCatalogInterface.supports_transactions is False |
425 | 426 | assert ReadOnlyCatalogInterface.catalog_version_frozen is True |
| 427 | + |
| 428 | + |
class TestFunctionInfoNewFields:
    """Test FunctionInfo new metadata fields and serialization.

    The tests cover the default values of the new fields, the
    serialize/deserialize round trip, the string encoding of the enum
    fields, and deserialization of a legacy batch that lacks the new columns.
    """

    def _get_empty_schema_bytes(self) -> SerializedSchema:
        """Create empty serialized schema for tests."""
        import pyarrow as pa

        empty_schema = pa.schema([])
        return SerializedSchema(empty_schema.serialize().to_pybytes())

    def _roundtrip(self, info: FunctionInfo) -> FunctionInfo:
        """Serialize ``info`` and deserialize the result into a new FunctionInfo."""
        from vgi.ipc_utils import deserialize_record_batch

        batch = deserialize_record_batch(info.serialize())
        return FunctionInfo.deserialize(batch)

    def _assert_new_field_defaults(self, info: FunctionInfo) -> None:
        """Assert that every new metadata field of ``info`` holds its default.

        Shared by the construction-default test and the legacy-data test so
        both check exactly the same expectations.
        """
        from vgi.catalog.catalog_interface import (
            DistinctDependence,
            FunctionStability,
            NullHandling,
            OrderDependence,
            OrderPreservation,
        )

        # Behavior fields
        assert info.stability == FunctionStability.CONSISTENT
        assert info.null_handling == NullHandling.DEFAULT

        # Documentation fields
        assert info.examples == []
        assert info.categories == []

        # Table function capabilities
        assert info.projection_pushdown is True
        assert info.filter_pushdown is False
        assert info.order_preservation == OrderPreservation.PRESERVES_ORDER
        assert info.max_workers is None

        # Aggregate function fields
        assert info.order_dependent == OrderDependence.NOT_ORDER_DEPENDENT
        assert info.distinct_dependent == DistinctDependence.NOT_DISTINCT_DEPENDENT

        # Settings
        assert info.required_settings == []

    def test_default_values(self) -> None:
        """Create FunctionInfo with only required fields, verify defaults."""
        from vgi.catalog import FunctionType

        schema_bytes = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=schema_bytes,
            output_schema=schema_bytes,
            comment=None,
            tags={},
        )

        self._assert_new_field_defaults(info)

    def test_serialization_roundtrip_with_all_fields(self) -> None:
        """Serialize and deserialize FunctionInfo with all new fields set."""
        from vgi.catalog import FunctionType
        from vgi.catalog.catalog_interface import (
            DistinctDependence,
            FunctionStability,
            NullHandling,
            OrderDependence,
            OrderPreservation,
        )
        from vgi.ipc_utils import deserialize_record_batch

        schema_bytes = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=schema_bytes,
            output_schema=schema_bytes,
            comment=None,
            tags={},
            stability=FunctionStability.VOLATILE,
            null_handling=NullHandling.SPECIAL,
            examples=["SELECT test_func(1)", "SELECT test_func(2)"],
            categories=["math", "utility"],
            projection_pushdown=False,
            filter_pushdown=True,
            order_preservation=OrderPreservation.NO_ORDER_GUARANTEE,
            max_workers=4,
            order_dependent=OrderDependence.ORDER_DEPENDENT,
            distinct_dependent=DistinctDependence.DISTINCT_DEPENDENT,
            required_settings=["vgi_debug", "vgi_verbose"],
        )

        # Serialize (kept inline so the wire type can be checked explicitly)
        serialized = info.serialize()
        assert isinstance(serialized, bytes)

        # Deserialize
        batch = deserialize_record_batch(serialized)
        restored = FunctionInfo.deserialize(batch)

        # Verify all fields match; the field name is the assertion message so
        # a failure identifies which attribute did not survive the round trip.
        compared_fields = (
            # Original fields
            "name",
            "schema_name",
            "function_type",
            "arguments",
            "output_schema",
            "comment",
            "tags",
            # New fields
            "stability",
            "null_handling",
            "examples",
            "categories",
            "projection_pushdown",
            "filter_pushdown",
            "order_preservation",
            "max_workers",
            "order_dependent",
            "distinct_dependent",
            "required_settings",
        )
        for field_name in compared_fields:
            assert getattr(restored, field_name) == getattr(info, field_name), field_name

    def test_enum_serialization(self) -> None:
        """Verify enums serialize to strings and deserialize back correctly."""
        from vgi.catalog import FunctionType
        from vgi.catalog.catalog_interface import (
            DistinctDependence,
            FunctionStability,
            NullHandling,
            OrderDependence,
            OrderPreservation,
        )
        from vgi.ipc_utils import deserialize_record_batch

        schema_bytes = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=schema_bytes,
            output_schema=schema_bytes,
            comment=None,
            tags={},
            stability=FunctionStability.CONSISTENT_WITHIN_QUERY,
            null_handling=NullHandling.SPECIAL,
            order_preservation=OrderPreservation.NO_ORDER_GUARANTEE,
            order_dependent=OrderDependence.ORDER_DEPENDENT,
            distinct_dependent=DistinctDependence.DISTINCT_DEPENDENT,
        )

        # Serialize and inspect the Arrow data
        serialized = info.serialize()
        batch = deserialize_record_batch(serialized)

        # Verify enums were serialized as strings. The dict is indexed by
        # column name and then by row position, hence the [0].
        columns = batch.to_pydict()
        assert columns["stability"][0] == "CONSISTENT_WITHIN_QUERY"
        assert columns["null_handling"][0] == "SPECIAL"
        assert columns["order_preservation"][0] == "NO_ORDER_GUARANTEE"
        assert columns["order_dependent"][0] == "ORDER_DEPENDENT"
        assert columns["distinct_dependent"][0] == "DISTINCT_DEPENDENT"

        # Verify deserialization produces correct enum values
        restored = FunctionInfo.deserialize(batch)
        assert restored.stability == FunctionStability.CONSISTENT_WITHIN_QUERY
        assert restored.null_handling == NullHandling.SPECIAL
        assert restored.order_preservation == OrderPreservation.NO_ORDER_GUARANTEE
        assert restored.order_dependent == OrderDependence.ORDER_DEPENDENT
        assert restored.distinct_dependent == DistinctDependence.DISTINCT_DEPENDENT

    def test_backward_compatibility_without_new_fields(self) -> None:
        """Deserialize data that was serialized without new fields (legacy data)."""
        import pyarrow as pa

        from vgi.catalog import FunctionType

        # Create legacy schema without new fields
        empty_schema = pa.schema([])
        empty_schema_bytes = empty_schema.serialize().to_pybytes()

        legacy_fields: list[pa.Field[pa.DataType]] = [
            pa.field("name", pa.string(), nullable=False),
            pa.field("schema_name", pa.string(), nullable=False),
            pa.field("function_type", pa.string(), nullable=False),
            pa.field("arguments", pa.binary(), nullable=False),
            pa.field("output_schema", pa.binary(), nullable=False),
            pa.field("comment", pa.string(), nullable=True),
            pa.field("tags", pa.map_(pa.string(), pa.string()), nullable=False),
        ]
        legacy_schema = pa.schema(legacy_fields)

        # Create legacy batch (without new fields)
        legacy_batch = pa.RecordBatch.from_pylist(
            [
                {
                    "name": "legacy_func",
                    "schema_name": "main",
                    "function_type": "scalar",
                    "arguments": empty_schema_bytes,
                    "output_schema": empty_schema_bytes,
                    "comment": "A legacy function",
                    "tags": {"version": "1.0"},
                }
            ],
            schema=legacy_schema,
        )

        # Deserialize - should use defaults for missing fields
        restored = FunctionInfo.deserialize(legacy_batch)

        # Core fields should be preserved
        assert restored.name == "legacy_func"
        assert restored.schema_name == "main"
        assert restored.function_type == FunctionType.SCALAR
        assert restored.comment == "A legacy function"
        assert restored.tags == {"version": "1.0"}

        # New fields should have defaults
        self._assert_new_field_defaults(restored)

    def test_max_workers_nullable(self) -> None:
        """Verify max_workers can be None or an integer."""
        from vgi.catalog import FunctionType

        schema_bytes = self._get_empty_schema_bytes()

        # Test with None
        info_none = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=schema_bytes,
            output_schema=schema_bytes,
            comment=None,
            tags={},
            max_workers=None,
        )
        assert info_none.max_workers is None
        assert self._roundtrip(info_none).max_workers is None

        # Test with integer
        info_int = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=schema_bytes,
            output_schema=schema_bytes,
            comment=None,
            tags={},
            max_workers=8,
        )
        assert info_int.max_workers == 8
        assert self._roundtrip(info_int).max_workers == 8

    def test_list_fields_serialization(self) -> None:
        """Verify list fields serialize and deserialize correctly."""
        from vgi.catalog import FunctionType

        schema_bytes = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=schema_bytes,
            output_schema=schema_bytes,
            comment=None,
            tags={},
            examples=["SELECT f(1)", "SELECT f(2)", "SELECT f(3)"],
            categories=["a", "b"],
            required_settings=["setting1"],
        )

        restored = self._roundtrip(info)

        assert restored.examples == ["SELECT f(1)", "SELECT f(2)", "SELECT f(3)"]
        assert restored.categories == ["a", "b"]
        assert restored.required_settings == ["setting1"]

    def test_empty_list_fields(self) -> None:
        """Verify empty list fields serialize and deserialize correctly."""
        from vgi.catalog import FunctionType

        schema_bytes = self._get_empty_schema_bytes()
        info = FunctionInfo(
            name="test_func",
            schema_name="main",
            function_type=FunctionType.SCALAR,
            arguments=schema_bytes,
            output_schema=schema_bytes,
            comment=None,
            tags={},
            examples=[],
            categories=[],
            required_settings=[],
        )

        restored = self._roundtrip(info)

        # Compare against [] rather than using truthiness so that a None
        # coming back from deserialization would fail the test.
        assert restored.examples == []
        assert restored.categories == []
        assert restored.required_settings == []
0 commit comments