|
5 | 5 |
|
6 | 6 | AVAILABLE FUNCTIONS |
7 | 7 | ------------------- |
8 | | -SequenceFunction - Generates a sequence of integers 0..n-1 |
9 | | -RangeFunction - Generates integers in a start..end range |
| 8 | +ConstantColumnsFunction - Demonstrates varargs with dynamic output schema |
10 | 9 | ConstantTableFunction - Returns a constant single-row table |
11 | | -RandomSampleFunction - Generates random sample data (parallelizable) |
12 | 10 | GeneratorExceptionFunction - Demonstrates exception handling |
| 11 | +LoggingGeneratorFunction - Demonstrates log message emission |
| 12 | +PartitionedSequenceFunction - Demonstrates multi-worker parallel execution |
| 13 | +ProjectedDataFunction - Demonstrates projection pushdown |
| 14 | +SequenceFunction - Generates a sequence of integers 0..n-1 |
| 15 | +SettingsAwareFunction - Demonstrates settings-aware output schema |
13 | 16 | """ |
14 | 17 |
|
15 | 18 | import struct |
16 | | -from typing import Annotated, ClassVar, cast |
| 19 | +from typing import Annotated, Any, ClassVar, cast |
17 | 20 |
|
18 | 21 | import numpy as np |
19 | 22 | import pyarrow as pa |
|
31 | 34 | ) |
32 | 35 |
|
33 | 36 | __all__ = [ |
34 | | - "SequenceFunction", |
| 37 | + "ConstantColumnsFunction", |
35 | 38 | "ConstantTableFunction", |
36 | 39 | "GeneratorExceptionFunction", |
37 | 40 | "LoggingGeneratorFunction", |
38 | 41 | "PartitionedSequenceFunction", |
39 | 42 | "ProjectedDataFunction", |
| 43 | + "SequenceFunction", |
40 | 44 | "SettingsAwareFunction", |
41 | 45 | ] |
42 | 46 |
|
@@ -601,3 +605,109 @@ def process(self) -> OutputGenerator: |
601 | 605 | data["details"] = [f"row_{i}"] |
602 | 606 |
|
603 | 607 | yield Output(pa.RecordBatch.from_pydict(data, schema=output_schema)) |
| 608 | + |
| 609 | + |
| 610 | +class ConstantColumnsFunction(TableFunctionGenerator): |
| 611 | + """Generates a table with constant values in each column based on varargs. |
| 612 | +
|
| 613 | + USE CASE |
| 614 | + -------- |
| 615 | + Demonstrates varargs with AnyArrow type where the output schema is |
| 616 | + determined by the types of the values provided. Each vararg value |
| 617 | + becomes a column filled with that constant value for all rows. |
| 618 | +
|
| 619 | + This shows how varargs can accept mixed types and produce a dynamic |
| 620 | + output schema based on the argument types. |
| 621 | +
|
| 622 | + SCHEMA |
| 623 | + ------ |
| 624 | + Output schema is dynamic based on the types of provided values. |
| 625 | + Column names are auto-generated as col_0, col_1, col_2, etc. |
| 626 | +
|
| 627 | + Example: constant_columns(3, 42, 'hello', 3.14) |
| 628 | + Output schema: {"col_0": int64, "col_1": string, "col_2": double} |
| 629 | +
|
| 630 | + PARALLELIZATION |
| 631 | + --------------- |
| 632 | + Single worker only (max_workers=1). |
| 633 | +
|
| 634 | + Example: |
| 635 | + ------- |
| 636 | + SELECT * FROM constant_columns(3, 42, 'hello') |
| 637 | + Returns: [{"col_0": 42, "col_1": "hello"}, |
| 638 | + {"col_0": 42, "col_1": "hello"}, |
| 639 | + {"col_0": 42, "col_1": "hello"}] |
| 640 | +
|
| 641 | + SELECT * FROM constant_columns(2, 1, 2, 3, 'apple') |
| 642 | + Returns: [{"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"}, |
| 643 | + {"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"}] |
| 644 | +
|
| 645 | + """ |
| 646 | + |
| 647 | + class Meta: |
| 648 | + """Metadata for ConstantColumnsFunction.""" |
| 649 | + |
| 650 | + name = "constant_columns" |
| 651 | + description = "Generates rows with constant values from varargs" |
| 652 | + categories = ["generator", "utility"] |
| 653 | + max_workers = 1 |
| 654 | + examples = [ |
| 655 | + FunctionExample( |
| 656 | + sql="SELECT * FROM constant_columns(5, 42, 'hello')", |
| 657 | + description="Generate 5 rows with columns containing 42 and 'hello'", |
| 658 | + ), |
| 659 | + FunctionExample( |
| 660 | + sql="SELECT * FROM constant_columns(3, 1, 2, 3, 'test')", |
| 661 | + description="Generate 3 rows with 4 columns of mixed types", |
| 662 | + ), |
| 663 | + ] |
| 664 | + |
| 665 | + count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)] |
| 666 | + values: Annotated[ |
| 667 | + tuple[Any, ...], |
| 668 | + Arg(1, varargs=True, doc="Values to fill each column (at least one required)"), |
| 669 | + ] |
| 670 | + |
| 671 | + # Store Arrow scalars for type information |
| 672 | + _value_scalars: list[Any] |
| 673 | + |
| 674 | + BATCH_SIZE: int = 1000 |
| 675 | + |
| 676 | + def bind(self) -> None: |
| 677 | + """Extract Arrow scalars from positional arguments for type info.""" |
| 678 | + # Access raw Arrow scalars to preserve type information |
| 679 | + positional = self.invocation.arguments.positional |
| 680 | + # Filter to non-None scalars (varargs validation ensures no nulls) |
| 681 | + self._value_scalars = [s for s in positional[1:] if s is not None] |
| 682 | + |
| 683 | + @property |
| 684 | + def output_schema(self) -> pa.Schema: |
| 685 | + """Return output schema with one column per vararg, typed by value.""" |
| 686 | + fields = [ |
| 687 | + pa.field(f"col_{i}", scalar.type) |
| 688 | + for i, scalar in enumerate(self._value_scalars) |
| 689 | + ] |
| 690 | + return pa.schema(fields) |
| 691 | + |
| 692 | + @property |
| 693 | + def cardinality(self) -> TableCardinality: |
| 694 | + """Return exact cardinality since we know the count.""" |
| 695 | + return TableCardinality(estimate=self.count, max=self.count) |
| 696 | + |
| 697 | + def process(self) -> OutputGenerator: |
| 698 | + """Generate rows with constant values in each column.""" |
| 699 | + output_schema = self.output_schema |
| 700 | + remaining = self.count |
| 701 | + |
| 702 | + while remaining > 0: |
| 703 | + batch_size = min(remaining, self.BATCH_SIZE) |
| 704 | + |
| 705 | + # Create arrays filled with constant values |
| 706 | + arrays = [ |
| 707 | + pa.array([scalar.as_py()] * batch_size, type=scalar.type) |
| 708 | + for scalar in self._value_scalars |
| 709 | + ] |
| 710 | + |
| 711 | + yield Output(pa.RecordBatch.from_arrays(arrays, schema=output_schema)) |
| 712 | + |
| 713 | + remaining -= batch_size |
0 commit comments