Skip to content

Commit 3bfa6a5

Browse files
rustyconoverclaude
andcommitted
Add ConstantColumnsFunction example demonstrating varargs with any type
New table function that takes values of any type as varargs and generates rows where each column is filled with its corresponding constant value. The output schema is dynamic based on the types of the provided values. Example: constant_columns(3, 42, 'hello', 3.14) produces: {"col_0": 42, "col_1": "hello", "col_2": 3.14} (repeated 3 times) This demonstrates: - varargs with mixed Arrow types (AnyArrow pattern) - Dynamic output schema determined by argument types at bind time - Accessing raw Arrow scalars to preserve type information Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b7c24b8 commit 3bfa6a5

2 files changed

Lines changed: 118 additions & 6 deletions

File tree

vgi/examples/table.py

Lines changed: 115 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,18 @@
55
66
AVAILABLE FUNCTIONS
77
-------------------
8-
SequenceFunction - Generates a sequence of integers 0..n-1
9-
RangeFunction - Generates integers in a start..end range
8+
ConstantColumnsFunction - Demonstrates varargs with dynamic output schema
109
ConstantTableFunction - Returns a constant single-row table
11-
RandomSampleFunction - Generates random sample data (parallelizable)
1210
GeneratorExceptionFunction - Demonstrates exception handling
11+
LoggingGeneratorFunction - Demonstrates log message emission
12+
PartitionedSequenceFunction - Demonstrates multi-worker parallel execution
13+
ProjectedDataFunction - Demonstrates projection pushdown
14+
SequenceFunction - Generates a sequence of integers 0..n-1
15+
SettingsAwareFunction - Demonstrates settings-aware output schema
1316
"""
1417

1518
import struct
16-
from typing import Annotated, ClassVar, cast
19+
from typing import Annotated, Any, ClassVar, cast
1720

1821
import numpy as np
1922
import pyarrow as pa
@@ -31,12 +34,13 @@
3134
)
3235

3336
__all__ = [
34-
"SequenceFunction",
37+
"ConstantColumnsFunction",
3538
"ConstantTableFunction",
3639
"GeneratorExceptionFunction",
3740
"LoggingGeneratorFunction",
3841
"PartitionedSequenceFunction",
3942
"ProjectedDataFunction",
43+
"SequenceFunction",
4044
"SettingsAwareFunction",
4145
]
4246

@@ -601,3 +605,109 @@ def process(self) -> OutputGenerator:
601605
data["details"] = [f"row_{i}"]
602606

603607
yield Output(pa.RecordBatch.from_pydict(data, schema=output_schema))
608+
609+
610+
class ConstantColumnsFunction(TableFunctionGenerator):
611+
"""Generates a table with constant values in each column based on varargs.
612+
613+
USE CASE
614+
--------
615+
Demonstrates varargs with AnyArrow type where the output schema is
616+
determined by the types of the values provided. Each vararg value
617+
becomes a column filled with that constant value for all rows.
618+
619+
This shows how varargs can accept mixed types and produce a dynamic
620+
output schema based on the argument types.
621+
622+
SCHEMA
623+
------
624+
Output schema is dynamic based on the types of provided values.
625+
Column names are auto-generated as col_0, col_1, col_2, etc.
626+
627+
Example: constant_columns(3, 42, 'hello', 3.14)
628+
Output schema: {"col_0": int64, "col_1": string, "col_2": double}
629+
630+
PARALLELIZATION
631+
---------------
632+
Single worker only (max_workers=1).
633+
634+
Example:
635+
-------
636+
SELECT * FROM constant_columns(3, 42, 'hello')
637+
Returns: [{"col_0": 42, "col_1": "hello"},
638+
{"col_0": 42, "col_1": "hello"},
639+
{"col_0": 42, "col_1": "hello"}]
640+
641+
SELECT * FROM constant_columns(2, 1, 2, 3, 'apple')
642+
Returns: [{"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"},
643+
{"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"}]
644+
645+
"""
646+
647+
class Meta:
648+
"""Metadata for ConstantColumnsFunction."""
649+
650+
name = "constant_columns"
651+
description = "Generates rows with constant values from varargs"
652+
categories = ["generator", "utility"]
653+
max_workers = 1
654+
examples = [
655+
FunctionExample(
656+
sql="SELECT * FROM constant_columns(5, 42, 'hello')",
657+
description="Generate 5 rows with columns containing 42 and 'hello'",
658+
),
659+
FunctionExample(
660+
sql="SELECT * FROM constant_columns(3, 1, 2, 3, 'test')",
661+
description="Generate 3 rows with 4 columns of mixed types",
662+
),
663+
]
664+
665+
count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
666+
values: Annotated[
667+
tuple[Any, ...],
668+
Arg(1, varargs=True, doc="Values to fill each column (at least one required)"),
669+
]
670+
671+
# Store Arrow scalars for type information
672+
_value_scalars: list[Any]
673+
674+
BATCH_SIZE: int = 1000
675+
676+
def bind(self) -> None:
677+
"""Extract Arrow scalars from positional arguments for type info."""
678+
# Access raw Arrow scalars to preserve type information
679+
positional = self.invocation.arguments.positional
680+
# Filter to non-None scalars (varargs validation ensures no nulls)
681+
self._value_scalars = [s for s in positional[1:] if s is not None]
682+
683+
@property
684+
def output_schema(self) -> pa.Schema:
685+
"""Return output schema with one column per vararg, typed by value."""
686+
fields = [
687+
pa.field(f"col_{i}", scalar.type)
688+
for i, scalar in enumerate(self._value_scalars)
689+
]
690+
return pa.schema(fields)
691+
692+
@property
693+
def cardinality(self) -> TableCardinality:
694+
"""Return exact cardinality since we know the count."""
695+
return TableCardinality(estimate=self.count, max=self.count)
696+
697+
def process(self) -> OutputGenerator:
698+
"""Generate rows with constant values in each column."""
699+
output_schema = self.output_schema
700+
remaining = self.count
701+
702+
while remaining > 0:
703+
batch_size = min(remaining, self.BATCH_SIZE)
704+
705+
# Create arrays filled with constant values
706+
arrays = [
707+
pa.array([scalar.as_py()] * batch_size, type=scalar.type)
708+
for scalar in self._value_scalars
709+
]
710+
711+
yield Output(pa.RecordBatch.from_arrays(arrays, schema=output_schema))
712+
713+
remaining -= batch_size

vgi/examples/worker.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
UpperCaseFunction,
2121
)
2222
from vgi.examples.table import (
23+
ConstantColumnsFunction,
2324
ConstantTableFunction,
2425
GeneratorExceptionFunction,
2526
LoggingGeneratorFunction,
@@ -63,12 +64,13 @@ class ExampleWorker(Worker):
6364
ExceptionFinalizeFunction,
6465
ExceptionProcessFunction,
6566
# TableFunctionGenerator - generate output without input
66-
SequenceFunction,
67+
ConstantColumnsFunction,
6768
ConstantTableFunction,
6869
GeneratorExceptionFunction,
6970
LoggingGeneratorFunction,
7071
PartitionedSequenceFunction,
7172
ProjectedDataFunction,
73+
SequenceFunction,
7274
SettingsAwareFunction,
7375
# ScalarFunctionGenerator - transform to single-column output
7476
AddNumericColumnsFunction,

0 commit comments

Comments
 (0)