Skip to content

Commit 0306a95

Browse files
committed
improve docs
1 parent 80f7046 commit 0306a95

4 files changed

Lines changed: 240 additions & 194 deletions

File tree

CLAUDE.md

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -115,21 +115,17 @@ import pyarrow.compute as pc
115115
from vgi import ScalarFunction, Arg
116116
from vgi.arguments import AnyArrow
117117

118-
class DoubleColumn(ScalarFunction):
119-
"""Double the value in a specified column."""
118+
class AddColumns(ScalarFunction):
119+
"""Add two integer columns together."""
120120

121121
class Meta:
122-
output_type = AnyArrow # Output type depends on input column
123-
124-
column = Arg[str](0, doc="Column to double")
122+
output_type = pa.int64()
125123

126-
@property
127-
def output_type(self) -> pa.DataType:
128-
# Output type matches input column type
129-
return self.input_schema.field(self.column).type
124+
left = Arg[AnyArrow](0, type_bound=pa.types.is_integer, doc="First column")
125+
right = Arg[AnyArrow](1, type_bound=pa.types.is_integer, doc="Second column")
130126

131127
def compute(self, batch: pa.RecordBatch) -> pa.Array:
132-
return pc.multiply(batch.column(self.column), 2)
128+
return pc.add(batch.column(self.left.value), batch.column(self.right.value))
133129
```
134130

135131
### Key Constraints for Scalar Functions:
@@ -299,7 +295,7 @@ from vgi.log import Level
299295
class MyFunction(TableInOutFunction):
300296
count = Arg[int](0) # Required positional
301297
multiplier = Arg[int](1, default=1) # Optional positional
302-
column = Arg[str]("column") # Required named
298+
target = Arg[str]("target") # Required named
303299
format = Arg[str]("format", default="json") # Optional named
304300
```
305301

@@ -336,19 +332,19 @@ class AddColumns(ScalarFunction):
336332
class Meta:
337333
output_type = AnyArrow # Output type depends on input columns
338334

339-
col1 = Arg[AnyArrow](0, type_bound=pa.types.is_numeric)
340-
col2 = Arg[AnyArrow](1, type_bound=pa.types.is_numeric)
335+
left = Arg[AnyArrow](0, type_bound=[pa.types.is_integer, pa.types.is_floating])
336+
right = Arg[AnyArrow](1, type_bound=[pa.types.is_integer, pa.types.is_floating])
341337

342338
def bind(self) -> None:
343339
"""Compute output type from input columns."""
344-
self._output_type = self.input_schema.field(self.col1.value).type
340+
self._output_type = self.input_schema.field(self.left.value).type
345341

346342
@property
347343
def output_type(self) -> pa.DataType:
348344
return self._output_type
349345

350346
def compute(self, batch: pa.RecordBatch) -> pa.Array:
351-
return pc.add(batch.column(self.col1.value), batch.column(self.col2.value))
347+
return pc.add(batch.column(self.left.value), batch.column(self.right.value))
352348
```
353349

354350
### Parallel Execution and bind() State

0 commit comments

Comments
 (0)