From 2a731fc516855691d0ec80312e802e1c2ed83191 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Sun, 4 Jan 2026 20:07:56 -0500 Subject: [PATCH 1/5] bd sync: 2026-01-04 20:07:56 --- .beads/issues.jsonl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 6dc80c4..39317c8 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,13 +1,25 @@ +{"id":"vgi-python-0hr","title":"Remove redundant InitInputType class attribute","description":"InitInputType class attribute duplicates the generic type parameter: 'class ScalarFunctionGenerator(Function[FunctionInitInput])' already specifies the type, but 'InitInputType = FunctionInitInput' repeats it. Investigate using get_type_hints or __orig_bases__ to infer the type and remove the redundant attribute.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.780529-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.960914-05:00"} +{"id":"vgi-python-1s5","title":"Move distributed state management to optional mixin","description":"The Function base class in function.py includes ~200 lines for distributed state management (store_state, collect_states, enqueue_work, dequeue_work, work queue storage). Not all functions need this. Extract to DistributedStateMixin that functions can opt into, keeping Function base class simpler for basic use cases.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:53.606614-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.839559-05:00"} +{"id":"vgi-python-36f","title":"Split metadata.py Arrow serialization into separate module","description":"metadata.py is 932 lines with two distinct concerns: 1) metadata resolution (enums, dataclasses, parameter extraction, resolve_metadata) and 2) Arrow serialization (schema definitions, to_arrow/from_arrow functions). 
Split Arrow serialization into metadata_serialization.py or metadata_arrow.py for better separation of concerns.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.481364-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.718814-05:00"} +{"id":"vgi-python-3fq","title":"Abstract common worker batch processing logic","description":"Worker batch processing methods _process_scalar_batches (377-466), _process_batches (468-550), and _generate_batches (552-593) share significant structure: IPC writer/reader setup, batch counting/logging, main processing loop. Extract common logic to reduce duplication - consider a BatchProcessor helper class or template method pattern.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:53.350497-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.598552-05:00"} +{"id":"vgi-python-5er","title":"Extract _should_terminate into shared base class","description":"Identical _should_terminate method is copy-pasted in all three function modules. Implementation is always: check if log_message exists and level is EXCEPTION. Move to shared base class (Function or new ProcessingMixin) to eliminate duplication.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.190482-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.071737-05:00","dependencies":[{"issue_id":"vgi-python-5er","depends_on_id":"vgi-python-6o0","type":"blocks","created_at":"2026-01-04T20:07:49.283865-05:00","created_by":"rusty"}]} {"id":"vgi-python-67w","title":"Create example function using DuckDB settings","description":"Create an example function that demonstrates using DuckDB settings to determine its output.\n\nRequirements:\n- Function declares required_settings in Meta\n- Output schema depends on a setting value (e.g., include extra column based on setting)\n- Clear documentation showing the pattern\n\nExample ideas:\n1. 
TimezoneAwareFunction: Output includes timezone info based on 'timezone' setting\n2. VerboseOutput: Adds debug columns when 'debug_mode' setting is true\n3. NumericPrecision: Uses 'numeric_precision' to determine output type precision\n\nAdd to vgi/examples/ and register in ExampleWorker.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.503681-05:00","created_by":"rusty","updated_at":"2026-01-04T13:22:23.779895-05:00","closed_at":"2026-01-04T13:22:23.779895-05:00","close_reason":"Added SettingsAwareFunction example","dependencies":[{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-c2b","type":"blocks","created_at":"2026-01-04T13:06:13.865474-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-ivf","type":"blocks","created_at":"2026-01-04T13:06:13.890269-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-bqb","type":"blocks","created_at":"2026-01-04T13:06:13.912531-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-a99","type":"blocks","created_at":"2026-01-04T13:06:13.936552-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-j4t","type":"blocks","created_at":"2026-01-04T13:06:13.958494-05:00","created_by":"rusty"}]} +{"id":"vgi-python-6o0","title":"Consolidate _OutputComplete classes into shared module","description":"Three nearly identical _OutputComplete classes exist in scalar_function.py:168-197 (_ScalarOutputComplete), table_function.py:136-175 (_OutputComplete), and table_in_out_function.py:356-400 (_OutputComplete). All are frozen dataclasses with batch field, log_message field, and from_process_result() classmethod. 
Extract to shared module (e.g., vgi/protocol_types.py) with a single parameterized class.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:40.893139-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:15.806567-05:00"} +{"id":"vgi-python-79e","title":"Unify ProtocolInput classes with shared base","description":"ProtocolInput classes in scalar_function.py:151-166 and table_in_out_function.py:109-142 have similar structure with batch and metadata fields. The table_in_out version adds is_finalize logic. Create shared base ProtocolInput in protocol_types.py with table_in_out extending it.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.31917-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.240397-05:00"} {"id":"vgi-python-a99","title":"Add settings accessor to function base classes","description":"Add a property to access DuckDB settings values in function implementations.\n\nChanges needed:\n- Add 'settings: dict[str, str]' property to Function base class\n- Property should return self.invocation.duckdb_settings or empty dict\n- Add convenience method like 'get_setting(name, default=None)'\n- Update ScalarFunction, TableFunctionGenerator, TableInOutFunction\n\nExample usage in function:\ndef compute(self, batch):\n tz = self.get_setting('timezone', 'UTC')\n # or\n tz = self.settings.get('timezone', 'UTC')","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.221602-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.171991-05:00","closed_at":"2026-01-04T13:20:41.171991-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-a99","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.738212-05:00","created_by":"rusty"}]} {"id":"vgi-python-aad","title":"Design: DuckDB settings/pragmas access for VGI functions","description":"Design how VGI functions can declare 
required DuckDB settings/pragmas in their Meta class, and how these settings values should be passed during the bind phase.\n\nKey design decisions:\n1. How to declare required settings in function Meta (e.g., required_settings = ['timezone', 'threads'])\n2. How to add settings to Invocation dataclass\n3. How settings values should be accessed in function code\n4. Serialization format for settings in Arrow IPC\n\nRecommendation: Add 'duckdb_settings: dict[str, str] | None' to Invocation and 'required_settings: list[str]' to Meta class.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-04T13:05:47.619105-05:00","created_by":"rusty","updated_at":"2026-01-04T13:11:13.197139-05:00","closed_at":"2026-01-04T13:11:13.197139-05:00","close_reason":"Design document created at docs/design-duckdb-settings.md"} +{"id":"vgi-python-bi8","title":"Extract common _process_with_exception_handling into mixin","description":"The _process_with_exception_handling and _process_and_validate methods are duplicated across scalar_function.py:296-346, table_function.py:386-438, and table_in_out_function.py:586-642. All follow same pattern: try _process_and_validate, catch exceptions, return OutputComplete with error message. Extract to ProcessingMixin that all function types inherit from.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:41.02111-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:15.947758-05:00","dependencies":[{"issue_id":"vgi-python-bi8","depends_on_id":"vgi-python-6o0","type":"blocks","created_at":"2026-01-04T20:07:49.181408-05:00","created_by":"rusty"}]} +{"id":"vgi-python-bku","title":"Change cardinality() method to property for consistency with output_schema","description":"Inconsistent access patterns: output_schema is a property but cardinality() is a method. Both return immutable data. Change cardinality() to a property for API consistency. 
Located in table_function.py:304-314.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.211782-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.463727-05:00"} {"id":"vgi-python-bqb","title":"Update worker to handle DuckDB settings during bind","description":"Update vgi/worker.py to process DuckDB settings from Invocation during the bind phase.\n\nChanges needed:\n- Read settings from invocation.duckdb_settings\n- Validate that all required_settings (from Meta) are present in invocation\n- Pass settings to function instance for access\n- Log settings usage for debugging\n\nThe worker should validate settings early in bind to fail fast if required settings are missing.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.04037-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.17079-05:00","closed_at":"2026-01-04T13:20:41.17079-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-bqb","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.714281-05:00","created_by":"rusty"}]} {"id":"vgi-python-c2b","title":"Add duckdb_settings field to Invocation class","description":"Update vgi/invocation.py to add a duckdb_settings field to the Invocation dataclass.\n\nChanges needed:\n- Add 'duckdb_settings: dict[str, str] | None = None' field to Invocation\n- Update serialize() to include settings in Arrow IPC batch\n- Update deserialize() to read settings from Arrow IPC batch\n- Handle None case (no settings requested)\n\nSerialization: Use a struct field with string key-value pairs or a map type.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:47.765077-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.167817-05:00","closed_at":"2026-01-04T13:20:41.167817-05:00","close_reason":"Implementation complete, all tests 
pass","dependencies":[{"issue_id":"vgi-python-c2b","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.664038-05:00","created_by":"rusty"}]} {"id":"vgi-python-e37","title":"move Invocation from function.py out to own file","description":"The Invocation clas is kind of seperate from functions, so it should be in its own file. Move it and all of its other associated classes like InvocationType to its own file","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T09:18:46.605941-05:00","created_by":"rusty","updated_at":"2026-01-04T09:24:37.922675-05:00","closed_at":"2026-01-04T09:24:37.922675-05:00","close_reason":"Closed"} +{"id":"vgi-python-e9q","title":"Unify ProtocolOutput classes with shared base","description":"ProtocolOutput classes in table_function.py:177-224 and table_in_out_function.py:144-207 share similar metadata() method and from_process_result() classmethod. The table_in_out version adds status field. Create shared base with table_in_out extending it for status support.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.45014-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.371419-05:00"} {"id":"vgi-python-ivf","title":"Add required_settings to function Meta class","description":"Update function metadata to support declaring required DuckDB settings.\n\nChanges needed:\n- Add 'required_settings: list[str]' to FunctionMeta in vgi/metadata.py\n- Update Meta class resolution in vgi/function.py\n- Add validation that required_settings is a list of strings\n- Make it available via get_metadata() for introspection\n\nExample usage:\nclass MyFunction(TableInOutFunction):\n class Meta:\n required_settings = ['timezone', 
'threads']","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:47.903747-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.169516-05:00","closed_at":"2026-01-04T13:20:41.169516-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-ivf","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.690253-05:00","created_by":"rusty"}]} {"id":"vgi-python-j4t","title":"Update client to pass DuckDB settings in Invocation","description":"Update vgi/client/client.py to support passing DuckDB settings.\n\nChanges needed:\n- Add 'duckdb_settings: dict[str, str] | None = None' parameter to relevant methods\n- Include settings in Invocation creation\n- Add helper to query function's required_settings from metadata\n\nThe client needs to know what settings to pass. Options:\n1. Client queries worker for function metadata first\n2. Settings passed explicitly by caller\n3. Client introspects function class if available locally","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.358656-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.173178-05:00","closed_at":"2026-01-04T13:20:41.173178-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-j4t","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.761572-05:00","created_by":"rusty"}]} +{"id":"vgi-python-kz4","title":"Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency","description":"Naming inconsistency: TableFunctionGenerator uses *Generator suffix, but TableInOutGeneratorFunction uses *GeneratorFunction suffix. Rename TableInOutGeneratorFunction to TableInOutGenerator for consistency. 
Also consider renaming ScalarFunctionGenerator if needed.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.581028-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.506499-05:00"} {"id":"vgi-python-odi","title":"Change max_processes from method to property in Function hierarchy","description":"Refactor max_processes from a method to a property across the Function class hierarchy (Function, ScalarFunction, TableFunctionGenerator, TableInOutFunction, etc.). This makes the API more consistent since max_processes is effectively a constant per function class and properties are more idiomatic for such values.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T11:25:29.750648-05:00","created_by":"rusty","updated_at":"2026-01-04T11:50:57.566545-05:00","closed_at":"2026-01-04T11:50:57.566545-05:00","close_reason":"Closed"} {"id":"vgi-python-p91","title":"Move exception classes from function.py to own file","description":"Move InitIdentifierError and SchemaValidationError from vgi/function.py to a new vgi/exceptions.py file. Update imports in function.py and any other files that reference these exceptions.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T09:12:28.058227-05:00","created_by":"rusty","updated_at":"2026-01-04T09:17:52.477661-05:00","closed_at":"2026-01-04T09:17:52.477661-05:00","close_reason":"Closed"} +{"id":"vgi-python-r3t","title":"Consolidate test client infrastructure in testing.py","description":"testing.py has three test client classes (FunctionTestClient, TableFunctionTestClient, ScalarFunctionTestClient) with shared infrastructure patterns. Extend _BaseTestClient pattern to reduce code duplication. 
Consider using a single unified client with method dispatch based on function type.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.913912-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:38.132591-05:00"} {"id":"vgi-python-vzg","title":"Update documentation for DuckDB settings feature","description":"Update documentation to cover DuckDB settings feature.\n\nFiles to update:\n1. docs/protocol.md - Add settings to protocol flow diagrams and Invocation fields\n2. docs/metadata.md - Document required_settings in Meta class\n3. CLAUDE.md - Add example showing settings usage pattern\n4. docs/lifecycle.md - Mention settings availability during bind\n\nInclude:\n- When settings are available (bind phase and later)\n- How to declare required settings\n- How to access settings in function code\n- Example patterns for settings-dependent output schemas","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T13:05:48.795757-05:00","created_by":"rusty","updated_at":"2026-01-04T13:28:21.005166-05:00","closed_at":"2026-01-04T13:28:21.005166-05:00","close_reason":"Documentation updated in protocol.md and CLAUDE.md","dependencies":[{"issue_id":"vgi-python-vzg","depends_on_id":"vgi-python-67w","type":"blocks","created_at":"2026-01-04T13:06:14.089469-05:00","created_by":"rusty"}]} {"id":"vgi-python-yzj","title":"Write end-to-end tests for DuckDB settings feature","description":"Create comprehensive tests for the DuckDB settings feature.\n\nTest scenarios:\n1. Function with required_settings receives settings correctly\n2. Function with no required_settings works without settings\n3. Missing required setting fails with clear error\n4. Setting value correctly affects output schema (bind phase)\n5. Setting value correctly affects function behavior (processing phase)\n6. Settings serialization/deserialization roundtrip\n7. 
Parallel workers all receive same settings\n\nAdd tests to tests/ directory covering:\n- Unit tests for Invocation serialization with settings\n- Unit tests for Meta required_settings\n- Integration tests using ExampleWorker with settings-dependent function","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.650173-05:00","created_by":"rusty","updated_at":"2026-01-04T13:26:24.713648-05:00","closed_at":"2026-01-04T13:26:24.713648-05:00","close_reason":"End-to-end tests written and passing","dependencies":[{"issue_id":"vgi-python-yzj","depends_on_id":"vgi-python-67w","type":"blocks","created_at":"2026-01-04T13:06:14.064084-05:00","created_by":"rusty"}]} {"id":"vgi-python-zf7","title":"Unify storage protocols into single FunctionStorage interface","description":"## Problem\n\nCurrently there are two separate storage protocols (InitStorage and WorkerStateStorage) with inconsistent naming:\n- `create` vs `store` (different verbs for similar operations)\n- `collect_and_delete` is verbose and describes implementation\n\nUsers wanting a custom storage backend (Redis, DynamoDB, etc.) must implement two separate classes.\n\n## Solution\n\nUnify into a single `FunctionStorage` protocol with consistent naming using prefixes to group related methods:\n\n```python\nclass FunctionStorage(Protocol):\n \"\"\"Storage protocol for VGI distributed function execution.\n \n Three access patterns:\n - Global state: Init data shared across all workers (key-value with auto-generated keys)\n - Worker state: Partial results per worker (collected during finalization)\n - Work queue: Atomic work distribution across workers (FIFO queue)\n \"\"\"\n \n # Global state (init data)\n def global_put(self, value: bytes) -\u003e bytes: ... 
# Returns auto-generated key\n def global_get(self, key: bytes) -\u003e bytes: ...\n def global_delete(self, key: bytes) -\u003e None: ...\n def global_exists(self, key: bytes) -\u003e bool: ...\n \n # Worker state (partial results per worker)\n def worker_put(self, invocation_id: bytes, worker_id: int, state: bytes) -\u003e None: ...\n def worker_collect(self, invocation_id: bytes) -\u003e list[bytes]: ... # Atomic collect+delete\n \n # Work queue (distributed work items)\n def queue_push(self, invocation_id: bytes, items: list[bytes]) -\u003e int: ...\n def queue_pop(self, invocation_id: bytes) -\u003e bytes | None: ... # Atomic claim\n def queue_clear(self, invocation_id: bytes) -\u003e int: ...\n```\n\n## Design Rationale\n\n- **Prefixes** (`global_`, `worker_`, `queue_`): Clear grouping, good autocomplete\n- **Consistent verbs**: `put/get` for storage, `push/pop` for queue\n- **Minimal interface**: 9 methods total (down from 9, but now unified)\n- **Single class variable** in Function: `storage: ClassVar[FunctionStorage]`\n\n## Files to Change\n\n- `vgi/function_storage.py`: New protocol + merged FunctionStorageSqlite\n- `vgi/function.py`: Single `storage` class variable, update all method calls","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T12:34:25.966005-05:00","created_by":"rusty","updated_at":"2026-01-04T12:58:16.913278-05:00","closed_at":"2026-01-04T12:58:16.913278-05:00","close_reason":"Closed"} From 4723e2bb695e3edbf185aed6612514ebb147f660 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Sun, 4 Jan 2026 20:08:16 -0500 Subject: [PATCH 2/5] beads: add VGI improvement tasks --- .beads/.gitignore | 39 ++++++++++++++++++++++ .beads/README.md | 81 ++++++++++++++++++++++++++++++++++++++++++++++ .beads/config.yaml | 62 +++++++++++++++++++++++++++++++++++ 3 files changed, 182 insertions(+) create mode 100644 .beads/.gitignore create mode 100644 .beads/README.md create mode 100644 .beads/config.yaml diff --git a/.beads/.gitignore 
b/.beads/.gitignore new file mode 100644 index 0000000..4a7a77d --- /dev/null +++ b/.beads/.gitignore @@ -0,0 +1,39 @@ +# SQLite databases +*.db +*.db?* +*.db-journal +*.db-wal +*.db-shm + +# Daemon runtime files +daemon.lock +daemon.log +daemon.pid +bd.sock +sync-state.json +last-touched + +# Local version tracking (prevents upgrade notification spam after git ops) +.local_version + +# Legacy database files +db.sqlite +bd.db + +# Worktree redirect file (contains relative path to main repo's .beads/) +# Must not be committed as paths would be wrong in other clones +redirect + +# Merge artifacts (temporary files from 3-way merge) +beads.base.jsonl +beads.base.meta.json +beads.left.jsonl +beads.left.meta.json +beads.right.jsonl +beads.right.meta.json + +# NOTE: Do NOT add negation patterns (e.g., !issues.jsonl) here. +# They would override fork protection in .git/info/exclude, allowing +# contributors to accidentally commit upstream issue databases. +# The JSONL files (issues.jsonl, interactions.jsonl) and config files +# are tracked by git by default since no pattern above ignores them. diff --git a/.beads/README.md b/.beads/README.md new file mode 100644 index 0000000..50f281f --- /dev/null +++ b/.beads/README.md @@ -0,0 +1,81 @@ +# Beads - AI-Native Issue Tracking + +Welcome to Beads! This repository uses **Beads** for issue tracking - a modern, AI-native tool designed to live directly in your codebase alongside your code. + +## What is Beads? + +Beads is issue tracking that lives in your repo, making it perfect for AI coding agents and developers who want their issues close to their code. No web UI required - everything works through the CLI and integrates seamlessly with git. 
+ +**Learn more:** [github.com/steveyegge/beads](https://github.com/steveyegge/beads) + +## Quick Start + +### Essential Commands + +```bash +# Create new issues +bd create "Add user authentication" + +# View all issues +bd list + +# View issue details +bd show <issue-id> + +# Update issue status +bd update <issue-id> --status in_progress +bd update <issue-id> --status done + +# Sync with git remote +bd sync +``` + +### Working with Issues + +Issues in Beads are: +- **Git-native**: Stored in `.beads/issues.jsonl` and synced like code +- **AI-friendly**: CLI-first design works perfectly with AI coding agents +- **Branch-aware**: Issues can follow your branch workflow +- **Always in sync**: Auto-syncs with your commits + +## Why Beads? + +✨ **AI-Native Design** +- Built specifically for AI-assisted development workflows +- CLI-first interface works seamlessly with AI coding agents +- No context switching to web UIs + +🚀 **Developer Focused** +- Issues live in your repo, right next to your code +- Works offline, syncs when you push +- Fast, lightweight, and stays out of your way + +🔧 **Git Integration** +- Automatic sync with git commits +- Branch-aware issue tracking +- Intelligent JSONL merge resolution + +## Get Started with Beads + +Try Beads in your own projects: + +```bash +# Install Beads +curl -sSL https://raw.githubusercontent.com/steveyegge/beads/main/scripts/install.sh | bash + +# Initialize in your repo +bd init + +# Create your first issue +bd create "Try out Beads" +``` + +## Learn More + +- **Documentation**: [github.com/steveyegge/beads/docs](https://github.com/steveyegge/beads/tree/main/docs) +- **Quick Start Guide**: Run `bd quickstart` +- **Examples**: [github.com/steveyegge/beads/examples](https://github.com/steveyegge/beads/tree/main/examples) + +--- + +*Beads: Issue tracking that moves at the speed of thought* ⚡ diff --git a/.beads/config.yaml b/.beads/config.yaml new file mode 100644 index 0000000..f242785 --- /dev/null +++ b/.beads/config.yaml @@ -0,0 +1,62 @@ +# Beads
Configuration File +# This file configures default behavior for all bd commands in this repository +# All settings can also be set via environment variables (BD_* prefix) +# or overridden with command-line flags + +# Issue prefix for this repository (used by bd init) +# If not set, bd init will auto-detect from directory name +# Example: issue-prefix: "myproject" creates issues like "myproject-1", "myproject-2", etc. +# issue-prefix: "" + +# Use no-db mode: load from JSONL, no SQLite, write back after each command +# When true, bd will use .beads/issues.jsonl as the source of truth +# instead of SQLite database +# no-db: false + +# Disable daemon for RPC communication (forces direct database access) +# no-daemon: false + +# Disable auto-flush of database to JSONL after mutations +# no-auto-flush: false + +# Disable auto-import from JSONL when it's newer than database +# no-auto-import: false + +# Enable JSON output by default +# json: false + +# Default actor for audit trails (overridden by BD_ACTOR or --actor) +# actor: "" + +# Path to database (overridden by BEADS_DB or --db) +# db: "" + +# Auto-start daemon if not running (can also use BEADS_AUTO_START_DAEMON) +# auto-start-daemon: true + +# Debounce interval for auto-flush (can also use BEADS_FLUSH_DEBOUNCE) +# flush-debounce: "5s" + +# Git branch for beads commits (bd sync will commit to this branch) +# IMPORTANT: Set this for team projects so all clones use the same sync branch. +# This setting persists across clones (unlike database config which is gitignored). +# Can also use BEADS_SYNC_BRANCH env var for local override. +# If not set, bd sync will require you to run 'bd config set sync.branch <branch>'. +# sync-branch: "beads-sync" + +# Multi-repo configuration (experimental - bd-307) +# Allows hydrating from multiple repositories and routing writes to the correct JSONL +# repos: +# primary: "."
# Primary repo (where this database lives) +# additional: # Additional repos to hydrate from (read-only) +# - ~/beads-planning # Personal planning repo +# - ~/work-planning # Work planning repo + +# Integration settings (access with 'bd config get/set') +# These are stored in the database, not in this file: +# - jira.url +# - jira.project +# - linear.url +# - linear.api-key +# - github.org +# - github.repo From 8682e0faa2626cd478aa04f32d573e4bfa3ea59f Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Sun, 4 Jan 2026 21:04:34 -0500 Subject: [PATCH 3/5] typed methods for state --- vgi/function.py | 99 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 4 deletions(-) diff --git a/vgi/function.py b/vgi/function.py index ea81d5c..e174eef 100644 --- a/vgi/function.py +++ b/vgi/function.py @@ -26,6 +26,7 @@ import os import uuid from abc import ABC, abstractmethod +from collections.abc import Sequence from dataclasses import dataclass from functools import cached_property from typing import ( @@ -407,11 +408,13 @@ def finalize(self) -> OutputGenerator: return [state_class.deserialize(data) for data in state_bytes_list] def enqueue_work(self, work_items: list[bytes]) -> int: - """Add work items to the queue for this invocation. + """Add work items to the queue for this invocation (low-level bytes API). Call this during initialization (initialize_global_state or setup) to populate the work queue that workers will pull from during process(). + For a typed alternative, see enqueue_work_items(). + Args: work_items: List of opaque bytes representing work items. The function is responsible for serializing/deserializing @@ -421,7 +424,7 @@ def enqueue_work(self, work_items: list[bytes]) -> int: Number of items enqueued. Raises: - ValueError: If execution_identifier has not been set. + ExecutionIdentifierError: If execution_identifier has not been set. 
Example: def initialize_global_state(self, init_input: pa.RecordBatch) -> InitResult: @@ -439,8 +442,45 @@ def initialize_global_state(self, init_input: pa.RecordBatch) -> InitResult: ) return self.storage.queue_push(self.execution_identifier, work_items) + def enqueue_work_items(self, work_items: Sequence[Serializable]) -> int: + """Add typed work items to the queue for this invocation. + + This is a typed convenience method that handles serialization automatically. + Work items must implement the Serializable protocol. + + Args: + work_items: Sequence of Serializable objects to enqueue. + + Returns: + Number of items enqueued. + + Raises: + ExecutionIdentifierError: If execution_identifier has not been set. + + Example: + @dataclass + class FileRange: + path: str + start: int + end: int + + def serialize(self) -> bytes: + return pickle.dumps((self.path, self.start, self.end)) + + @classmethod + def deserialize(cls, data: bytes) -> Self: + path, start, end = pickle.loads(data) + return cls(path, start, end) + + def setup(self): + ranges = [FileRange("a.csv", 0, 1000), FileRange("b.csv", 0, 500)] + self.enqueue_work_items(ranges) + + """ + return self.enqueue_work([item.serialize() for item in work_items]) + def dequeue_work(self) -> bytes | None: - """Claim and return the next work item from the queue. + """Claim and return the next work item from the queue (low-level bytes API). Each call atomically claims one item from the queue. Returns None when the queue is empty (all work has been claimed). @@ -448,11 +488,13 @@ def dequeue_work(self) -> bytes | None: Multiple workers can safely call this concurrently - each item will be returned to exactly one worker. + For a typed alternative, see dequeue_work_item(). + Returns: Opaque bytes representing a work item, or None if queue is empty. Raises: - ValueError: If execution_identifier has not been set. + ExecutionIdentifierError: If execution_identifier has not been set.
Example: def process(self) -> OutputGenerator: @@ -472,6 +514,55 @@ def process(self) -> OutputGenerator: ) return self.storage.queue_pop(self.execution_identifier) + def dequeue_work_item(self, item_class: type[StateT]) -> StateT | None: + """Claim and deserialize the next work item from the queue. + + This is a typed convenience method that handles deserialization automatically. + The item_class must implement the Serializable protocol (have a deserialize + classmethod). + + Each call atomically claims one item from the queue. Returns None + when the queue is empty (all work has been claimed). + + Multiple workers can safely call this concurrently - each item + will be returned to exactly one worker. + + Args: + item_class: The class to use for deserializing the work item. + Must have a deserialize(bytes) classmethod. + + Returns: + Deserialized work item, or None if queue is empty. + + Raises: + ExecutionIdentifierError: If execution_identifier has not been set. + + Example: + @dataclass + class FileRange: + path: str + start: int + end: int + + def serialize(self) -> bytes: + return pickle.dumps((self.path, self.start, self.end)) + + @classmethod + def deserialize(cls, data: bytes) -> Self: + path, start, end = pickle.loads(data) + return cls(path, start, end) + + def process(self) -> OutputGenerator: + while item := self.dequeue_work_item(FileRange): + # item is FileRange, fully typed + yield Output(self.process_range(item.path, item.start, item.end)) + + """ + data = self.dequeue_work() + if data is None: + return None + return item_class.deserialize(data) + @final @cached_property def empty_output_batch(self) -> pa.RecordBatch: From 4ee911df219117804b2d3317a0ab037e9fe0eecc Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Sun, 4 Jan 2026 21:32:39 -0500 Subject: [PATCH 4/5] refactor: change cardinality() method to property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change cardinality from a method to a property 
for API consistency with output_schema. Both return immutable data, so having consistent access patterns improves the API. Changes: - vgi/table_function.py: Add @property decorator to cardinality - vgi/examples/table.py: Add @property to all cardinality overrides - vgi/examples/table_in_out.py: Add @property to all cardinality overrides - tests/: Update test overrides and call sites (.cardinality instead of .cardinality()) - docs/generator-api.md: Update example to use @property Closes: vgi-python-bku 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 8 ++++---- docs/generator-api.md | 1 + tests/table/generator/test_constant_table_function.py | 2 +- tests/table/generator/test_sequence_function.py | 2 +- tests/table/test_function.py | 7 ++++--- vgi/examples/table.py | 7 +++++++ vgi/examples/table_in_out.py | 2 ++ vgi/table_function.py | 3 ++- 8 files changed, 22 insertions(+), 10 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 39317c8..eb951bd 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,15 +1,15 @@ {"id":"vgi-python-0hr","title":"Remove redundant InitInputType class attribute","description":"InitInputType class attribute duplicates the generic type parameter: 'class ScalarFunctionGenerator(Function[FunctionInitInput])' already specifies the type, but 'InitInputType = FunctionInitInput' repeats it. Investigate using get_type_hints or __orig_bases__ to infer the type and remove the redundant attribute.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.780529-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.960914-05:00"} -{"id":"vgi-python-1s5","title":"Move distributed state management to optional mixin","description":"The Function base class in function.py includes ~200 lines for distributed state management (store_state, collect_states, enqueue_work, dequeue_work, work queue storage). 
Not all functions need this. Extract to DistributedStateMixin that functions can opt into, keeping Function base class simpler for basic use cases.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:53.606614-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.839559-05:00"} +{"id":"vgi-python-1s5","title":"Move distributed state management to optional mixin","description":"The Function base class in function.py includes ~200 lines for distributed state management (store_state, collect_states, enqueue_work, dequeue_work, work queue storage). Not all functions need this. Extract to DistributedStateMixin that functions can opt into, keeping Function base class simpler for basic use cases.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:53.606614-05:00","created_by":"rusty","updated_at":"2026-01-04T21:22:09.772825-05:00","closed_at":"2026-01-04T21:22:09.772825-05:00","close_reason":"Analysis complete: extraction not recommended. The distributed state methods are tightly coupled with execution_identifier and storage, which are used by core initialization methods. Extraction would require moving initialize_global_state/load_global_state to the mixin, breaking the protocol and requiring multiple inheritance. Current API is already opt-in (just don't call the methods) and well-documented."} {"id":"vgi-python-36f","title":"Split metadata.py Arrow serialization into separate module","description":"metadata.py is 932 lines with two distinct concerns: 1) metadata resolution (enums, dataclasses, parameter extraction, resolve_metadata) and 2) Arrow serialization (schema definitions, to_arrow/from_arrow functions). 
Split Arrow serialization into metadata_serialization.py or metadata_arrow.py for better separation of concerns.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.481364-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.718814-05:00"} -{"id":"vgi-python-3fq","title":"Abstract common worker batch processing logic","description":"Worker batch processing methods _process_scalar_batches (377-466), _process_batches (468-550), and _generate_batches (552-593) share significant structure: IPC writer/reader setup, batch counting/logging, main processing loop. Extract common logic to reduce duplication - consider a BatchProcessor helper class or template method pattern.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:53.350497-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.598552-05:00"} +{"id":"vgi-python-3fq","title":"Abstract common worker batch processing logic","description":"Worker batch processing methods _process_scalar_batches (377-466), _process_batches (468-550), and _generate_batches (552-593) share significant structure: IPC writer/reader setup, batch counting/logging, main processing loop. Extract common logic to reduce duplication - consider a BatchProcessor helper class or template method pattern.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:53.350497-05:00","created_by":"rusty","updated_at":"2026-01-04T21:20:24.509785-05:00","closed_at":"2026-01-04T21:20:24.509785-05:00","close_reason":"Analysis complete: abstraction not warranted. The three methods have sufficiently different logic (input handling, log message loops, protocol types) that abstracting them would add complexity without meaningful benefit. 
Current code is already readable at ~70-90 lines each."} {"id":"vgi-python-5er","title":"Extract _should_terminate into shared base class","description":"Identical _should_terminate method is copy-pasted in all three function modules. Implementation is always: check if log_message exists and level is EXCEPTION. Move to shared base class (Function or new ProcessingMixin) to eliminate duplication.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.190482-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.071737-05:00","dependencies":[{"issue_id":"vgi-python-5er","depends_on_id":"vgi-python-6o0","type":"blocks","created_at":"2026-01-04T20:07:49.283865-05:00","created_by":"rusty"}]} {"id":"vgi-python-67w","title":"Create example function using DuckDB settings","description":"Create an example function that demonstrates using DuckDB settings to determine its output.\n\nRequirements:\n- Function declares required_settings in Meta\n- Output schema depends on a setting value (e.g., include extra column based on setting)\n- Clear documentation showing the pattern\n\nExample ideas:\n1. TimezoneAwareFunction: Output includes timezone info based on 'timezone' setting\n2. VerboseOutput: Adds debug columns when 'debug_mode' setting is true\n3. 
NumericPrecision: Uses 'numeric_precision' to determine output type precision\n\nAdd to vgi/examples/ and register in ExampleWorker.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.503681-05:00","created_by":"rusty","updated_at":"2026-01-04T13:22:23.779895-05:00","closed_at":"2026-01-04T13:22:23.779895-05:00","close_reason":"Added SettingsAwareFunction example","dependencies":[{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-c2b","type":"blocks","created_at":"2026-01-04T13:06:13.865474-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-ivf","type":"blocks","created_at":"2026-01-04T13:06:13.890269-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-bqb","type":"blocks","created_at":"2026-01-04T13:06:13.912531-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-a99","type":"blocks","created_at":"2026-01-04T13:06:13.936552-05:00","created_by":"rusty"},{"issue_id":"vgi-python-67w","depends_on_id":"vgi-python-j4t","type":"blocks","created_at":"2026-01-04T13:06:13.958494-05:00","created_by":"rusty"}]} -{"id":"vgi-python-6o0","title":"Consolidate _OutputComplete classes into shared module","description":"Three nearly identical _OutputComplete classes exist in scalar_function.py:168-197 (_ScalarOutputComplete), table_function.py:136-175 (_OutputComplete), and table_in_out_function.py:356-400 (_OutputComplete). All are frozen dataclasses with batch field, log_message field, and from_process_result() classmethod. 
Extract to shared module (e.g., vgi/protocol_types.py) with a single parameterized class.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:40.893139-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:15.806567-05:00"} +{"id":"vgi-python-6o0","title":"Consolidate _OutputComplete classes into shared module","description":"Three nearly identical _OutputComplete classes exist in scalar_function.py:168-197 (_ScalarOutputComplete), table_function.py:136-175 (_OutputComplete), and table_in_out_function.py:356-400 (_OutputComplete). All are frozen dataclasses with batch field, log_message field, and from_process_result() classmethod. Extract to shared module (e.g., vgi/protocol_types.py) with a single parameterized class.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:40.893139-05:00","created_by":"rusty","updated_at":"2026-01-04T21:18:34.529683-05:00","closed_at":"2026-01-04T21:18:34.529683-05:00","close_reason":"PR #5 created: https://github.com/Query-farm/vgi-python/pull/5"} {"id":"vgi-python-79e","title":"Unify ProtocolInput classes with shared base","description":"ProtocolInput classes in scalar_function.py:151-166 and table_in_out_function.py:109-142 have similar structure with batch and metadata fields. The table_in_out version adds is_finalize logic. 
Create shared base ProtocolInput in protocol_types.py with table_in_out extending it.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:41.31917-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:16.240397-05:00"} {"id":"vgi-python-a99","title":"Add settings accessor to function base classes","description":"Add a property to access DuckDB settings values in function implementations.\n\nChanges needed:\n- Add 'settings: dict[str, str]' property to Function base class\n- Property should return self.invocation.duckdb_settings or empty dict\n- Add convenience method like 'get_setting(name, default=None)'\n- Update ScalarFunction, TableFunctionGenerator, TableInOutFunction\n\nExample usage in function:\ndef compute(self, batch):\n tz = self.get_setting('timezone', 'UTC')\n # or\n tz = self.settings.get('timezone', 'UTC')","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.221602-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.171991-05:00","closed_at":"2026-01-04T13:20:41.171991-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-a99","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.738212-05:00","created_by":"rusty"}]} {"id":"vgi-python-aad","title":"Design: DuckDB settings/pragmas access for VGI functions","description":"Design how VGI functions can declare required DuckDB settings/pragmas in their Meta class, and how these settings values should be passed during the bind phase.\n\nKey design decisions:\n1. How to declare required settings in function Meta (e.g., required_settings = ['timezone', 'threads'])\n2. How to add settings to Invocation dataclass\n3. How settings values should be accessed in function code\n4. 
Serialization format for settings in Arrow IPC\n\nRecommendation: Add 'duckdb_settings: dict[str, str] | None' to Invocation and 'required_settings: list[str]' to Meta class.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-04T13:05:47.619105-05:00","created_by":"rusty","updated_at":"2026-01-04T13:11:13.197139-05:00","closed_at":"2026-01-04T13:11:13.197139-05:00","close_reason":"Design document created at docs/design-duckdb-settings.md"} {"id":"vgi-python-bi8","title":"Extract common _process_with_exception_handling into mixin","description":"The _process_with_exception_handling and _process_and_validate methods are duplicated across scalar_function.py:296-346, table_function.py:386-438, and table_in_out_function.py:586-642. All follow same pattern: try _process_and_validate, catch exceptions, return OutputComplete with error message. Extract to ProcessingMixin that all function types inherit from.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:41.02111-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:15.947758-05:00","dependencies":[{"issue_id":"vgi-python-bi8","depends_on_id":"vgi-python-6o0","type":"blocks","created_at":"2026-01-04T20:07:49.181408-05:00","created_by":"rusty"}]} -{"id":"vgi-python-bku","title":"Change cardinality() method to property for consistency with output_schema","description":"Inconsistent access patterns: output_schema is a property but cardinality() is a method. Both return immutable data. Change cardinality() to a property for API consistency. Located in table_function.py:304-314.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.211782-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:37.463727-05:00"} +{"id":"vgi-python-bku","title":"Change cardinality() method to property for consistency with output_schema","description":"Inconsistent access patterns: output_schema is a property but cardinality() is a method. 
Both return immutable data. Change cardinality() to a property for API consistency. Located in table_function.py:304-314.","status":"in_progress","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.211782-05:00","created_by":"rusty","updated_at":"2026-01-04T21:22:27.430091-05:00"} {"id":"vgi-python-bqb","title":"Update worker to handle DuckDB settings during bind","description":"Update vgi/worker.py to process DuckDB settings from Invocation during the bind phase.\n\nChanges needed:\n- Read settings from invocation.duckdb_settings\n- Validate that all required_settings (from Meta) are present in invocation\n- Pass settings to function instance for access\n- Log settings usage for debugging\n\nThe worker should validate settings early in bind to fail fast if required settings are missing.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.04037-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.17079-05:00","closed_at":"2026-01-04T13:20:41.17079-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-bqb","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.714281-05:00","created_by":"rusty"}]} {"id":"vgi-python-c2b","title":"Add duckdb_settings field to Invocation class","description":"Update vgi/invocation.py to add a duckdb_settings field to the Invocation dataclass.\n\nChanges needed:\n- Add 'duckdb_settings: dict[str, str] | None = None' field to Invocation\n- Update serialize() to include settings in Arrow IPC batch\n- Update deserialize() to read settings from Arrow IPC batch\n- Handle None case (no settings requested)\n\nSerialization: Use a struct field with string key-value pairs or a map 
type.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:47.765077-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.167817-05:00","closed_at":"2026-01-04T13:20:41.167817-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-c2b","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.664038-05:00","created_by":"rusty"}]} {"id":"vgi-python-e37","title":"move Invocation from function.py out to own file","description":"The Invocation clas is kind of seperate from functions, so it should be in its own file. Move it and all of its other associated classes like InvocationType to its own file","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T09:18:46.605941-05:00","created_by":"rusty","updated_at":"2026-01-04T09:24:37.922675-05:00","closed_at":"2026-01-04T09:24:37.922675-05:00","close_reason":"Closed"} diff --git a/docs/generator-api.md b/docs/generator-api.md index 444e7d8..c329acc 100644 --- a/docs/generator-api.md +++ b/docs/generator-api.md @@ -95,6 +95,7 @@ class MyTableFunction(TableFunctionGenerator): def output_schema(self) -> pa.Schema: return pa.schema([("value", pa.int64())]) + @property def cardinality(self) -> TableCardinality: """Optional: provide row count estimate.""" return TableCardinality(estimate=self.count, max=self.count) diff --git a/tests/table/generator/test_constant_table_function.py b/tests/table/generator/test_constant_table_function.py index 54a31b2..bf216f2 100644 --- a/tests/table/generator/test_constant_table_function.py +++ b/tests/table/generator/test_constant_table_function.py @@ -39,7 +39,7 @@ def test_cardinality(self) -> None: invocation=invocation, logger=structlog.get_logger(), ) - cardinality = func.cardinality() + cardinality = func.cardinality assert cardinality is not None assert cardinality.estimate == 1 assert cardinality.max == 1 diff --git 
a/tests/table/generator/test_sequence_function.py b/tests/table/generator/test_sequence_function.py index c9288b0..4c54340 100644 --- a/tests/table/generator/test_sequence_function.py +++ b/tests/table/generator/test_sequence_function.py @@ -40,7 +40,7 @@ def test_cardinality(self) -> None: invocation=invocation, logger=structlog.get_logger(), ) - cardinality = func.cardinality() + cardinality = func.cardinality assert cardinality is not None assert cardinality.estimate == 100 assert cardinality.max == 100 diff --git a/tests/table/test_function.py b/tests/table/test_function.py index bfeeeb2..3fae496 100644 --- a/tests/table/test_function.py +++ b/tests/table/test_function.py @@ -196,22 +196,23 @@ def output_schema(self) -> pa.Schema: func = NoCardinalityFunction( invocation=invocation, logger=structlog.get_logger() ) - assert func.cardinality() is None + assert func.cardinality is None def test_custom_cardinality(self) -> None: - """Custom cardinality() should be respected.""" + """Custom cardinality should be respected.""" class CardinalityFunction(TableFunctionGenerator): @property def output_schema(self) -> pa.Schema: return make_schema([pa.field("x", pa.int64())]) + @property def cardinality(self) -> TableCardinality: return TableCardinality(estimate=100, max=1000) invocation = make_invocation() func = CardinalityFunction(invocation=invocation, logger=structlog.get_logger()) - cardinality = func.cardinality() + cardinality = func.cardinality assert cardinality is not None assert cardinality.estimate == 100 assert cardinality.max == 1000 diff --git a/vgi/examples/table.py b/vgi/examples/table.py index dfdfee3..eecdf61 100644 --- a/vgi/examples/table.py +++ b/vgi/examples/table.py @@ -91,6 +91,7 @@ def output_schema(self) -> pa.Schema: """Return output schema with single integer column.""" return pa.schema([pa.field("n", pa.int64())]) + @property def cardinality(self) -> TableCardinality: """Return exact cardinality since we know the count.""" return 
TableCardinality(estimate=self.count, max=self.count) @@ -160,6 +161,7 @@ def output_schema(self) -> pa.Schema: """Return output schema with single integer column.""" return pa.schema([pa.field("value", pa.int64())]) + @property def cardinality(self) -> TableCardinality: """Return cardinality based on range parameters.""" if self.end <= self.start: @@ -233,6 +235,7 @@ def output_schema(self) -> pa.Schema: """Return output schema with single integer column.""" return pa.schema([pa.field("value", pa.int64())]) + @property def cardinality(self) -> TableCardinality: """Return cardinality of exactly one row.""" return TableCardinality(estimate=1, max=1) @@ -299,6 +302,7 @@ def output_schema(self) -> pa.Schema: ] return pa.schema(fields) + @property def cardinality(self) -> TableCardinality: """Return cardinality estimate.""" return TableCardinality(estimate=self.count, max=self.count) @@ -466,6 +470,7 @@ def output_schema(self) -> pa.Schema: """Return output schema with single integer column.""" return pa.schema([pa.field("value", pa.int64())]) + @property def cardinality(self) -> TableCardinality: """Return cardinality estimate. 
@@ -583,6 +588,7 @@ def output_schema(self) -> pa.Schema: """Return the projected schema based on init_input.""" return self.apply_projection(self.FULL_SCHEMA) + @property def cardinality(self) -> TableCardinality: """Return exact cardinality since we know the count.""" return TableCardinality(estimate=self.count, max=self.count) @@ -705,6 +711,7 @@ def output_schema(self) -> pa.Schema: return pa.schema(fields) + @property def cardinality(self) -> TableCardinality: """Return exact cardinality since we know the count.""" return TableCardinality(estimate=self.count, max=self.count) diff --git a/vgi/examples/table_in_out.py b/vgi/examples/table_in_out.py index 22e0cbd..2f8624b 100644 --- a/vgi/examples/table_in_out.py +++ b/vgi/examples/table_in_out.py @@ -357,6 +357,7 @@ class Meta: data: TableInput = Arg[TableInput](0, doc="Input table with numeric columns") # type: ignore[assignment] + @property def cardinality(self) -> TableCardinality | None: """Return cardinality estimate of exactly 1 row.""" return TableCardinality(estimate=1, max=1) @@ -676,6 +677,7 @@ def __init__( super().__init__(invocation=invocation, logger=logger) self.sums: dict[str, pa.Scalar[Any]] = {} + @property def cardinality(self) -> TableCardinality | None: """Return cardinality estimate of exactly 1 row.""" return TableCardinality(estimate=1, max=1) diff --git a/vgi/table_function.py b/vgi/table_function.py index 1291cc7..9073439 100644 --- a/vgi/table_function.py +++ b/vgi/table_function.py @@ -301,8 +301,9 @@ def __init__( """ super().__init__(invocation=invocation, logger=logger) + @property def cardinality(self) -> TableCardinality | None: - """Return optional cardinality estimate for the output. + """Optional cardinality estimate for the output. Override to provide row count estimates that help query planners make better decisions about join ordering and memory allocation. 
From 66eb06367763a6b7a817fa16c478e485fa7042be Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Sun, 4 Jan 2026 21:33:17 -0500 Subject: [PATCH 5/5] bd sync: 2026-01-04 21:33:17 --- .beads/issues.jsonl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index eb951bd..72ce516 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -9,7 +9,7 @@ {"id":"vgi-python-a99","title":"Add settings accessor to function base classes","description":"Add a property to access DuckDB settings values in function implementations.\n\nChanges needed:\n- Add 'settings: dict[str, str]' property to Function base class\n- Property should return self.invocation.duckdb_settings or empty dict\n- Add convenience method like 'get_setting(name, default=None)'\n- Update ScalarFunction, TableFunctionGenerator, TableInOutFunction\n\nExample usage in function:\ndef compute(self, batch):\n tz = self.get_setting('timezone', 'UTC')\n # or\n tz = self.settings.get('timezone', 'UTC')","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.221602-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.171991-05:00","closed_at":"2026-01-04T13:20:41.171991-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-a99","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.738212-05:00","created_by":"rusty"}]} {"id":"vgi-python-aad","title":"Design: DuckDB settings/pragmas access for VGI functions","description":"Design how VGI functions can declare required DuckDB settings/pragmas in their Meta class, and how these settings values should be passed during the bind phase.\n\nKey design decisions:\n1. How to declare required settings in function Meta (e.g., required_settings = ['timezone', 'threads'])\n2. How to add settings to Invocation dataclass\n3. How settings values should be accessed in function code\n4. 
Serialization format for settings in Arrow IPC\n\nRecommendation: Add 'duckdb_settings: dict[str, str] | None' to Invocation and 'required_settings: list[str]' to Meta class.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-04T13:05:47.619105-05:00","created_by":"rusty","updated_at":"2026-01-04T13:11:13.197139-05:00","closed_at":"2026-01-04T13:11:13.197139-05:00","close_reason":"Design document created at docs/design-duckdb-settings.md"} {"id":"vgi-python-bi8","title":"Extract common _process_with_exception_handling into mixin","description":"The _process_with_exception_handling and _process_and_validate methods are duplicated across scalar_function.py:296-346, table_function.py:386-438, and table_in_out_function.py:586-642. All follow same pattern: try _process_and_validate, catch exceptions, return OutputComplete with error message. Extract to ProcessingMixin that all function types inherit from.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-04T20:06:41.02111-05:00","created_by":"rusty","updated_at":"2026-01-04T20:07:15.947758-05:00","dependencies":[{"issue_id":"vgi-python-bi8","depends_on_id":"vgi-python-6o0","type":"blocks","created_at":"2026-01-04T20:07:49.181408-05:00","created_by":"rusty"}]} -{"id":"vgi-python-bku","title":"Change cardinality() method to property for consistency with output_schema","description":"Inconsistent access patterns: output_schema is a property but cardinality() is a method. Both return immutable data. Change cardinality() to a property for API consistency. Located in table_function.py:304-314.","status":"in_progress","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.211782-05:00","created_by":"rusty","updated_at":"2026-01-04T21:22:27.430091-05:00"} +{"id":"vgi-python-bku","title":"Change cardinality() method to property for consistency with output_schema","description":"Inconsistent access patterns: output_schema is a property but cardinality() is a method. 
Both return immutable data. Change cardinality() to a property for API consistency. Located in table_function.py:304-314.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-04T20:06:53.211782-05:00","created_by":"rusty","updated_at":"2026-01-04T21:33:10.617152-05:00","closed_at":"2026-01-04T21:33:10.617152-05:00","close_reason":"PR #6 created: https://github.com/Query-farm/vgi-python/pull/6"} {"id":"vgi-python-bqb","title":"Update worker to handle DuckDB settings during bind","description":"Update vgi/worker.py to process DuckDB settings from Invocation during the bind phase.\n\nChanges needed:\n- Read settings from invocation.duckdb_settings\n- Validate that all required_settings (from Meta) are present in invocation\n- Pass settings to function instance for access\n- Log settings usage for debugging\n\nThe worker should validate settings early in bind to fail fast if required settings are missing.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:48.04037-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.17079-05:00","closed_at":"2026-01-04T13:20:41.17079-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-bqb","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.714281-05:00","created_by":"rusty"}]} {"id":"vgi-python-c2b","title":"Add duckdb_settings field to Invocation class","description":"Update vgi/invocation.py to add a duckdb_settings field to the Invocation dataclass.\n\nChanges needed:\n- Add 'duckdb_settings: dict[str, str] | None = None' field to Invocation\n- Update serialize() to include settings in Arrow IPC batch\n- Update deserialize() to read settings from Arrow IPC batch\n- Handle None case (no settings requested)\n\nSerialization: Use a struct field with string key-value pairs or a map 
type.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T13:05:47.765077-05:00","created_by":"rusty","updated_at":"2026-01-04T13:20:41.167817-05:00","closed_at":"2026-01-04T13:20:41.167817-05:00","close_reason":"Implementation complete, all tests pass","dependencies":[{"issue_id":"vgi-python-c2b","depends_on_id":"vgi-python-aad","type":"blocks","created_at":"2026-01-04T13:06:13.664038-05:00","created_by":"rusty"}]} {"id":"vgi-python-e37","title":"move Invocation from function.py out to own file","description":"The Invocation clas is kind of seperate from functions, so it should be in its own file. Move it and all of its other associated classes like InvocationType to its own file","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-04T09:18:46.605941-05:00","created_by":"rusty","updated_at":"2026-01-04T09:24:37.922675-05:00","closed_at":"2026-01-04T09:24:37.922675-05:00","close_reason":"Closed"}