From 3c925e29f9d3dda1c5ea45b1e38b18899db85dc8 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Fri, 2 Jan 2026 14:40:26 -0500 Subject: [PATCH 1/2] feat: start of coverage --- .gitignore | 5 +++++ CLAUDE.md | 6 ++++++ pyproject.toml | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/.gitignore b/.gitignore index 04c48d2..32fa8a0 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,8 @@ wheels/ # Mypy reports mypy-reports/ mypy-html-report/ + +# Coverage artifacts +.coverage +.coverage.* +htmlcov/ diff --git a/CLAUDE.md b/CLAUDE.md index c91287b..e5d687a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,6 +10,12 @@ uv run pytest # Run tests uv run ruff check . # Lint uv run ruff format . # Format uv run mypy vgi/ # Type check + +# Run tests with coverage (includes subprocess/worker coverage) +uv run coverage run -m pytest --no-cov +uv run coverage combine # Merge subprocess coverage data +uv run coverage report # Show coverage report +uv run coverage html # Generate HTML report in htmlcov/ ``` **Before committing**, always run lint and format checks: diff --git a/pyproject.toml b/pyproject.toml index ddecbe9..4b1791f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,42 @@ ignore_missing_imports = true addopts = "--mypy --ruff" testpaths = ["tests"] +[tool.coverage.run] +# Enable subprocess coverage - automatically patches subprocess.Popen +# to inject coverage measurement into spawned worker processes +patch = ["subprocess"] +# Each subprocess writes to its own data file (.coverage..) +parallel = true +# Handle SIGTERM gracefully (writes coverage data before exit) +sigterm = true +# Measure branch coverage +branch = true +# What to measure +source = ["vgi"] +# Omit test files and examples from coverage +omit = [ + "vgi/examples/*", + "tests/*", +] + +[tool.coverage.report] +# Fail if coverage drops below this threshold +fail_under = 80 +# Show missing lines in the report +show_missing = true +# Exclude common patterns from coverage +exclude_lines = [ + "pragma: no cover", + "if TYPE_CHECKING:", + "if __name__ == .__main__.:", + "@overload", + "raise NotImplementedError", + "\\.\\.\\.", # Ellipsis in stub files +] + +[tool.coverage.html] +directory = "htmlcov" + [dependency-groups] dev = [ "lxml>=6.0.2", From d4d84ed6db4a93d28b4b90d501f2c05c13aed037 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Fri, 2 Jan 2026 14:50:04 -0500 Subject: [PATCH 2/2] feat: add comprehensive test coverage for CLI, Arguments, and Client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add tests/client/test_cli.py with ~30 tests for CLI module: - OutputWriter tests for parquet, CSV, JSON formats - CLI validation tests (JSON args, table-input-position, attach-id) - Table function and table-in-out function invocation tests - Output format and CLI options tests - Add Arguments null/type validation tests to test_protocol_classes.py - Add Client lifecycle tests to test_client.py (start/stop edge cases) - Refactor cli.py to expose cli command at module level for testability Coverage improved from 87% to 90%: - cli.py: 15% → 91% - arguments.py: 89% → 93% - worker.py: 92% → 93% 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/client/__init__.py | 1 + tests/client/test_cli.py | 582 ++++++++++++++++++++++++++++++ tests/table_in_out/test_client.py | 34 ++ tests/test_protocol_classes.py | 25 ++ vgi/client/cli.py | 13 +- 5 files changed, 653 insertions(+), 2 deletions(-) create mode 100644 tests/client/__init__.py create mode 100644 tests/client/test_cli.py diff --git a/tests/client/__init__.py b/tests/client/__init__.py new file mode 100644 index 0000000..9283666 --- /dev/null +++ b/tests/client/__init__.py @@ -0,0 +1 @@ +"""Client tests package.""" diff --git a/tests/client/test_cli.py b/tests/client/test_cli.py new file mode 100644 index 0000000..f1f9b69 --- /dev/null +++ b/tests/client/test_cli.py @@ -0,0 +1,582 @@ +"""Tests for VGI CLI client.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pyarrow as pa +import pyarrow.parquet as pq +import pytest +from click.testing import CliRunner + +from vgi.client.cli import OutputWriter, cli + + +@pytest.fixture +def sample_batch() -> pa.RecordBatch: + """Create a simple test RecordBatch.""" + return pa.RecordBatch.from_pydict( + { + "id": [1, 2, 3], + "name": ["a", "b", "c"], + } + ) + + +@pytest.fixture +def input_parquet(tmp_path: Path, sample_batch: pa.RecordBatch) -> Path: + """Create a temporary parquet file with sample data.""" + input_file = tmp_path / "input.parquet" + pq.write_table(pa.Table.from_batches([sample_batch]), str(input_file)) + return input_file + + +class TestOutputWriter: + """Tests for OutputWriter class.""" + + def test_write_batch_to_log_when_no_output_file( + self, sample_batch: pa.RecordBatch + ) -> None: + """When output_file is None, batch is logged, not written.""" + writer = OutputWriter(None, "json") + # Should not raise - just logs the batch + writer.write_batch(sample_batch) + writer.close() + + def test_write_batch_parquet_to_file( + self, tmp_path: Path, sample_batch: pa.RecordBatch + ) -> None: + """Write parquet format to file path.""" + output_file = tmp_path / "output.parquet" + writer = OutputWriter(str(output_file), "parquet") + writer.write_batch(sample_batch) + writer.close() + + # Verify file is valid parquet with correct data + table = pq.read_table(str(output_file)) + assert table.num_rows == 3 + assert table.column_names == ["id", "name"] + + def test_write_batch_parquet_multiple_batches( + self, tmp_path: Path, sample_batch: pa.RecordBatch + ) -> None: + """Write multiple parquet batches to same file.""" + output_file = tmp_path / "output.parquet" + writer = OutputWriter(str(output_file), "parquet") + writer.write_batch(sample_batch) + writer.write_batch(sample_batch) + writer.close() + + table = pq.read_table(str(output_file)) + assert table.num_rows == 6 + + def test_write_batch_csv_to_file( + self, tmp_path: Path, sample_batch: pa.RecordBatch + ) -> None: + """Write CSV format to file.""" + output_file = tmp_path / "output.csv" + writer = OutputWriter(str(output_file), "csv") + writer.write_batch(sample_batch) + writer.close() + + content = output_file.read_text() + # Check header is present + assert "id" in content + assert "name" in content + # Check data is present + assert "1" in content + assert "a" in content + + def test_write_batch_csv_header_once( + self, tmp_path: Path, sample_batch: pa.RecordBatch + ) -> None: + """CSV header should only be written on first batch.""" + output_file = tmp_path / "output.csv" + writer = OutputWriter(str(output_file), "csv") + writer.write_batch(sample_batch) + writer.write_batch(sample_batch) + writer.close() + + content = output_file.read_text() + # Header should appear exactly once + lines = content.strip().split("\n") + header_count = sum(1 for line in lines if "id" in line and "name" in line) + assert header_count == 1 + # Should have 6 data rows + 1 header = 7 lines + assert len(lines) == 7 + + def test_write_batch_json_to_file( + self, tmp_path: Path, sample_batch: pa.RecordBatch + ) -> None: + """Write JSON format to file.""" + output_file = tmp_path / "output.jsonl" + writer = OutputWriter(str(output_file), "json") + writer.write_batch(sample_batch) + writer.close() + + lines = output_file.read_text().strip().split("\n") + assert len(lines) == 3 + row1 = json.loads(lines[0]) + assert row1["id"] == 1 + assert row1["name"] == "a" + + def test_write_batch_json_multiple_batches( + self, tmp_path: Path, sample_batch: pa.RecordBatch + ) -> None: + """Write multiple JSON batches to same file.""" + output_file = tmp_path / "output.jsonl" + writer = OutputWriter(str(output_file), "json") + writer.write_batch(sample_batch) + writer.write_batch(sample_batch) + writer.close() + + lines = output_file.read_text().strip().split("\n") + assert len(lines) == 6 + + def test_close_without_writer(self) -> None: + """close() is safe when no writer was created.""" + writer = OutputWriter(None, "json") + writer.close() # Should not raise + + +class TestCLIValidation: + """Tests for CLI argument validation.""" + + def test_invalid_json_args(self, example_worker: str) -> None: + """Invalid JSON in --args should raise error.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "echo", + "--args", + "not valid json", + "--server", + example_worker, + ], + ) + assert result.exit_code != 0 + assert "Invalid JSON" in result.output + + def test_args_not_array(self, example_worker: str) -> None: + """--args must be a JSON array, not object.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "echo", + "--args", + '{"key": "value"}', + "--server", + example_worker, + ], + ) + assert result.exit_code != 0 + assert "must be a JSON array" in result.output + + def test_table_input_position_without_input(self, example_worker: str) -> None: + """--table-input-position requires --input.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "sequence", + "--args", + "[10]", + "--table-input-position", + "1", + "--server", + example_worker, + ], + ) + assert result.exit_code != 0 + assert "requires --input" in result.output + + def test_table_input_position_negative( + self, example_worker: str, input_parquet: Path + ) -> None: + """--table-input-position must be non-negative.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--function", + "echo", + "--table-input-position", + "-1", + "--server", + example_worker, + ], + ) + assert result.exit_code != 0 + assert "non-negative" in result.output + + def test_table_input_position_out_of_range( + self, example_worker: str, input_parquet: Path + ) -> None: + """--table-input-position out of range for args.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--function", + "echo", + "--args", + "[1, 2]", + "--table-input-position", + "5", + "--server", + example_worker, + ], + ) + assert result.exit_code != 0 + assert "out of range" in result.output + + def test_invalid_attach_id_hex(self, example_worker: str) -> None: + """--attach-id must be valid hex.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "sequence", + "--args", + "[5]", + "--attach-id", + "not_hex_string", + "--server", + example_worker, + ], + ) + assert result.exit_code != 0 + assert "valid hex string" in result.output + + def test_missing_required_function(self) -> None: + """--function is required.""" + runner = CliRunner() + result = runner.invoke(cli, []) + assert result.exit_code != 0 + + +class TestCLITableFunction: + """Tests for CLI table function invocation (no input).""" + + def test_table_function_invocation(self, example_worker: str) -> None: + """Invoke a table function without input.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "sequence", + "--args", + "[5]", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + + def test_table_function_with_output_file( + self, example_worker: str, tmp_path: Path + ) -> None: + """Table function with output to file.""" + output_file = tmp_path / "output.jsonl" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "sequence", + "--args", + "[5]", + "--output", + str(output_file), + "--format", + "json", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + assert output_file.exists() + lines = output_file.read_text().strip().split("\n") + assert len(lines) == 5 + + +class TestCLITableInOutFunction: + """Tests for CLI table-in-out function invocation (with input).""" + + def test_table_in_out_function_invocation( + self, example_worker: str, input_parquet: Path + ) -> None: + """Invoke a table-in-out function with input.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--function", + "echo", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + + def test_table_in_out_with_output_file( + self, example_worker: str, input_parquet: Path, tmp_path: Path + ) -> None: + """Table-in-out function with output to file.""" + output_file = tmp_path / "output.jsonl" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--output", + str(output_file), + "--format", + "json", + "--function", + "echo", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + assert output_file.exists() + lines = output_file.read_text().strip().split("\n") + assert len(lines) == 3 + + +class TestCLIOutputFormats: + """Tests for CLI output format options.""" + + def test_output_format_json( + self, example_worker: str, input_parquet: Path, tmp_path: Path + ) -> None: + """JSON output format.""" + output_file = tmp_path / "output.json" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--output", + str(output_file), + "--format", + "json", + "--function", + "echo", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + # Verify it's valid JSON + for line in output_file.read_text().strip().split("\n"): + json.loads(line) + + def test_output_format_csv( + self, example_worker: str, input_parquet: Path, tmp_path: Path + ) -> None: + """CSV output format.""" + output_file = tmp_path / "output.csv" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--output", + str(output_file), + "--format", + "csv", + "--function", + "echo", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + content = output_file.read_text() + assert "id" in content # Header present + + def test_output_format_parquet( + self, example_worker: str, input_parquet: Path, tmp_path: Path + ) -> None: + """Parquet output format.""" + output_file = tmp_path / "output.parquet" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--output", + str(output_file), + "--format", + "parquet", + "--function", + "echo", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + # Verify it's valid parquet + table = pq.read_table(str(output_file)) + assert table.num_rows == 3 + + +class TestCLIOptions: + """Tests for various CLI options.""" + + def test_max_workers_option(self, example_worker: str, input_parquet: Path) -> None: + """--max-workers option is passed correctly.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--function", + "echo", + "--max-workers", + "2", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + + def test_worker_stderr_passthrough( + self, example_worker: str, input_parquet: Path + ) -> None: + """--worker-stderr flag works.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--function", + "echo", + "--worker-stderr", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + + def test_valid_attach_id(self, example_worker: str) -> None: + """Valid hex attach-id is accepted.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "sequence", + "--args", + "[5]", + "--attach-id", + "deadbeef", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + + def test_projection_ids(self, example_worker: str, input_parquet: Path) -> None: + """--projection-id option works.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--function", + "echo", + "--projection-id", + "0", + "--projection-id", + "1", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + + def test_table_input_position_valid( + self, example_worker: str, input_parquet: Path + ) -> None: + """Valid --table-input-position is accepted.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--input", + str(input_parquet), + "--function", + "echo", + "--args", + "[]", + "--table-input-position", + "0", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + + +class TestCLIErrorHandling: + """Tests for CLI error handling.""" + + def test_nonexistent_function(self, example_worker: str) -> None: + """Non-existent function returns error.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "nonexistent_function_xyz", + "--server", + example_worker, + ], + ) + assert result.exit_code != 0 + + def test_stdout_output_json(self, example_worker: str) -> None: + """Output to stdout with - works.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "--function", + "sequence", + "--args", + "[3]", + "--output", + "-", + "--format", + "json", + "--server", + example_worker, + ], + ) + assert result.exit_code == 0 + # Should have JSON output + assert "n" in result.output diff --git a/tests/table_in_out/test_client.py b/tests/table_in_out/test_client.py index 39896ae..099e800 100644 --- a/tests/table_in_out/test_client.py +++ b/tests/table_in_out/test_client.py @@ -5,9 +5,11 @@ import time import pyarrow as pa +import pytest from tests.utils import make_schema from vgi.client import Client +from vgi.client.client import ClientError class TestClientLifecycle: @@ -20,6 +22,38 @@ def test_context_manager(self, example_worker: str) -> None: # After context exit, process should be cleaned up assert client._proc is None + def test_start_when_already_started_raises(self, example_worker: str) -> None: + """Starting an already-started client should raise ClientError.""" + client = Client(example_worker) + client.start() + try: + with pytest.raises(ClientError, match="already started"): + client.start() + finally: + client.stop() + + def test_stop_when_not_started_raises(self, example_worker: str) -> None: + """Stopping a client that wasn't started should raise ClientError.""" + client = Client(example_worker) + with pytest.raises(ClientError, match="not started"): + client.stop() + + def test_table_in_out_function_not_started_raises( + self, example_worker: str + ) -> None: + """Calling table_in_out_function before start should raise ClientError.""" + client = Client(example_worker) + schema = make_schema([pa.field("id", pa.int64())]) + batch = pa.RecordBatch.from_pydict({"id": [1]}, schema=schema) + + with pytest.raises(ClientError, match="not started"): + list( + client.table_in_out_function( + function_name="echo", + input=iter([batch]), + ) + ) + class TestEdgeCases: """Tests for edge cases and boundary conditions.""" diff --git a/tests/test_protocol_classes.py b/tests/test_protocol_classes.py index eba677f..3693a4a 100644 --- a/tests/test_protocol_classes.py +++ b/tests/test_protocol_classes.py @@ -147,6 +147,31 @@ def test_schema_generation(self) -> None: assert schema.field("positional_1").type == pa.string() assert schema.field("named_flag").type == pa.bool_() + def test_null_positional_without_default_raises(self) -> None: + """Null positional argument without default should raise ValueError.""" + # Create a null scalar explicitly + args = Arguments(positional=(pa.scalar(None, type=pa.int64()),)) + with pytest.raises(ValueError, match="Argument 0: value is null"): + args.get(0) + + def test_null_named_without_default_raises(self) -> None: + """Null named argument without default should raise ValueError.""" + args = Arguments(named={"key": pa.scalar(None, type=pa.string())}) + with pytest.raises(ValueError, match="Argument 'key': value is null"): + args.get("key") + + def test_type_validation_positional_mismatch(self) -> None: + """Type mismatch for positional argument should raise TypeError.""" + args = Arguments(positional=(pa.scalar("string"),)) + with pytest.raises(TypeError, match="Argument 0: expected int64, got"): + args.get(0, type=pa.int64()) + + def test_type_validation_named_mismatch(self) -> None: + """Type mismatch for named argument should raise TypeError.""" + args = Arguments(named={"count": pa.scalar(42)}) + with pytest.raises(TypeError, match="Argument 'count': expected string, got"): + args.get("count", type=pa.string()) + class TestInvocation: """Tests for Invocation serialization and deserialization.""" diff --git a/vgi/client/cli.py b/vgi/client/cli.py index 6125334..cab7199 100644 --- a/vgi/client/cli.py +++ b/vgi/client/cli.py @@ -113,8 +113,8 @@ def close(self) -> None: self._writer.close() -def main() -> None: - """CLI entry point for vgi-client.""" +def _create_cli() -> Any: + """Create the CLI command. Separated for testability.""" import click import pyarrow.parquet as pq @@ -311,6 +311,15 @@ def cli( if output_writer is not None: output_writer.close() + return cli + + +# Module-level command for testing +cli = _create_cli() + + +def main() -> None: + """CLI entry point for vgi-client.""" cli()