From 0e5aa8e981c505f8f0cb28422f27ad74ababb87e Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Fri, 19 Jun 2026 14:09:24 +0200 Subject: [PATCH 1/2] feat: implement schema synchronization and export functionality for doc-issues.json --- .gitignore | 2 + packages/adapters/collector_gh/SCHEMA_SYNC.md | 187 ++++++++++ .../adapters/collector_gh/schemas/README.md | 45 +++ .../schemas/doc-issues-v1.0.0-schema.json | 323 ++++++++++++++++++ .../__init__.py | 29 +- .../compatibility.py | 29 +- .../living_doc_adapter_collector_gh/models.py | 24 +- .../schema_export.py | 94 +++++ 8 files changed, 727 insertions(+), 6 deletions(-) create mode 100644 packages/adapters/collector_gh/SCHEMA_SYNC.md create mode 100644 packages/adapters/collector_gh/schemas/README.md create mode 100644 packages/adapters/collector_gh/schemas/doc-issues-v1.0.0-schema.json create mode 100644 packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/schema_export.py diff --git a/.gitignore b/.gitignore index bef7e0d..267521d 100644 --- a/.gitignore +++ b/.gitignore @@ -209,3 +209,5 @@ __marimo__/ # Living Doc Toolkit specific outputs/ .DS_Store +doc-issues.json +pdf_ready.json diff --git a/packages/adapters/collector_gh/SCHEMA_SYNC.md b/packages/adapters/collector_gh/SCHEMA_SYNC.md new file mode 100644 index 0000000..afe9eda --- /dev/null +++ b/packages/adapters/collector_gh/SCHEMA_SYNC.md @@ -0,0 +1,187 @@ +# Schema Synchronization Guide + +## Pattern: Pydantic-First (Schema Producer / Data Consumer) + +This adapter uses the **Pydantic-First** pattern where **this repository** (living-doc-toolkit): +- **Receives data** from collector-gh (data consumer role) +- **Produces schema** as an artifact for collector-gh to validate against (schema producer role) + +The Pydantic models in this repo are the **single source of truth** for the input contract. 
+ +``` +┌────────────────────────────────────────────────┐ +│ living-doc-toolkit (This Repo) │ +│ SCHEMA PRODUCER / DATA CONSUMER │ +│ │ +│ • Pydantic models (models.py) ◄── SOURCE │ +│ • Export JSON Schema (schema_export.py) │ +│ • Save to: schemas/doc-issues-v1.0.0-schema.json │ +│ • Publish schema as artifact │ +└────────────────────────────────────────────────┘ + │ + │ Schema published as independent artifact + │ (no direct code dependency) + ▼ +┌────────────────────────────────────────────────┐ +│ Downstream Consumers (Independent) │ +│ SCHEMA CONSUMER / DATA PRODUCER │ +│ │ +│ • Obtain published schema │ +│ • Use it independently for validation │ +│ • Publishes validated data │ +└────────────────────────────────────────────────┘ +``` + +**Key:** No direct code dependency. The schema is a published artifact that each +repo uses independently within its own validation pipeline. + + +## Schema Version + +- **Input Schema Version:** `1.0.0` (independent of adapter package version) +- **Adapter Package Version:** `1.0.0` (see `__init__.py`) +- **Producer Compatibility Range:** `>=0.1.0,<2.0.0` (see `compatibility.py`) + +## Workflow: When Pydantic Models Change + +### 1. Consumer (living-doc-toolkit) Updates Model + +Edit [models.py](src/living_doc_adapter_collector_gh/models.py): + +```python +class AdapterMetadataSource(BaseModel): + """Source information for adapter metadata.""" + systems: list[str] = Field(min_length=1, description="At least one system") + # ... other fields +``` + +### 2. 
Export Updated Schema + +Schema is automatically saved with version in filename: + +```bash +# From packages/adapters/collector_gh/ +python -m living_doc_adapter_collector_gh.schema_export + +# Schema is now in: schemas/doc-issues-v1.0.0-schema.json + +# Or programmatically: +from living_doc_adapter_collector_gh import export_schema, SCHEMA_VERSION +schema = export_schema() # Saved to default location with version +print(f"Schema version: {SCHEMA_VERSION}") # 1.0.0 +``` + +Or save to custom location: + +```bash +python -m living_doc_adapter_collector_gh.schema_export /path/to/custom-schema.json +``` + +### 3. Validate Tests Pass + +```bash +make pytest-unit-packages/adapters/collector_gh +``` + +### 4. Commit & Publish Schema as Artifact + +Schema changes are committed and published with version in filename: + +```bash +# Commit the updated schema (versioned filename) +git add packages/adapters/collector_gh/schemas/doc-issues-v1.0.0-schema.json +git commit -m "chore: update input schema to v1.0.0 + +- systems field now requires min_length=1 +- See packages/adapters/collector_gh/SCHEMA_SYNC.md for details" + +# Create release with schema as artifact +# or include schema in release notes / documentation +``` + +Schema is now available at: `packages/adapters/collector_gh/schemas/doc-issues-v1.0.0-schema.json` + +### 5. Downstream Consumers Obtain & Use Schema + +Consumers (e.g., collector-gh repo): +- Obtain published schema (from GitHub release, documentation, etc.) 
+- Integrate into their validation pipeline +- Use it to validate data +- **No direct code dependency** on this repo + +Example consumer workflow: + +```yaml +# .github/workflows/validate-output.yml +- name: Download schema + run: | + curl -LO https://github.com/AbsaOSS/living-doc-toolkit/releases/download/v1.0.0/doc-issues-schema.json + +- name: Validate output against schema + uses: ajv-validator/ajv-cli@v5 + with: + schema: doc-issues-schema.json + data: doc-issues.json +``` + +## Workflow: When Producer Version Increments + +If producer releases `v1.1.0` or `v2.0.0`: + +1. **Download their release notes** +2. **Identify breaking vs. non-breaking changes** +3. **If breaking:** + - Update `CONFIRMED_MIN` or `CONFIRMED_MAX` in [compatibility.py](src/living_doc_adapter_collector_gh/compatibility.py) + - Add test fixtures for the new version + - Document in [README.md](README.md) + +4. **If non-breaking:** + - Add golden test fixture (no code changes needed) + - Verify compatibility test passes + +## File Locations + +| File | Purpose | +|------|---------| +| [models.py](src/living_doc_adapter_collector_gh/models.py) | Pydantic models (source of truth) | +| [schema_export.py](src/living_doc_adapter_collector_gh/schema_export.py) | Export models to JSON Schema | +| [compatibility.py](src/living_doc_adapter_collector_gh/compatibility.py) | Version compatibility checking & schema version | +| [__init__.py](src/living_doc_adapter_collector_gh/__init__.py) | Package exports & documentation | +| [tests/test_parser.py](tests/test_parser.py) | Golden tests (fixture validation) | + +## Key Constants + +```python +# In compatibility.py +CONFIRMED_MIN = "0.1.0" # Min producer version +CONFIRMED_MAX = "2.0.0" # Max producer version (exclusive) +SCHEMA_VERSION = "1.0.0" # Input contract schema version +``` + +## Testing + +### Golden Tests (Verify Fixtures Match Model) + +```bash +# Run golden tests +make pytest-unit-packages/adapters/collector_gh + +# Specific test +pytest 
packages/adapters/collector_gh/tests/test_parser.py::TestParser::test_metadata_source_mapping +``` + +### Schema Export + +```bash +# Generate schema (written to the default versioned location) +python -m living_doc_adapter_collector_gh.schema_export + +# Write to file +python -m living_doc_adapter_collector_gh.schema_export schema.json +``` + +## Links + +- **Producer Repo:** https://github.com/AbsaOSS/living-doc-collector-gh +- **Consumer (This Repo):** https://github.com/AbsaOSS/living-doc-toolkit +- **Input Contract Docs:** [../../../docs/contracts.md](../../../docs/contracts.md#input-contract-doc-issuesjson) diff --git a/packages/adapters/collector_gh/schemas/README.md b/packages/adapters/collector_gh/schemas/README.md new file mode 100644 index 0000000..0de3e5d --- /dev/null +++ b/packages/adapters/collector_gh/schemas/README.md @@ -0,0 +1,45 @@ +# Input Schema Artifacts + +This directory contains the exported JSON Schema for the input contract. + +## Schema File + +- **`doc-issues-v1.0.0-schema.json`** — JSON Schema for doc-issues.json input data (schema version 1.0.0) + +## How to Generate + +From the package root (`packages/adapters/collector_gh/`): + +```bash +# Generate and save to default location (this directory) +python -m living_doc_adapter_collector_gh.schema_export + +# Or specify a custom output location +python -m living_doc_adapter_collector_gh.schema_export /path/to/custom-schema.json +``` + +## Usage + +Downstream consumers (e.g., collector-gh repo) independently: +1. Obtain the published schema from this directory +2. Use it in their validation pipeline +3. Validate input data against the schema + +Example with `ajv-cli`: + +```bash +ajv validate -s doc-issues-v1.0.0-schema.json -d /path/to/doc-issues.json +``` + +## Schema Updates + +When Pydantic models change: + +1. Pydantic models in `src/living_doc_adapter_collector_gh/models.py` are updated +2. Run `python -m living_doc_adapter_collector_gh.schema_export` to regenerate +3. 
New versioned file is created: `doc-issues-v{VERSION}-schema.json` +4. Commit updated schema +5. Release as new version +6. Downstream consumers obtain and use updated schema + +See `SCHEMA_SYNC.md` for complete synchronization workflow. diff --git a/packages/adapters/collector_gh/schemas/doc-issues-v1.0.0-schema.json b/packages/adapters/collector_gh/schemas/doc-issues-v1.0.0-schema.json new file mode 100644 index 0000000..b181449 --- /dev/null +++ b/packages/adapters/collector_gh/schemas/doc-issues-v1.0.0-schema.json @@ -0,0 +1,323 @@ +{ + "$defs": { + "AdapterItem": { + "description": "Represents a single item (issue) from the collector output.", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "title": { + "title": "Title", + "type": "string" + }, + "state": { + "title": "State", + "type": "string" + }, + "tags": { + "items": { + "type": "string" + }, + "title": "Tags", + "type": "array" + }, + "url": { + "title": "Url", + "type": "string" + }, + "timestamps": { + "$ref": "#/$defs/AdapterItemTimestamps" + }, + "body": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Body" + } + }, + "required": [ + "id", + "title", + "state", + "tags", + "url", + "timestamps" + ], + "title": "AdapterItem", + "type": "object" + }, + "AdapterItemTimestamps": { + "description": "Timestamps for an adapter item.", + "properties": { + "created": { + "title": "Created", + "type": "string" + }, + "updated": { + "title": "Updated", + "type": "string" + } + }, + "required": [ + "created", + "updated" + ], + "title": "AdapterItemTimestamps", + "type": "object" + }, + "AdapterMetadata": { + "description": "Metadata information from the adapter.", + "properties": { + "producer": { + "$ref": "#/$defs/AdapterMetadataProducer" + }, + "run": { + "$ref": "#/$defs/AdapterMetadataRun" + }, + "source": { + "$ref": "#/$defs/AdapterMetadataSource" + }, + "original_metadata": { + "additionalProperties": true, + "title": 
"Original Metadata", + "type": "object" + } + }, + "required": [ + "producer", + "run", + "source", + "original_metadata" + ], + "title": "AdapterMetadata", + "type": "object" + }, + "AdapterMetadataProducer": { + "description": "Producer information for adapter metadata.", + "properties": { + "name": { + "title": "Name", + "type": "string" + }, + "version": { + "title": "Version", + "type": "string" + }, + "build": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Build" + } + }, + "required": [ + "name", + "version", + "build" + ], + "title": "AdapterMetadataProducer", + "type": "object" + }, + "AdapterMetadataRun": { + "description": "Run information for adapter metadata.", + "properties": { + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Id" + }, + "run_attempt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Run Attempt" + }, + "actor": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Actor" + }, + "workflow": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Workflow" + }, + "ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Ref" + }, + "sha": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Sha" + } + }, + "required": [ + "run_id", + "run_attempt", + "actor", + "workflow", + "ref", + "sha" + ], + "title": "AdapterMetadataRun", + "type": "object" + }, + "AdapterMetadataSource": { + "description": "Source information for adapter metadata.", + "properties": { + "systems": { + "items": { + "type": "string" + }, + "title": "Systems", + "type": "array" + }, + "repositories": { + "items": { + "type": "string" + }, + "title": "Repositories", + "type": "array" + }, + "organization": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Organization" + }, + 
"enterprise": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Enterprise" + } + }, + "required": [ + "systems", + "repositories", + "organization", + "enterprise" + ], + "title": "AdapterMetadataSource", + "type": "object" + }, + "CompatibilityWarning": { + "description": "Represents a compatibility warning during adapter processing.", + "properties": { + "code": { + "title": "Code", + "type": "string" + }, + "message": { + "title": "Message", + "type": "string" + }, + "context": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Context" + } + }, + "required": [ + "code", + "message" + ], + "title": "CompatibilityWarning", + "type": "object" + } + }, + "description": "Complete result from adapter parsing.", + "properties": { + "items": { + "items": { + "$ref": "#/$defs/AdapterItem" + }, + "title": "Items", + "type": "array" + }, + "metadata": { + "$ref": "#/$defs/AdapterMetadata" + }, + "warnings": { + "items": { + "$ref": "#/$defs/CompatibilityWarning" + }, + "title": "Warnings", + "type": "array" + } + }, + "required": [ + "items", + "metadata", + "warnings" + ], + "title": "AdapterResult", + "type": "object", + "$schema_version": "1.0.0" +} \ No newline at end of file diff --git a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/__init__.py b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/__init__.py index dd8f6fc..b5f0e68 100644 --- a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/__init__.py +++ b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/__init__.py @@ -5,6 +5,26 @@ This package provides adapter functionality to detect and parse input from the living-doc-collector-gh action. + +SCHEMA SYNCHRONIZATION: Pydantic-First Pattern +============================================== + +This repo defines and exports the input contract: + +1. 
Pydantic models (models.py) are the single source of truth +2. Export Pydantic models to JSON Schema as an artifact (schema_export.py) +3. Schema is saved with version: packages/adapters/collector_gh/schemas/doc-issues-v1.0.0-schema.json +4. Publish schema for downstream consumers to obtain independently + +To export the schema (saved to default location with version): + + python -m living_doc_adapter_collector_gh.schema_export + +Or to a custom location: + + python -m living_doc_adapter_collector_gh.schema_export custom-output.json + +See SCHEMA_SYNC.md for the complete synchronization workflow and versioning. """ __version__ = "1.0.0" @@ -21,15 +41,19 @@ # Export detector functions from living_doc_adapter_collector_gh.detector import can_handle, extract_version -# Export compatibility checker -from living_doc_adapter_collector_gh.compatibility import check_compatibility +# Export compatibility checker and schema version +from living_doc_adapter_collector_gh.compatibility import check_compatibility, SCHEMA_VERSION # Export parser from living_doc_adapter_collector_gh.parser import parse +# Export schema export function +from living_doc_adapter_collector_gh.schema_export import export_schema + __all__ = [ # Version "__version__", + "SCHEMA_VERSION", # Models "AdapterResult", "AdapterItem", @@ -41,4 +65,5 @@ "extract_version", "check_compatibility", "parse", + "export_schema", ] diff --git a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/compatibility.py b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/compatibility.py index f74cd6e..41f1dda 100644 --- a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/compatibility.py +++ b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/compatibility.py @@ -5,6 +5,27 @@ This module provides functions to check if a producer version is within the confirmed compatible range. 
+ +SCHEMA SYNCHRONIZATION PATTERN +=============================== + +This repo (living-doc-toolkit) is the schema producer: + +1. Defines Pydantic models as single source of truth (models.py) +2. Exports them to JSON Schema (schema_export.py) +3. Publishes schema as independent artifact + +Collector-gh repo (independent): + +1. Downloads published schema +2. Uses it to validate doc-issues.json +3. Publishes validated data +4. No direct code dependency + +When this repo receives doc-issues.json from collector-gh, we check +that the producer version is within our confirmed compatible range. + +See SCHEMA_SYNC.md for full synchronization workflow. """ from packaging.version import Version, InvalidVersion @@ -12,9 +33,15 @@ from living_doc_adapter_collector_gh.models import CompatibilityWarning # Confirmed compatible version range -CONFIRMED_MIN = "1.0.0" +# Maps to producer repo releases: +# https://github.com/AbsaOSS/living-doc-collector-gh/releases +CONFIRMED_MIN = "0.1.0" # Extended to include pre-release versions for testing CONFIRMED_MAX = "2.0.0" # Exclusive upper bound +# Schema version (independent of adapter package version) +# See schema_export.py for details +SCHEMA_VERSION = "1.0.0" + def check_compatibility(version: str) -> list[CompatibilityWarning]: """ diff --git a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/models.py b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/models.py index 767e30d..6ad14b0 100644 --- a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/models.py +++ b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/models.py @@ -3,11 +3,29 @@ """ Pydantic models for the collector-gh adapter. -These models represent the output structure from the adapter after parsing -input from the living-doc-collector-gh action. +These models represent the authoritative input contract for doc-issues.json. 
+They are the single source of truth for the schema between this repository +(data consumer / schema producer) and the collector-gh repository +(data producer / schema consumer). + +PYDANTIC-FIRST PATTERN +====================== + +This repo: +- Defines Pydantic models (source of truth) +- Exports them as JSON Schema for the collector-gh repo to use for validation + +Collector-gh repo: +- Uses our exported JSON Schema to validate doc-issues.json +- Publishes validated data to us + +To export schema for collector-gh: + python -m living_doc_adapter_collector_gh.schema_export doc-issues-schema.json + +See SCHEMA_SYNC.md for the full synchronization workflow. """ -from pydantic import BaseModel +from pydantic import BaseModel, Field class CompatibilityWarning(BaseModel): diff --git a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/schema_export.py b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/schema_export.py new file mode 100644 index 0000000..9c8e18c --- /dev/null +++ b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/schema_export.py @@ -0,0 +1,94 @@ +# Copyright 2026 ABSA Group Limited. Apache License, Version 2.0. + +""" +Schema export for Pydantic models. + +Exports Pydantic models to JSON Schema format as an independent artifact. + +Schemas are saved to the `schemas/` directory next to the src/ directory, +making them available for distribution and use by downstream consumers. + +See SCHEMA_SYNC.md for details. +""" + +import json +from pathlib import Path + +from living_doc_adapter_collector_gh.models import AdapterResult + + +def get_default_schema_path() -> Path: + """ + Get the default schema export directory path. 
+ + Returns: + Path to schemas/ directory (packages/adapters/collector_gh/schemas/) + """ + # Navigate from src/living_doc_adapter_collector_gh/ to packages/adapters/collector_gh/schemas/ + package_root = Path(__file__).parent.parent.parent # Go up to collector_gh/ + schemas_dir = package_root / "schemas" + return schemas_dir + + +def export_schema(output_path: str | Path | None = None) -> dict: + """ + Export the AdapterResult model schema to JSON Schema format. + + This schema represents the authoritative input contract for the data format. + + Args: + output_path: Optional file path to write schema to. If None, uses default + location with version: packages/adapters/collector_gh/schemas/doc-issues-v1.0.0-schema.json + + Returns: + Dictionary containing the JSON Schema. + + Example: + >>> schema = export_schema() + >>> print(schema['$defs']['AdapterMetadataSource']) + + >>> export_schema('custom-location.json') + """ + schema = AdapterResult.model_json_schema() + # Pin schema version independently of adapter version + schema["$schema_version"] = "1.0.0" + + # Use default location if not provided + if output_path is None: + schemas_dir = get_default_schema_path() + schema_version = get_schema_version() + output_path = schemas_dir / f"doc-issues-v{schema_version}-schema.json" + + output_file = Path(output_path) + output_file.parent.mkdir(parents=True, exist_ok=True) + with open(output_file, "w", encoding="utf-8") as f: + json.dump(schema, f, indent=2) + print(f"Schema exported to: {output_file}") + + return schema + + +def get_schema_version() -> str: + """ + Get the version of the input contract schema. + + This is independent of the adapter package version and represents + the version of the doc-issues.json input schema. 
+ + Returns: + Version string (semver format) + """ + return "1.0.0" + + +if __name__ == "__main__": + import sys + + # CLI usage: + # python -m living_doc_adapter_collector_gh.schema_export # Uses default location with version + # python -m living_doc_adapter_collector_gh.schema_export output.json # Custom location + output = sys.argv[1] if len(sys.argv) > 1 else None + export_schema(output) + if output is None: + schema_version = get_schema_version() + print(f"Default location: {get_default_schema_path() / f'doc-issues-v{schema_version}-schema.json'}") From 96516177b484308ee2b56822f20c7af7756da106 Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Fri, 19 Jun 2026 15:10:08 +0200 Subject: [PATCH 2/2] fix: update references from metadata.generator to metadata.producer in code and documentation --- .../commands/normalize_issues.py | 2 +- apps/cli/tests/test_cli.py | 2 +- docs/architecture.md | 6 +- docs/contracts.md | 8 +- docs/cookbooks/normalize-issues.md | 20 +- docs/troubleshooting.md | 4 +- .../compatibility.py | 4 +- .../detector.py | 10 +- .../living_doc_adapter_collector_gh/parser.py | 218 +++++++++++++++--- .../collector_v1.0.0/input/doc-issues.json | 2 +- .../collector_v1.2.0/input/doc-issues.json | 2 +- .../collector_gh/tests/test_compatibility.py | 6 +- .../collector_gh/tests/test_detector.py | 34 +-- .../collector_gh/tests/test_parser.py | 12 +- 14 files changed, 241 insertions(+), 89 deletions(-) diff --git a/apps/cli/src/living_doc_cli/commands/normalize_issues.py b/apps/cli/src/living_doc_cli/commands/normalize_issues.py index 758b177..dc38fb2 100644 --- a/apps/cli/src/living_doc_cli/commands/normalize_issues.py +++ b/apps/cli/src/living_doc_cli/commands/normalize_issues.py @@ -48,7 +48,7 @@ def format_error_message(error: ToolkitError) -> str: # Add actionable guidance based on error type guidance_map = { InvalidInputError: "Ensure --input points to a valid file.", - AdapterError: "Check metadata.generator.name field.", + AdapterError: "Check 
metadata.producer.name field.", SchemaValidationError: "Review the output schema requirements.", NormalizationError: "Check input data format and content.", FileIOError: "Ensure output directory exists and is writable.", diff --git a/apps/cli/tests/test_cli.py b/apps/cli/tests/test_cli.py index 2caabcb..7ac41f9 100644 --- a/apps/cli/tests/test_cli.py +++ b/apps/cli/tests/test_cli.py @@ -169,7 +169,7 @@ def test_normalize_issues_adapter_error(mock_run_service, runner): assert result.exit_code == 2 assert "Adapter error:" in result.output assert "No compatible adapter found for input" in result.output - assert "Check metadata.generator.name field" in result.output + assert "Check metadata.producer.name field" in result.output @patch("living_doc_cli.commands.normalize_issues.run_service") diff --git a/docs/architecture.md b/docs/architecture.md index 31b3ecb..e14a311 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -80,7 +80,7 @@ flowchart TD Start([Start]) --> Load[Load Input JSON] Load --> Detect{Auto-detect
Adapter?} - Detect -->|Yes| AutoDetect[Scan metadata.generator.name] + Detect -->|Yes| AutoDetect[Scan metadata.producer.name] Detect -->|No| ExplicitAdapter[Use --source adapter] AutoDetect --> CheckAdapter{Adapter
Found?} @@ -285,7 +285,7 @@ sequenceDiagram alt Auto-detect mode Service->>Registry: Find compatible adapter Registry->>Adapter: can_handle(payload)? - Adapter->>Registry: Yes (metadata.generator.name matches) + Adapter->>Registry: Yes (metadata.producer.name matches) Registry->>Service: Return CollectorGhAdapter else Explicit mode Service->>Registry: Get adapter by name @@ -402,7 +402,7 @@ flowchart LR { "code": "VERSION_MISMATCH", "message": "Producer version 2.1.0 is outside confirmed range", - "context": "metadata.generator.version" + "context": "metadata.producer.version" } ] } diff --git a/docs/contracts.md b/docs/contracts.md index 1c87d93..254417d 100644 --- a/docs/contracts.md +++ b/docs/contracts.md @@ -49,8 +49,8 @@ Produced by [living-doc-collector-gh](https://github.com/AbsaOSS/living-doc-coll ### Producer Detection Adapter auto-detection checks: -- `metadata.generator.name` == `"AbsaOSS/living-doc-collector-gh"` -- `metadata.generator.version` — semver format +- `metadata.producer.name` == `"AbsaOSS/living-doc-collector-gh"` +- `metadata.producer.version` — semver format ### Compatibility Policy @@ -67,7 +67,7 @@ Warning format in audit: { "code": "VERSION_MISMATCH", "message": "Producer version 2.1.0 is outside confirmed range >=1.0.0,<2.0.0", - "context": "metadata.generator.version" + "context": "metadata.producer.version" } ``` @@ -158,7 +158,7 @@ Each pipeline stage appends a trace entry: | Collector field | Audit field | |-----------------|-------------| -| `metadata.generator.*` | `audit.producer.*` | +| `metadata.producer.*` | `audit.producer.*` | | `metadata.run.*` | `audit.run.*` | | `metadata.source.*` | `audit.source.*` | | Full original `metadata` | `audit.extensions["collector-gh"].original_metadata` | diff --git a/docs/cookbooks/normalize-issues.md b/docs/cookbooks/normalize-issues.md index 3a15811..daee3f5 100644 --- a/docs/cookbooks/normalize-issues.md +++ b/docs/cookbooks/normalize-issues.md @@ -59,7 +59,7 @@ See [Contracts & 
Interfaces](../contracts.md#cli-interface) for the full argumen ### Auto-Detection -When `--source auto` is used (default), the service automatically detects the producer by examining the `metadata.generator.name` field in the input JSON: +When `--source auto` is used (default), the service automatically detects the producer by examining the `metadata.producer.name` field in the input JSON: ```python if payload["metadata"]["generator"]["name"] == "AbsaOSS/living-doc-collector-gh": @@ -67,8 +67,8 @@ if payload["metadata"]["generator"]["name"] == "AbsaOSS/living-doc-collector-gh" ``` **Required Fields for Detection:** -- `metadata.generator.name` — Producer identifier (e.g., `"AbsaOSS/living-doc-collector-gh"`) -- `metadata.generator.version` — Producer version (semver format, e.g., `"1.2.0"`) +- `metadata.producer.name` — Producer identifier (e.g., `"AbsaOSS/living-doc-collector-gh"`) +- `metadata.producer.version` — Producer version (semver format, e.g., `"1.2.0"`) ### Explicit Adapter Selection @@ -122,7 +122,7 @@ This policy ensures: The adapter maps collector metadata to the audit envelope: ``` -metadata.generator.* → audit.producer.* +metadata.producer.* → audit.producer.* metadata.run.* → audit.run.* metadata.source.* → audit.source.* ``` @@ -202,7 +202,7 @@ When the producer version is outside the confirmed range, a warning is logged an { "code": "VERSION_MISMATCH", "message": "Producer version 2.1.0 is outside confirmed range >=1.0.0,<2.0.0", - "context": "metadata.generator.version" + "context": "metadata.producer.version" } ``` @@ -229,7 +229,7 @@ When the producer version is outside the confirmed range, a warning is logged an **Common Causes:** - File not found: `--input` path does not exist - Malformed JSON: Syntax errors in input file -- Missing required fields: Input lacks `metadata.generator.name` +- Missing required fields: Input lacks `metadata.producer.name` **Example:** ``` @@ -248,16 +248,16 @@ Invalid input: File 'doc-issues.json' not found. 
Ensure --input points to a vali **Error Prefix:** `Adapter error:` **Common Causes:** -- No compatible adapter found: `metadata.generator.name` does not match any known producer -- Missing metadata: Input lacks `metadata.generator` section +- No compatible adapter found: `metadata.producer.name` does not match any known producer +- Missing metadata: Input lacks `metadata.producer` section **Example:** ``` -Adapter error: No compatible adapter found for input. Check metadata.generator.name field. +Adapter error: No compatible adapter found for input. Check metadata.producer.name field. ``` **Solutions:** -- Inspect `metadata.generator.name`: `jq .metadata.generator.name doc-issues.json` +- Inspect `metadata.producer.name`: `jq .metadata.producer.name doc-issues.json` - Verify input is from `AbsaOSS/living-doc-collector-gh` - Use `--source collector-gh` to explicitly select adapter diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 501968f..27a2ad2 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -126,7 +126,7 @@ Invalid input: Malformed JSON in 'doc-issues.json'. Ensure the file contains val **Message:** ``` -Invalid input: Missing required field 'metadata.generator.name'. Check input structure. +Invalid input: Missing required field 'metadata.producer.name'. Check input structure. ``` **Causes:** @@ -144,7 +144,7 @@ Invalid input: Missing required field 'metadata.generator.name'. Check input str 2. 
**Verify generator metadata:** ```bash - jq .metadata.generator doc-issues.json + jq .metadata.producer doc-issues.json ``` Expected output: diff --git a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/compatibility.py b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/compatibility.py index 41f1dda..be3c82b 100644 --- a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/compatibility.py +++ b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/compatibility.py @@ -68,7 +68,7 @@ def check_compatibility(version: str) -> list[CompatibilityWarning]: message=( f"Producer version {version} is outside confirmed range" f" >={CONFIRMED_MIN},<{CONFIRMED_MAX}" ), - context="metadata.generator.version", + context="metadata.producer.version", ) ] except InvalidVersion: @@ -76,6 +76,6 @@ def check_compatibility(version: str) -> list[CompatibilityWarning]: CompatibilityWarning( code="INVALID_VERSION", message=f"Producer version '{version}' is not a valid semantic version", - context="metadata.generator.version", + context="metadata.producer.version", ) ] diff --git a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/detector.py b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/detector.py index e56bfb6..06ebea2 100644 --- a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/detector.py +++ b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/detector.py @@ -21,8 +21,8 @@ def can_handle(payload: dict) -> bool: True if the payload is from living-doc-collector-gh, False otherwise """ try: - generator_name = payload.get("metadata", {}).get("generator", {}).get("name") - return generator_name == "AbsaOSS/living-doc-collector-gh" + producer_name = payload.get("metadata", {}).get("producer", {}).get("name") + return producer_name == "AbsaOSS/living-doc-collector-gh" except Exception: # pylint: disable=broad-exception-caught # Handle AttributeError, 
TypeError gracefully return False @@ -42,9 +42,9 @@ def extract_version(payload: dict) -> str: AdapterError: If the version cannot be extracted """ try: - version = payload["metadata"]["generator"]["version"] + version = payload["metadata"]["producer"]["version"] if not version: - raise AdapterError("Producer version is empty in metadata.generator.version") + raise AdapterError("Producer version is empty in metadata.producer.version") return version except (KeyError, TypeError) as e: - raise AdapterError(f"Missing or invalid metadata.generator.version in payload: {e}") from e + raise AdapterError(f"Missing or invalid metadata.producer.version in payload: {e}") from e diff --git a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/parser.py b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/parser.py index 5eda1a4..bc87fe6 100644 --- a/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/parser.py +++ b/packages/adapters/collector_gh/src/living_doc_adapter_collector_gh/parser.py @@ -4,9 +4,13 @@ Input parser for the collector-gh adapter. This module provides functions to parse collector-gh output into -AdapterResult format. +AdapterResult format with schema validation and comprehensive error reporting. """ +import logging +from jsonschema import validate, ValidationError # type: ignore[import-untyped] +from pydantic import ValidationError as PydanticValidationError + from living_doc_core.errors import AdapterError # type: ignore[import-untyped] from living_doc_adapter_collector_gh.compatibility import check_compatibility @@ -21,11 +25,116 @@ AdapterResult, ) +logger = logging.getLogger(__name__) + + +def _validate_schema(payload: dict) -> list[str]: + """ + Validate incoming payload against expected schema structure. + + Performs pre-parsing validation to catch structural issues early + and provide actionable error messages. 
+ + Args: + payload: Input payload to validate + + Returns: + List of validation error messages (empty if valid) + """ + errors = [] + + # Check top-level structure + if not isinstance(payload, dict): + errors.append(f"Payload must be a dict, got {type(payload).__name__}") + return errors + + # Check required root keys + if "metadata" not in payload: + errors.append("Missing required key: 'metadata'") + if "items" not in payload: + errors.append("Missing required key: 'items'") + + # Validate metadata structure + if "metadata" in payload: + metadata = payload["metadata"] + if not isinstance(metadata, dict): + errors.append(f"'metadata' must be a dict, got {type(metadata).__name__}") + else: + # Check metadata sub-keys + for key in ["producer", "run", "source"]: + if key not in metadata: + errors.append(f"Missing metadata.{key}") + elif not isinstance(metadata[key], dict): + errors.append( + f"metadata.{key} must be a dict, got {type(metadata[key]).__name__}" + ) + + # Validate producer + producer = metadata.get("producer", {}) + if isinstance(producer, dict): + for key in ["name", "version"]: + if key not in producer: + errors.append(f"Missing metadata.producer.{key}") + elif not isinstance(producer.get(key), str): + errors.append( + f"metadata.producer.{key} must be a string, " + f"got {type(producer.get(key)).__name__}" + ) + + # Validate source + source = metadata.get("source", {}) + if isinstance(source, dict): + if "repositories" not in source: + errors.append("Missing metadata.source.repositories") + elif not isinstance(source["repositories"], list): + errors.append( + f"metadata.source.repositories must be a list, " + f"got {type(source['repositories']).__name__}" + ) + elif not source["repositories"]: + errors.append("metadata.source.repositories cannot be empty") + + # Validate items structure + if "items" in payload: + items = payload["items"] + if not isinstance(items, list): + errors.append( + f"'items' must be a list, got {type(items).__name__}" + ) + 
return errors + item_count = len(items) + + # Sample validation of first few items + for idx, item in enumerate(items[:5]): + if not isinstance(item, dict): + errors.append( + f"Item {idx} must be a dict, got {type(item).__name__}" + ) + continue + + # Check required item fields + for field in ["id", "title", "state", "url", "timestamps"]: + if field not in item: + item_id = item.get("id", f"[{idx}]") + errors.append(f"Item {item_id} missing required field: '{field}'") + + if item_count > 5: + logger.info( + "Schema validation checked first 5 of %d items; " + "full validation deferred to item parsing", + item_count, + ) + + return errors + def parse(payload: dict) -> AdapterResult: """ Parse collector-gh output into AdapterResult format. + Performs schema validation before parsing to catch issues early + with detailed error reporting. + Args: payload: Input payload from collector-gh @@ -33,29 +142,46 @@ def parse(payload: dict) -> AdapterResult: AdapterResult with parsed items and metadata Raises: - AdapterError: If parsing fails + AdapterError: If validation or parsing fails """ try: + # Perform schema validation + logger.debug("Starting schema validation for collector-gh payload") + validation_errors = _validate_schema(payload) + + if validation_errors: + error_message = "Schema validation failed with the following issues:\n" + for idx, error in enumerate(validation_errors, 1): + error_message += f" [{idx}] {error}\n" + logger.error(error_message.strip()) + raise AdapterError(error_message.strip()) + + logger.debug("Schema validation passed") + # Extract version and check compatibility + logger.debug("Extracting version from payload") version = extract_version(payload) + logger.info("Detected collector-gh version: %s", version) + warnings = check_compatibility(version) + if warnings: + logger.warning("Compatibility warnings detected: %s", len(warnings)) + for warning in warnings: + logger.warning(" - [%s] %s", warning.code, warning.message) # Extract metadata + 
logger.debug("Extracting metadata") metadata_dict = payload.get("metadata", {}) - generator = metadata_dict.get("generator", {}) + producer = metadata_dict.get("producer", {}) run = metadata_dict.get("run", {}) source = metadata_dict.get("source", {}) - # Get source repository for constructing item IDs - repositories = source.get("repositories", []) - source_repo = repositories[0] if repositories else "unknown/repo" - # Create metadata metadata = AdapterMetadata( producer=AdapterMetadataProducer( - name=generator.get("name", ""), - version=generator.get("version", ""), - build=generator.get("build"), + name=producer.get("name", ""), + version=producer.get("version", ""), + build=producer.get("build"), ), run=AdapterMetadataRun( run_id=run.get("run_id"), @@ -67,46 +193,72 @@ def parse(payload: dict) -> AdapterResult: ), source=AdapterMetadataSource( systems=source.get("systems", []), - repositories=repositories, + repositories=source.get("repositories", []), organization=source.get("organization"), enterprise=source.get("enterprise"), ), original_metadata=metadata_dict, ) + logger.info("Metadata extracted: producer=%s v%s", producer.get("name"), producer.get("version")) - # Parse issues into adapter items - # Support both dict format (keyed by issue ID) and list format - issues_data = payload.get("issues", {}) - if isinstance(issues_data, dict): - issues_list = list(issues_data.values()) - else: - issues_list = issues_data if isinstance(issues_data, list) else [] + # Parse items + logger.debug("Parsing items") + items_data = payload.get("items", []) + if not isinstance(items_data, list): + items_data = [] + logger.debug("Items provided as array with %d items", len(items_data)) items = [] - for issue in issues_list: + parse_errors = [] + + for idx, raw_item in enumerate(items_data): try: - # Support both 'number' and 'issue_number' field names - issue_number = issue.get("number") or issue.get("issue_number") + # Validate required fields + missing_fields = [] + for 
field in ["id", "title", "state", "url", "timestamps"]: + if field not in raw_item: + missing_fields.append(field) + + if missing_fields: + raise KeyError(f"Missing required fields: {', '.join(missing_fields)}") + item = AdapterItem( - id=f"github:{source_repo}#{issue_number}", - title=issue["title"], - state=issue["state"], - tags=issue.get("labels", []), - url=issue["html_url"], + id=raw_item["id"], + title=raw_item["title"], + state=raw_item["state"], + tags=raw_item.get("tags", []), + url=raw_item["url"], timestamps=AdapterItemTimestamps( - created=issue["created_at"], - updated=issue["updated_at"], + created=raw_item["timestamps"]["created"], + updated=raw_item["timestamps"]["updated"], ), - body=issue.get("body"), + body=raw_item.get("body"), ) items.append(item) - except (KeyError, TypeError) as e: - issue_number = issue.get("number") or issue.get("issue_number", "unknown") - raise AdapterError(f"Failed to parse issue {issue_number}: {e}") from e + logger.debug("Parsed item %s: %s", raw_item["id"], raw_item["title"][:50]) + + except (KeyError, TypeError, PydanticValidationError) as e: + item_id = raw_item.get("id", f"[{idx}]") + error_msg = f"Failed to parse item {item_id}: {e}" + logger.error(error_msg) + parse_errors.append(error_msg) + + if parse_errors: + error_summary = f"Failed to parse {len(parse_errors)} item(s):\n" + for error in parse_errors: + error_summary += f" - {error}\n" + logger.error(error_summary.strip()) + raise AdapterError(error_summary.strip()) + + logger.info("Parsing input with collector-gh adapter...") + logger.info("Parsed %d items", len(items)) return AdapterResult(items=items, metadata=metadata, warnings=warnings) except AdapterError: raise except Exception as e: - raise AdapterError(f"Failed to parse collector-gh payload: {e}") from e + error_msg = f"Failed to parse collector-gh payload: {e}" + logger.exception(error_msg) + raise AdapterError(error_msg) from e + diff --git 
a/packages/adapters/collector_gh/tests/fixtures/collector_v1.0.0/input/doc-issues.json b/packages/adapters/collector_gh/tests/fixtures/collector_v1.0.0/input/doc-issues.json index 90425eb..964e764 100644 --- a/packages/adapters/collector_gh/tests/fixtures/collector_v1.0.0/input/doc-issues.json +++ b/packages/adapters/collector_gh/tests/fixtures/collector_v1.0.0/input/doc-issues.json @@ -1,6 +1,6 @@ { "metadata": { - "generator": { + "producer": { "name": "AbsaOSS/living-doc-collector-gh", "version": "1.0.0", "build": "sha-abc123" diff --git a/packages/adapters/collector_gh/tests/fixtures/collector_v1.2.0/input/doc-issues.json b/packages/adapters/collector_gh/tests/fixtures/collector_v1.2.0/input/doc-issues.json index bdb534a..dcbce25 100644 --- a/packages/adapters/collector_gh/tests/fixtures/collector_v1.2.0/input/doc-issues.json +++ b/packages/adapters/collector_gh/tests/fixtures/collector_v1.2.0/input/doc-issues.json @@ -1,6 +1,6 @@ { "metadata": { - "generator": { + "producer": { "name": "AbsaOSS/living-doc-collector-gh", "version": "1.2.0", "build": "sha-xyz789" diff --git a/packages/adapters/collector_gh/tests/test_compatibility.py b/packages/adapters/collector_gh/tests/test_compatibility.py index 84cf74b..f4237b0 100644 --- a/packages/adapters/collector_gh/tests/test_compatibility.py +++ b/packages/adapters/collector_gh/tests/test_compatibility.py @@ -30,7 +30,7 @@ def test_version_0_9_0_warning(self): assert warnings[0].code == "VERSION_MISMATCH" assert "0.9.0" in warnings[0].message assert ">=1.0.0,<2.0.0" in warnings[0].message - assert warnings[0].context == "metadata.generator.version" + assert warnings[0].context == "metadata.producer.version" def test_version_2_0_0_warning(self): """Test that version 2.0.0 produces VERSION_MISMATCH warning.""" @@ -39,7 +39,7 @@ def test_version_2_0_0_warning(self): assert warnings[0].code == "VERSION_MISMATCH" assert "2.0.0" in warnings[0].message assert ">=1.0.0,<2.0.0" in warnings[0].message - assert 
warnings[0].context == "metadata.generator.version" + assert warnings[0].context == "metadata.producer.version" def test_version_2_1_0_warning(self): """Test that version 2.1.0 produces VERSION_MISMATCH warning.""" @@ -60,7 +60,7 @@ def test_invalid_version_string(self): assert len(warnings) == 1 assert warnings[0].code == "INVALID_VERSION" assert "not-a-version" in warnings[0].message - assert warnings[0].context == "metadata.generator.version" + assert warnings[0].context == "metadata.producer.version" def test_empty_version_string(self): """Test that empty version string produces INVALID_VERSION warning.""" diff --git a/packages/adapters/collector_gh/tests/test_detector.py b/packages/adapters/collector_gh/tests/test_detector.py index 59330a9..8dc6cce 100644 --- a/packages/adapters/collector_gh/tests/test_detector.py +++ b/packages/adapters/collector_gh/tests/test_detector.py @@ -13,12 +13,12 @@ class TestCanHandle: def test_can_handle_valid_collector_gh_payload(self): """Test that can_handle returns True for valid collector-gh payload.""" - payload = {"metadata": {"generator": {"name": "AbsaOSS/living-doc-collector-gh", "version": "1.0.0"}}} + payload = {"metadata": {"producer": {"name": "AbsaOSS/living-doc-collector-gh", "version": "1.0.0"}}} assert can_handle(payload) is True def test_can_handle_different_generator_name(self): - """Test that can_handle returns False for different generator name.""" - payload = {"metadata": {"generator": {"name": "different-generator", "version": "1.0.0"}}} + """Test that can_handle returns False for different producer name.""" + payload = {"metadata": {"producer": {"name": "different-generator", "version": "1.0.0"}}} assert can_handle(payload) is False def test_can_handle_missing_metadata(self): @@ -27,13 +27,13 @@ def test_can_handle_missing_metadata(self): assert can_handle(payload) is False def test_can_handle_missing_generator(self): - """Test that can_handle returns False when generator is missing.""" + """Test that 
can_handle returns False when producer is missing.""" payload = {"metadata": {}} assert can_handle(payload) is False def test_can_handle_missing_name(self): """Test that can_handle returns False when name is missing.""" - payload = {"metadata": {"generator": {"version": "1.0.0"}}} + payload = {"metadata": {"producer": {"version": "1.0.0"}}} assert can_handle(payload) is False def test_can_handle_empty_dict(self): @@ -41,8 +41,8 @@ def test_can_handle_empty_dict(self): assert can_handle({}) is False def test_can_handle_null_generator(self): - """Test that can_handle returns False when generator is None.""" - payload = {"metadata": {"generator": None}} + """Test that can_handle returns False when producer is None.""" + payload = {"metadata": {"producer": None}} assert can_handle(payload) is False @@ -51,7 +51,7 @@ class TestExtractVersion: def test_extract_version_valid_payload(self): """Test that extract_version returns version string from valid payload.""" - payload = {"metadata": {"generator": {"name": "AbsaOSS/living-doc-collector-gh", "version": "1.2.3"}}} + payload = {"metadata": {"producer": {"name": "AbsaOSS/living-doc-collector-gh", "version": "1.2.3"}}} version = extract_version(payload) assert version == "1.2.3" @@ -60,32 +60,32 @@ def test_extract_version_missing_metadata(self): payload = {} with pytest.raises(AdapterError) as exc_info: extract_version(payload) - assert "metadata.generator.version" in str(exc_info.value) + assert "metadata.producer.version" in str(exc_info.value) def test_extract_version_missing_generator(self): - """Test that extract_version raises AdapterError when generator is missing.""" + """Test that extract_version raises AdapterError when producer is missing.""" payload = {"metadata": {}} with pytest.raises(AdapterError) as exc_info: extract_version(payload) - assert "metadata.generator.version" in str(exc_info.value) + assert "metadata.producer.version" in str(exc_info.value) def test_extract_version_missing_version(self): 
"""Test that extract_version raises AdapterError when version is missing.""" - payload = {"metadata": {"generator": {"name": "AbsaOSS/living-doc-collector-gh"}}} + payload = {"metadata": {"producer": {"name": "AbsaOSS/living-doc-collector-gh"}}} with pytest.raises(AdapterError) as exc_info: extract_version(payload) - assert "metadata.generator.version" in str(exc_info.value) + assert "metadata.producer.version" in str(exc_info.value) def test_extract_version_empty_version(self): """Test that extract_version raises AdapterError when version is empty.""" - payload = {"metadata": {"generator": {"name": "AbsaOSS/living-doc-collector-gh", "version": ""}}} + payload = {"metadata": {"producer": {"name": "AbsaOSS/living-doc-collector-gh", "version": ""}}} with pytest.raises(AdapterError) as exc_info: extract_version(payload) assert "empty" in str(exc_info.value).lower() def test_extract_version_null_generator(self): - """Test that extract_version raises AdapterError when generator is None.""" - payload = {"metadata": {"generator": None}} + """Test that extract_version raises AdapterError when producer is None.""" + payload = {"metadata": {"producer": None}} with pytest.raises(AdapterError) as exc_info: extract_version(payload) - assert "metadata.generator.version" in str(exc_info.value) + assert "metadata.producer.version" in str(exc_info.value) diff --git a/packages/adapters/collector_gh/tests/test_parser.py b/packages/adapters/collector_gh/tests/test_parser.py index 2677e5e..82d0e2b 100644 --- a/packages/adapters/collector_gh/tests/test_parser.py +++ b/packages/adapters/collector_gh/tests/test_parser.py @@ -33,7 +33,7 @@ def minimal_payload(self): """Create a minimal valid payload.""" return { "metadata": { - "generator": {"name": "AbsaOSS/living-doc-collector-gh", "version": "1.0.0", "build": "test"}, + "producer": {"name": "AbsaOSS/living-doc-collector-gh", "version": "1.0.0", "build": "test"}, "run": { "run_id": "123", "run_attempt": "1", @@ -79,8 +79,8 @@ def 
test_parse_v1_0_0_fixture(self, fixture_v1_0_0): assert len(result.warnings) == 0 # Check original metadata is preserved - assert "generator" in result.metadata.original_metadata - assert result.metadata.original_metadata["generator"]["version"] == "1.0.0" + assert "producer" in result.metadata.original_metadata + assert result.metadata.original_metadata["producer"]["version"] == "1.0.0" def test_parse_v1_2_0_fixture(self, fixture_v1_2_0): """Test parsing with v1.2.0 fixture.""" @@ -157,10 +157,10 @@ def test_original_metadata_preserved(self, fixture_v1_0_0): result = parse(fixture_v1_0_0) original = result.metadata.original_metadata - assert "generator" in original + assert "producer" in original assert "run" in original assert "source" in original - assert original["generator"]["name"] == "AbsaOSS/living-doc-collector-gh" + assert original["producer"]["name"] == "AbsaOSS/living-doc-collector-gh" def test_parse_minimal_payload(self, minimal_payload): """Test parsing with minimal payload.""" @@ -196,7 +196,7 @@ def test_parse_with_no_repositories(self, minimal_payload): def test_parse_with_incompatible_version(self, minimal_payload): """Test parsing with incompatible version generates warnings.""" - minimal_payload["metadata"]["generator"]["version"] = "2.0.0" + minimal_payload["metadata"]["producer"]["version"] = "2.0.0" result = parse(minimal_payload) assert len(result.warnings) == 1