diff --git a/README.md b/README.md index e4c2a2b..b24af65 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,22 @@ curl http://localhost:8080/healthz # frontend → backend_reachable: true | `POST` | `/jobs/{job_id}/resume` | Resume a job paused by tape mismatch (after the user changed the tape physically) | — | | `POST` | `/printer/resume` | Resume the printer queue after a recoverable error halted it (tape empty / cover open / offline) | — | | `GET` | `/healthz` | Liveness probe for orchestrators | — | +| `GET` | `/readiness` | Readiness probe — deep check for reverse-proxy routing | — | + +### Health Probes + +The backend exposes two HTTP probes with different semantics: + +| Endpoint | Purpose | What it answers | +|----------|---------|-----------------| +| `GET /healthz` | Liveness — Docker / Kubernetes container restart signal | "the process and the event loop are alive" | +| `GET /readiness` | Readiness — reverse-proxy routing signal | "the process can serve traffic right now": database connectable, alembic at head, templates seeded, runtime printer matches DB, SNMP probe fresh, queue worker alive, SSE bus capacity ok | + +`/readiness` returns HTTP 200 with `status` of `ready` (all checks ok) or `degraded` (non-critical checks failing — still routable), and HTTP 503 with `not-ready` when a critical check (database, alembic, template_seed) fails. + +Pangolin's `targets[0].healthcheck.path` can use `/readiness` for deep checks instead of `/healthz`; Docker container healthchecks should stay on `/healthz` to avoid restart loops on transient DB failures. + +See `docs/superpowers/specs/2026-05-17-phase-7b-foundation-design.md` for the full check list and rationale. 
def upgrade() -> None:
    """Append ``+00:00`` to every stored datetime that carries no UTC offset.

    Walks ``_TABLES_DT`` and issues one ``UPDATE`` per (table, column) pair.
    A value is left untouched when it is NULL, already contains ``+``,
    ends with ``Z``, or ends with a negative offset of the form ``-HH:MM``.

    The negative-offset guard keeps the migration idempotent for any value
    that is already offset-aware: without it ``...-05:00`` would be rewritten
    to ``...-05:00+00:00``.
    """
    for table, cols in _TABLES_DT:
        for col in cols:
            # Identifiers are interpolated from the module-level constant
            # above, never from external input, so the f-string is safe here.
            #
            # In LIKE, ``_`` matches exactly one character, so '%-__:__'
            # only matches a trailing "-HH:MM". The date part ("-MM-DD")
            # cannot match because its fourth character is '-' and not ':'.
            op.execute(
                f"UPDATE {table} SET {col} = {col} || '+00:00' "
                f"WHERE {col} IS NOT NULL "
                f"AND {col} NOT LIKE '%+%' "
                f"AND {col} NOT LIKE '%Z' "
                f"AND {col} NOT LIKE '%-__:__'"
            )
@router.get(
    "/{printer_id}/status",
    response_model=PrinterStatus,
    summary="Return the latest cached printer status",
    description=(
        "Returns the most recent status written by the background SNMP probe worker. "
        "The response is served from ``printer_status_cache`` — no synchronous SNMP "
        "probe is performed, so the response always returns in <10 ms. "
        "When no probe has completed yet ``online`` is ``null`` and ``note`` explains why. "
        "Returns 404 when the printer is not registered."
    ),
)
async def get_printer_status(
    printer_id: UUID,
    session: SessionDep,
) -> PrinterStatus:
    """Return the latest cached status for a printer; no sync SNMP probe.

    The printer lookup is delegated to ``_get_printer_or_404`` (its result is
    not needed here, only its existence check). The status itself is read
    from the cache row returned by ``cache_repo.get``.

    Returns:
        A placeholder ``PrinterStatus`` (``online=None``, explanatory
        ``note``) when there is no cache row or the row has no
        ``captured_at``; otherwise a status assembled from the row's
        ``parsed`` payload, with ``last_probe_age_s`` clamped to ``>= 0``.
    """
    await _get_printer_or_404(session, printer_id)

    row = await cache_repo.get(session, printer_id)
    if row is None or row.captured_at is None:
        return PrinterStatus(
            printer_id=printer_id,
            online=None,
            captured_at=None,
            note="No probe yet — wait up to 30s for first probe cycle",
        )

    # NOTE(review): the keys read below (online, loaded_tape_mm, error_flags,
    # last_error) are assumed to match what the probe worker writes — confirm.
    parsed = row.parsed or {}

    # Rows written before the timezone-aware columns may come back naive;
    # treat those as UTC so the subtraction below never mixes naive/aware.
    captured = row.captured_at
    if captured.tzinfo is None:
        captured = captured.replace(tzinfo=UTC)

    # Clamp at zero: a timestamp slightly ahead of the local clock (skew,
    # clock adjustment) must not surface as a negative "age".
    age_s = max(0, int((datetime.now(UTC) - captured).total_seconds()))

    loaded_tape_mm = parsed.get("loaded_tape_mm")
    tape_loaded = f"{loaded_tape_mm}mm" if loaded_tape_mm else None

    error_flags = parsed.get("error_flags") or []
    error_state = ", ".join(error_flags) if error_flags else None

    return PrinterStatus(
        printer_id=printer_id,
        online=parsed.get("online"),
        tape_loaded=tape_loaded,
        error_state=error_state,
        # Return the normalised value so the timestamp and the age above are
        # derived from the same instant.
        captured_at=captured,
        last_probe_age_s=age_s,
        last_error=parsed.get("last_error"),
    )
async def verify_alembic_at_head(settings: Settings) -> None:
    """Fail startup when the database revision differs from the script head.

    Compares the revision recorded in the database with the head revision of
    the Alembic script directory and raises if they are not equal. The
    blocking Alembic / SQLAlchemy calls run in a worker thread via
    ``asyncio.to_thread``.

    ``settings`` is passed in explicitly (only ``settings.database_url`` is
    read) so callers can point the check at any database.

    Raises:
        RuntimeError: the database revision and the script head differ.
    """
    import asyncio
    from pathlib import Path as _Path

    from alembic.config import Config
    from alembic.runtime.migration import MigrationContext
    from alembic.script import ScriptDirectory
    from sqlalchemy import create_engine

    # backend/app/db/lifespan.py → parents[2] = backend/
    alembic_ini = _Path(__file__).resolve().parents[2] / "alembic.ini"

    def _read_revisions() -> tuple[str | None, str | None]:
        """Return ``(database_revision, script_head)`` using sync APIs."""
        alembic_cfg = Config(str(alembic_ini))
        # NOTE(review): flag intended to stop alembic from reconfiguring the
        # root logger; presumably honoured by the project's env.py — confirm.
        alembic_cfg.attributes["configure_logger"] = False
        expected = ScriptDirectory.from_config(alembic_cfg).get_current_head()

        # A synchronous engine is used here, so drop the async driver suffix.
        sync_engine = create_engine(settings.database_url.replace("+aiosqlite", ""))
        try:
            with sync_engine.connect() as connection:
                actual = MigrationContext.configure(connection).get_current_revision()
        finally:
            sync_engine.dispose()

        return actual, expected

    db_rev, script_head = await asyncio.to_thread(_read_revisions)
    if db_rev == script_head:
        return
    raise RuntimeError(
        f"Alembic migration drift detected: DB at {db_rev!r}, expected head {script_head!r}"
    )
async def upsert_runtime_printer(
    session: AsyncSession,
    settings: Settings,
) -> UUID | None:
    """Materialise one Printer row from env config; return its id.

    The row is keyed by ``derive_printer_id(model, host, port)``. When a row
    with that id already exists it is refreshed in place, otherwise a new one
    is added. Both paths write the same env-derived columns (name, model,
    backend, connection, enabled), so a changed ``printer_backend`` is picked
    up on the next restart instead of leaving a stale value behind.

    The session is flushed but not committed; committing is left to the
    caller.

    Returns:
        The printer id, or ``None`` when model, host or port is empty/zero
        (i.e. the environment does not declare a printer host).
    """
    model: str = settings.printer_model
    # Resolve host: pt750w takes precedence, ql820 is the fallback.
    host: str = settings.pt750w_host or settings.ql820_host or ""
    port: int = settings.pt750w_port if settings.pt750w_host else settings.ql820_port

    if not (model and host and port):
        return None

    # NOTE(review): the id is expected to be stable across restarts for the
    # same (model, host, port) — confirm in app.services.printer_identity.
    printer_id: UUID = derive_printer_id(model, host, port)
    connection: dict[str, object] = {
        "host": host,
        "port": port,
        "snmp": settings.printer_discover_via_snmp,
        "snmp_community": settings.printer_snmp_community,
    }
    name: str = f"{model} ({host})"

    existing = await session.get(Printer, printer_id)
    if existing is None:
        session.add(
            Printer(
                id=printer_id,
                name=name,
                model=model.lower(),
                backend=settings.printer_backend,
                connection=connection,
                enabled=True,
            )
        )
    else:
        existing.name = name
        # Keep the update path in step with the insert path above.
        existing.model = model.lower()
        existing.backend = settings.printer_backend
        existing.connection = connection
        existing.enabled = True

    await session.flush()
    return printer_id
@@ -66,10 +66,11 @@ sys.stderr.writelines(_lines) sys.exit(78) # sysexits.h EX_CONFIG -from fastapi import FastAPI, Request +from fastapi import Depends, FastAPI, Request, Response from fastapi.openapi.utils import get_openapi from fastapi.staticfiles import StaticFiles from pydantic import BaseModel, ConfigDict +from sqlalchemy.ext.asyncio import AsyncSession import app.integrations as _integrations_init # triggers integration plugin discovery from app import __version__ @@ -89,12 +90,16 @@ recover_inflight_jobs, run_migrations, seed_templates, + upsert_runtime_printer, + verify_alembic_at_head, ) +from app.db.session import get_session from app.integrations.registry import IntegrationRegistry from app.printer_backends import BackendRegistry from app.printer_backends.exceptions import SnmpDiscoveryError from app.printer_backends.snmp_helper import query_model_pjl from app.printer_models.registry import ModelRegistry +from app.schemas.readiness import ReadinessResponse from app.services.event_bus import EventBus from app.services.label_renderer import LabelRenderer from app.services.lookup_service import AppLookupService @@ -103,6 +108,7 @@ from app.services.producers.print_queue_producer import PrintQueueProducer from app.services.producers.status_probe_producer import StatusProbeProducer from app.services.producers.tape_change_producer import TapeChangeProducer +from app.services.readiness import build_readiness_response from app.services.tape_registry import TapeRegistry from app.services.template_loader import TemplateLoader @@ -233,24 +239,39 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: """ settings = get_settings() - # --- DB startup: migrations + recovery + seed + printer state -------- + # --- DB startup: migrations first, then in-memory state, then DB writes --- await run_migrations() - async with async_session() as s: - await recover_inflight_jobs(s) - await seed_templates(s, TemplateLoader) - await ensure_printer_state(s) - # 
--------------------------------------------------------------------- + await verify_alembic_at_head(settings) - # Re-run integration plugin discovery if the registry was cleared (e.g. by - # test fixtures that call IntegrationRegistry._plugins.clear()). This is - # idempotent: _discover_plugins skips names that are already registered. + # 2. Plugin registries (idempotent — skips already-registered names). + # Must run BEFORE TemplateLoader.load_dir() because load_dir validates + # each template's `app` field against IntegrationRegistry. Re-run if the + # registry was cleared (e.g. by test fixtures that call + # IntegrationRegistry._plugins.clear()). if not IntegrationRegistry.names(): _integrations_init._discover_plugins() + ModelRegistry.ensure_discovered() + + # 3. Populate in-memory template cache BEFORE any DB writes that depend on it. + # load_dir must come after plugin discovery (above) and before seed_templates + # (below) — the seed step reads from the cache that load_dir populates. if _SEED_TEMPLATES_DIR.exists(): TemplateLoader.load_dir(_SEED_TEMPLATES_DIR) + else: + raise RuntimeError( + f"Seed templates directory not found: {_SEED_TEMPLATES_DIR}. " + "The application package is incomplete — reinstall or rebuild the image." + ) - ModelRegistry.ensure_discovered() + # 4. DB-bound init — plugin registry and template cache are populated. 
@app.get(
    "/readiness",
    response_model=ReadinessResponse,
    tags=["meta"],
    summary="Readiness probe",
    description=(
        "Deep readiness check: database connectivity, alembic migration "
        "state, template seed, printer wiring, SNMP probe recency, "
        "print-queue liveness, and SSE subscriber capacity. "
        "Returns 200 with status in {ready, degraded} when all critical "
        "checks pass; 503 with status=not-ready when any critical check "
        "(database / alembic / template_seed) fails."
    ),
    responses={503: {"model": ReadinessResponse}},
)
async def readiness(
    response: Response,
    session: Annotated[AsyncSession, Depends(get_session)],
) -> ReadinessResponse:
    """Build the readiness report and map ``not-ready`` to HTTP 503.

    The report itself comes from ``build_readiness_response``; this handler
    only chooses the status code. Any status other than ``not-ready`` keeps
    the default response code.
    """
    current_settings = get_settings()
    report = await build_readiness_response(
        session,
        app.state,
        current_settings,
        version=HUB_VERSION,
        revision=HUB_REVISION,
    )
    not_ready = report.status == "not-ready"
    if not_ready:
        # Mutating the injected Response changes only the status code; the
        # body is still the returned model.
        response.status_code = 503
    return report
100644 --- a/backend/app/models/preset.py +++ b/backend/app/models/preset.py @@ -6,7 +6,7 @@ from typing import Any from uuid import UUID, uuid4 -from sqlalchemy import JSON +from sqlalchemy import JSON, DateTime from sqlmodel import Column, Field, SQLModel @@ -18,8 +18,15 @@ class Preset(SQLModel, table=True): printer_id: UUID | None = Field(default=None, foreign_key="printers.id") template_id: UUID = Field(foreign_key="templates.id") field_values: dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON)) - created_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC), + sa_column=Column(DateTime(timezone=True), nullable=False), + ) updated_at: datetime = Field( default_factory=lambda: datetime.now(UTC), - sa_column_kwargs={"onupdate": lambda: datetime.now(UTC)}, + sa_column=Column( + DateTime(timezone=True), + nullable=False, + onupdate=lambda: datetime.now(UTC), + ), ) diff --git a/backend/app/models/printer.py b/backend/app/models/printer.py index 3938d65..024704d 100644 --- a/backend/app/models/printer.py +++ b/backend/app/models/printer.py @@ -6,7 +6,7 @@ from typing import Any from uuid import UUID, uuid4 -from sqlalchemy import JSON +from sqlalchemy import JSON, DateTime from sqlmodel import Column, Field, SQLModel @@ -19,8 +19,15 @@ class Printer(SQLModel, table=True): backend: str connection: dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON)) enabled: bool = Field(default=True) - created_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC), + sa_column=Column(DateTime(timezone=True), nullable=False), + ) updated_at: datetime = Field( default_factory=lambda: datetime.now(UTC), - sa_column_kwargs={"onupdate": lambda: datetime.now(UTC)}, + sa_column=Column( + DateTime(timezone=True), + nullable=False, + onupdate=lambda: datetime.now(UTC), + ), ) diff --git 
a/backend/app/models/printer_state.py b/backend/app/models/printer_state.py index 13738c5..a4e3fa4 100644 --- a/backend/app/models/printer_state.py +++ b/backend/app/models/printer_state.py @@ -5,6 +5,7 @@ from datetime import UTC, datetime from uuid import UUID +from sqlalchemy import Column, DateTime from sqlmodel import Field, SQLModel @@ -15,5 +16,9 @@ class PrinterState(SQLModel, table=True): paused: bool = Field(default=False) updated_at: datetime = Field( default_factory=lambda: datetime.now(UTC), - sa_column_kwargs={"onupdate": lambda: datetime.now(UTC)}, + sa_column=Column( + DateTime(timezone=True), + nullable=False, + onupdate=lambda: datetime.now(UTC), + ), ) diff --git a/backend/app/models/printer_status_cache.py b/backend/app/models/printer_status_cache.py index fe2ab1e..7a97fa6 100644 --- a/backend/app/models/printer_status_cache.py +++ b/backend/app/models/printer_status_cache.py @@ -6,7 +6,7 @@ from typing import Any from uuid import UUID -from sqlalchemy import JSON, LargeBinary +from sqlalchemy import JSON, DateTime, LargeBinary from sqlmodel import Column, Field, SQLModel @@ -16,8 +16,15 @@ class PrinterStatusCache(SQLModel, table=True): printer_id: UUID = Field(primary_key=True, foreign_key="printers.id") raw_block: bytes | None = Field(default=None, sa_column=Column(LargeBinary)) parsed: dict[str, Any] | None = Field(default=None, sa_column=Column(JSON)) - captured_at: datetime | None = None + captured_at: datetime | None = Field( + default=None, + sa_column=Column(DateTime(timezone=True), nullable=True), + ) updated_at: datetime = Field( default_factory=lambda: datetime.now(UTC), - sa_column_kwargs={"onupdate": lambda: datetime.now(UTC)}, + sa_column=Column( + DateTime(timezone=True), + nullable=False, + onupdate=lambda: datetime.now(UTC), + ), ) diff --git a/backend/app/models/template.py b/backend/app/models/template.py index 5c53399..09769fc 100644 --- a/backend/app/models/template.py +++ b/backend/app/models/template.py @@ -6,7 +6,7 @@ 
from typing import Any from uuid import UUID, uuid4 -from sqlalchemy import JSON, CheckConstraint +from sqlalchemy import JSON, CheckConstraint, DateTime from sqlmodel import Column, Field, SQLModel @@ -23,8 +23,15 @@ class Template(SQLModel, table=True): schema_version: int = Field(default=1) definition: dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON)) source: str = Field(default="user") - created_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC), + sa_column=Column(DateTime(timezone=True), nullable=False), + ) updated_at: datetime = Field( default_factory=lambda: datetime.now(UTC), - sa_column_kwargs={"onupdate": lambda: datetime.now(UTC)}, + sa_column=Column( + DateTime(timezone=True), + nullable=False, + onupdate=lambda: datetime.now(UTC), + ), ) diff --git a/backend/app/printer_models/pt.py b/backend/app/printer_models/pt.py index 94ae81f..06c5a09 100644 --- a/backend/app/printer_models/pt.py +++ b/backend/app/printer_models/pt.py @@ -8,6 +8,7 @@ import logging from typing import TYPE_CHECKING, Any, ClassVar, cast +from uuid import UUID, uuid4 from PIL import Image @@ -222,12 +223,15 @@ def make_queue_printer( tape_registry: TapeRegistry, *, default_media_type: MediaType = MediaType.LAMINATED, + printer_id: UUID | None = None, ) -> _PTPQueuePrinter: + pid = printer_id if printer_id is not None else uuid4() return _PTPQueuePrinter( driver=self, backend=self._backend, tape_registry=tape_registry, default_media_type=default_media_type, + printer_id=pid, ) @@ -241,12 +245,13 @@ def __init__( backend: PrinterBackend, tape_registry: TapeRegistry, default_media_type: MediaType, + printer_id: UUID, ) -> None: self._driver = driver self._backend = backend self._tape_registry = tape_registry self._default_media_type = default_media_type - self.id: str = f"{driver.model_id}@{backend.host}" + self.id: UUID = printer_id async def print_image(self, image: 
Image.Image, *, tape_mm: int, **options: Any) -> None: media_type = options.pop("media_type", self._default_media_type) diff --git a/backend/app/schemas/_datetime.py b/backend/app/schemas/_datetime.py new file mode 100644 index 0000000..168cb28 --- /dev/null +++ b/backend/app/schemas/_datetime.py @@ -0,0 +1,23 @@ +"""Helpers for datetime serialisation in Pydantic schemas. + +The Go frontend's oapi-codegen client uses strict RFC3339 parsing which +rejects naive datetimes (no `Z` or `+HH:MM` suffix). This helper normalises +every datetime to a timezone-aware UTC value before serialisation. +""" + +from __future__ import annotations + +import datetime as _dt +from typing import Any + + +def serialize_datetime_utc(dt: _dt.datetime, _info: Any) -> str: + """Pydantic field-serializer: emit RFC3339 with `Z` for UTC values. + + - naive datetimes are treated as UTC (matches SQLite legacy behaviour) + - UTC-aware datetimes are emitted with `Z` + - non-UTC-aware datetimes keep their explicit offset + """ + if dt.tzinfo is None: + dt = dt.replace(tzinfo=_dt.UTC) + return dt.isoformat().replace("+00:00", "Z") diff --git a/backend/app/schemas/job.py b/backend/app/schemas/job.py index 9598715..02bdc7b 100644 --- a/backend/app/schemas/job.py +++ b/backend/app/schemas/job.py @@ -11,7 +11,9 @@ from typing import Any from uuid import UUID -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, field_serializer + +from app.schemas._datetime import serialize_datetime_utc class JobRead(BaseModel): @@ -30,3 +32,15 @@ class JobRead(BaseModel): updated_at: datetime started_at: datetime | None finished_at: datetime | None + + @field_serializer("created_at", "updated_at") + def _serialise_datetimes(self, dt: datetime, _info: object) -> str: + return serialize_datetime_utc(dt, _info) + + @field_serializer("started_at") + def _serialise_started_at(self, dt: datetime | None, _info: object) -> str | None: + return serialize_datetime_utc(dt, _info) if dt is not 
None else None + + @field_serializer("finished_at") + def _serialise_finished_at(self, dt: datetime | None, _info: object) -> str | None: + return serialize_datetime_utc(dt, _info) if dt is not None else None diff --git a/backend/app/schemas/printer.py b/backend/app/schemas/printer.py index 0796002..8cf5785 100644 --- a/backend/app/schemas/printer.py +++ b/backend/app/schemas/printer.py @@ -10,7 +10,9 @@ from datetime import datetime from uuid import UUID -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, field_serializer + +from app.schemas._datetime import serialize_datetime_utc class PrinterRead(BaseModel): @@ -34,20 +36,34 @@ class PrinterRead(BaseModel): created_at: datetime updated_at: datetime + @field_serializer("created_at", "updated_at") + def _serialise_datetimes(self, dt: datetime, _info: object) -> str: + return serialize_datetime_utc(dt, _info) + class PrinterStatus(BaseModel): - """Live status result from a fresh ESC i S probe + cache write-back. + """Printer status sourced from the printer_status_cache table. + + The endpoint reads the cache row written by StatusProbeProducer instead + of doing a synchronous SNMP probe inline. This makes the response fast + (<10 ms) even when the printer is offline. ``tape_loaded`` is a human-readable string such as ``"12mm laminated black/clear"`` or ``None`` when no tape is inserted. ``error_state`` mirrors the active PrinterError flags as a string, or ``None`` when the printer is ready. - ``captured_at`` is the UTC timestamp of the probe that produced this - block. + ``captured_at`` is the UTC timestamp of the probe that last updated the + cache row. ``None`` means no probe has completed yet. + ``last_probe_age_s`` is the age of the cached reading in seconds. + ``last_error`` is the exception message from the most recent failed probe. + ``note`` carries a human-readable hint (e.g. "No probe yet"). 
# Result of a single readiness check; used as the value type of
# ReadinessResponse.checks.
# Documented with comments rather than a class docstring so the text does not
# end up in the generated OpenAPI schema description.
class CheckStatus(BaseModel):
    # frozen=True: instances are not meant to be mutated after construction.
    model_config = ConfigDict(frozen=True)

    # Outcome of the check; restricted to these four literals.
    # NOTE(review): "stale" presumably relates to probe freshness — confirm
    # against app.services.readiness.
    status: Literal["ok", "fail", "skipped", "stale"]
    # Optional human-readable explanation.
    detail: str | None = None
    # Optional free-form measurements for the check; shape is not fixed here.
    metric: dict[str, Any] | None = None


# Body of the /readiness endpoint.
class ReadinessResponse(BaseModel):
    # frozen=True: instances are not meant to be mutated after construction.
    model_config = ConfigDict(frozen=True)

    # Overall verdict. Per the route description, "ready" and "degraded" are
    # served with HTTP 200 and "not-ready" with HTTP 503.
    status: Literal["ready", "degraded", "not-ready"]
    # Per-check results keyed by check name.
    checks: dict[str, CheckStatus]
    # Application version string supplied by the caller.
    version: str
    # Source revision string supplied by the caller.
    revision: str
b/backend/app/schemas/template_read.py index 77d6fff..c88978c 100644 --- a/backend/app/schemas/template_read.py +++ b/backend/app/schemas/template_read.py @@ -12,7 +12,9 @@ from typing import Any from uuid import UUID -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, field_serializer + +from app.schemas._datetime import serialize_datetime_utc class TemplateRead(BaseModel): @@ -31,3 +33,7 @@ class TemplateRead(BaseModel): source: str created_at: datetime updated_at: datetime + + @field_serializer("created_at", "updated_at") + def _serialise_datetimes(self, dt: datetime, _info: object) -> str: + return serialize_datetime_utc(dt, _info) diff --git a/backend/app/services/job_lifecycle.py b/backend/app/services/job_lifecycle.py index 25034c8..32d34c3 100644 --- a/backend/app/services/job_lifecycle.py +++ b/backend/app/services/job_lifecycle.py @@ -27,6 +27,7 @@ from datetime import UTC, datetime from enum import StrEnum from typing import Any +from uuid import UUID class JobState(StrEnum): @@ -74,7 +75,7 @@ class Job: """A single print job. In-memory MVP; persistence comes in Phase 5.""" id: str - printer_id: str + printer_id: UUID state: JobState = JobState.QUEUED submitted_at: datetime = field(default_factory=lambda: datetime.now(UTC)) started_at: datetime | None = None diff --git a/backend/app/services/print_queue.py b/backend/app/services/print_queue.py index 2758c7f..7ea49d3 100644 --- a/backend/app/services/print_queue.py +++ b/backend/app/services/print_queue.py @@ -21,6 +21,7 @@ from enum import StrEnum from io import BytesIO from typing import Any, Protocol, runtime_checkable +from uuid import UUID from PIL import Image @@ -65,7 +66,7 @@ class PrinterAlreadyActiveError(Exception): (409) without relying on response body inspection. 
""" - def __init__(self, printer_id: str) -> None: + def __init__(self, printer_id: UUID) -> None: super().__init__(f"Printer {printer_id!r} is already active") self.printer_id = printer_id @@ -121,7 +122,7 @@ class _PrinterLike(Protocol): `**options` carries driver-specific extras that vary per plugin. """ - id: str + id: UUID async def print_image(self, image: Image.Image, *, tape_mm: int, **options: Any) -> None: ... @@ -135,16 +136,16 @@ def __init__( on_state_change: _StateChangeCallback | None = None, ) -> None: self._on_state_change = on_state_change - self._printers: dict[str, _PrinterLike] = {p.id: p for p in printers} + self._printers: dict[UUID, _PrinterLike] = {p.id: p for p in printers} # Queue type is Job | None — None is the sentinel used by stop() to wake # workers that are blocked at queue.get(). - self._queues: dict[str, asyncio.Queue[Job | None]] = { + self._queues: dict[UUID, asyncio.Queue[Job | None]] = { p.id: asyncio.Queue() for p in printers } - self._worker_states: dict[str, PrinterWorkerState] = { + self._worker_states: dict[UUID, PrinterWorkerState] = { p.id: PrinterWorkerState.ACTIVE for p in printers } - self._worker_resume_events: dict[str, asyncio.Event] = { + self._worker_resume_events: dict[UUID, asyncio.Event] = { p.id: asyncio.Event() for p in printers } # All resume events start "set" so a never-paused worker doesn't block. @@ -154,7 +155,7 @@ def __init__( # terminal jobs older than a configurable window once # persistence lands. 
self._jobs: dict[str, Job] = {} - self._workers: dict[str, asyncio.Task[None]] = {} + self._workers: dict[UUID, asyncio.Task[None]] = {} self._running: bool = False self._stopping: bool = False @@ -231,7 +232,7 @@ async def stop(self, timeout_s: float = 30.0) -> None: async def submit( self, - printer_id: str, + printer_id: UUID, image: Image.Image, tape_mm: int, **options: Any, @@ -258,7 +259,7 @@ async def submit( async def submit_paused( self, - printer_id: str, + printer_id: UUID, image: Image.Image, tape_mm: int, **options: Any, @@ -378,7 +379,7 @@ async def retry_job(self, job_id: str) -> str | None: # --- per-printer control ----------------------------------------------- - async def pause_printer(self, printer_id: str, reason: str = "") -> None: + async def pause_printer(self, printer_id: UUID, reason: str = "") -> None: """Pause the worker for a printer. Any in-flight job completes first.""" if printer_id not in self._worker_states: raise KeyError(f"Unknown printer: {printer_id}") @@ -386,7 +387,7 @@ async def pause_printer(self, printer_id: str, reason: str = "") -> None: self._worker_resume_events[printer_id].clear() logger.info("Printer %s paused: %s", printer_id, reason) - async def resume_printer(self, printer_id: str) -> None: + async def resume_printer(self, printer_id: UUID) -> None: """Resume a paused printer worker. Raises: @@ -403,7 +404,7 @@ async def resume_printer(self, printer_id: str) -> None: self._worker_resume_events[printer_id].set() logger.info("Printer %s resumed", printer_id) - def _queue_depth(self, printer_id: str) -> int: + def _queue_depth(self, printer_id: UUID) -> int: """Count non-terminal jobs for *printer_id* (QUEUED + PAUSED + PRINTING). O(N) over all-time jobs — acceptable at MVP scale. 
Used to populate @@ -414,7 +415,7 @@ def _queue_depth(self, printer_id: str) -> int: 1 for j in self._jobs.values() if j.printer_id == printer_id and j.state in non_terminal ) - async def list_queue(self, printer_id: str) -> list[Job]: + async def list_queue(self, printer_id: UUID) -> list[Job]: """All non-terminal jobs for a printer (queued + paused + printing). O(N) over all-time jobs — acceptable at MVP scale; see TODO(phase5) @@ -427,7 +428,7 @@ async def list_queue(self, printer_id: str) -> list[Job]: j for j in self._jobs.values() if j.printer_id == printer_id and j.state in non_terminal ] - async def clear_queue(self, printer_id: str) -> int: + async def clear_queue(self, printer_id: UUID) -> int: """Cancel all queued + paused jobs for a printer. Returns the count. O(N) over all-time jobs — acceptable at MVP scale; see TODO(phase5) @@ -483,7 +484,7 @@ def _notify_state_change( to_state.value, ) - async def _worker(self, printer_id: str) -> None: + async def _worker(self, printer_id: UUID) -> None: """Consume the queue for one printer, one job at a time. After popping a job the worker checks the pause state — this handles diff --git a/backend/app/services/print_service.py b/backend/app/services/print_service.py index f5deefd..aa4f0e3 100644 --- a/backend/app/services/print_service.py +++ b/backend/app/services/print_service.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Protocol +from uuid import UUID from PIL import Image @@ -40,7 +41,7 @@ def __init__( renderer: _RendererProto, print_queue: PrintQueue, lookup_service: _LookupServiceProto, - printer_id: str, + printer_id: UUID, backend: _BackendProto, ) -> None: self._loader = template_loader diff --git a/backend/app/services/printer_identity.py b/backend/app/services/printer_identity.py new file mode 100644 index 0000000..65fab3d --- /dev/null +++ b/backend/app/services/printer_identity.py @@ -0,0 +1,27 @@ +"""Deterministic printer UUIDv5 derived from environment configuration. 
+ +Phase 7b Cluster 1b: lifespan computes a stable identifier from +``(model, host, port)`` so the runtime printer (driver.make_queue_printer) +and the DB row (upsert_runtime_printer) share the same ``printer.id`` +across restarts. + +The namespace UUID is a constant committed to the repo — do NOT change +without a coordinated DB migration: every existing printer row would +become orphaned. +""" + +from __future__ import annotations + +from uuid import UUID, uuid5 + +# Phase 7b namespace constant; chosen randomly. Do not alter. +_PRINTER_NAMESPACE = UUID("6f1b3c7e-9d6a-4f48-9a8c-d4e0e1c5a3b2") + + +def derive_printer_id(model: str, host: str, port: int) -> UUID: + """Return a deterministic UUIDv5 for ``(model, host, port)``. + + ``model`` is lower-cased before hashing so environment-supplied values + like ``PT-P750W`` and ``pt-p750w`` map to the same identifier. + """ + return uuid5(_PRINTER_NAMESPACE, f"{model.lower()}|{host}|{port}") diff --git a/backend/app/services/producers/status_probe_producer.py b/backend/app/services/producers/status_probe_producer.py index 6a5bc2e..ddfddc7 100644 --- a/backend/app/services/producers/status_probe_producer.py +++ b/backend/app/services/producers/status_probe_producer.py @@ -48,6 +48,7 @@ import contextlib import logging from datetime import UTC, datetime +from uuid import UUID from app.printer_backends.snmp_helper import PreflightStatus, query_preflight from app.services.event_bus import BusEvent, EventBus @@ -102,81 +103,167 @@ def _has_changed(self, new: PreflightStatus, new_online: bool) -> bool: or new_online != self._last_online ) - async def _loop(self) -> None: - while True: - try: - status = await query_preflight( - self._host, - community=self._community, - timeout_s=5.0, + # ------------------------------------------------------------------ + # Cache helpers + # ------------------------------------------------------------------ + + async def _upsert_cache(self, snmp_result: PreflightStatus) -> None: + """Persist a 
successful SNMP probe result into printer_status_cache.""" + from app.db.engine import async_session + from app.models.printer_status_cache import PrinterStatusCache + + try: + printer_uuid = ( + UUID(self._printer_id) if isinstance(self._printer_id, str) else self._printer_id + ) + except (ValueError, AttributeError): + _log.debug("_upsert_cache: printer_id %r is not a valid UUID — skip", self._printer_id) + return + parsed = { + "online": True, + "loaded_tape_mm": snmp_result.loaded_tape_mm, + "hr_printer_status": snmp_result.hr_printer_status, + "error_flags": list(snmp_result.error_flags), + } + now = datetime.now(UTC) + async with async_session() as s: + row = await s.get(PrinterStatusCache, printer_uuid) + if row is not None: + row.parsed = parsed + row.raw_block = None + row.captured_at = now + else: + s.add( + PrinterStatusCache( + printer_id=printer_uuid, + parsed=parsed, + raw_block=None, + captured_at=now, + ) ) - new_online = True - - # Notify tape producer BEFORE updating _last so it receives - # the correct 'previous' tape state (old=self._last, new=status). - if self._tape_producer is not None: - self._tape_producer.on_probe_result(self._printer_id, self._last, status) - - changed = self._has_changed(status, new_online) - - # Update _last AFTER both tape-notification and change-check so - # that (a) the tape producer above received the correct 'from' - # tape, (b) _has_changed compared against the real previous - # value. Unconditional update prevents stale-_last bugs on - # the next iteration (see module docstring invariant). 
- self._last = status - self._last_online = new_online - - if changed: - channel = f"printer:{self._printer_id}:state" - self._bus.publish( - channel, - BusEvent( - channel=channel, - event_id=self._bus.next_event_id(channel), - event_type="printer.status", - timestamp=datetime.now(UTC), - data={ - "hr_printer_status": status.hr_printer_status, - "error_flags": list(status.error_flags), - "online": True, - }, - ), + await s.commit() + + async def _mark_offline(self, exc: Exception) -> None: + """Persist a failed probe; preserves any previous parsed snapshot.""" + from app.db.engine import async_session + from app.models.printer_status_cache import PrinterStatusCache + + try: + printer_uuid = ( + UUID(self._printer_id) if isinstance(self._printer_id, str) else self._printer_id + ) + except (ValueError, AttributeError): + _log.debug("_mark_offline: printer_id %r is not a valid UUID — skip", self._printer_id) + return + now = datetime.now(UTC) + async with async_session() as s: + row = await s.get(PrinterStatusCache, printer_uuid) + parsed: dict[str, object] = dict(row.parsed) if (row is not None and row.parsed) else {} + parsed["online"] = False + parsed["last_error"] = str(exc) + if row is not None: + row.parsed = parsed + row.captured_at = now + else: + s.add( + PrinterStatusCache( + printer_id=printer_uuid, + parsed=parsed, + captured_at=now, ) - except asyncio.CancelledError: - raise - except Exception: - _log.exception( - "StatusProbeProducer: SNMP probe failed for printer=%s", - self._printer_id, ) - offline = PreflightStatus( - hr_printer_status="other", - loaded_tape_mm=None, - error_flags=[], + await s.commit() + + # ------------------------------------------------------------------ + # Single probe iteration (extracted for testability) + # ------------------------------------------------------------------ + + async def _probe_once(self) -> None: + """Run one SNMP probe cycle: query, write cache, publish on change.""" + try: + status = await query_preflight( 
+ self._host, + community=self._community, + timeout_s=5.0, + ) + new_online = True + + # Write to cache first (always) + await self._upsert_cache(status) + + # Notify tape producer BEFORE updating _last so it receives + # the correct 'previous' tape state (old=self._last, new=status). + if self._tape_producer is not None: + self._tape_producer.on_probe_result(self._printer_id, self._last, status) + + changed = self._has_changed(status, new_online) + + # Update _last AFTER both tape-notification and change-check so + # that (a) the tape producer above received the correct 'from' + # tape, (b) _has_changed compared against the real previous + # value. Unconditional update prevents stale-_last bugs on + # the next iteration (see module docstring invariant). + self._last = status + self._last_online = new_online + + if changed: + channel = f"printer:{self._printer_id}:state" + self._bus.publish( + channel, + BusEvent( + channel=channel, + event_id=self._bus.next_event_id(channel), + event_type="printer.status", + timestamp=datetime.now(UTC), + data={ + "hr_printer_status": status.hr_printer_status, + "error_flags": list(status.error_flags), + "online": True, + }, + ), ) - new_online = False - changed = self._has_changed(offline, new_online) - - # Update _last unconditionally AFTER change-check (same reasoning as - # success branch — see module docstring invariant). 
- self._last = offline - self._last_online = new_online - - if changed: - channel = f"printer:{self._printer_id}:state" - self._bus.publish( - channel, - BusEvent( - channel=channel, - event_id=self._bus.next_event_id(channel), - event_type="printer.status", - timestamp=datetime.now(UTC), - data={ - "hr_printer_status": "other", - "error_flags": [], - "online": False, - }, - ), - ) + except asyncio.CancelledError: + raise + except Exception as exc: + _log.exception( + "StatusProbeProducer: SNMP probe failed for printer=%s", + self._printer_id, + ) + + # Write offline state to cache (preserves prior data) + await self._mark_offline(exc) + + offline = PreflightStatus( + hr_printer_status="other", + loaded_tape_mm=None, + error_flags=[], + ) + new_online = False + changed = self._has_changed(offline, new_online) + + # Update _last unconditionally AFTER change-check (same reasoning as + # success branch — see module docstring invariant). + self._last = offline + self._last_online = new_online + + if changed: + channel = f"printer:{self._printer_id}:state" + self._bus.publish( + channel, + BusEvent( + channel=channel, + event_id=self._bus.next_event_id(channel), + event_type="printer.status", + timestamp=datetime.now(UTC), + data={ + "hr_printer_status": "other", + "error_flags": [], + "online": False, + }, + ), + ) + + async def _loop(self) -> None: + while True: + await self._probe_once() await asyncio.sleep(self._interval_s) diff --git a/backend/app/services/readiness.py b/backend/app/services/readiness.py new file mode 100644 index 0000000..c20d0a6 --- /dev/null +++ b/backend/app/services/readiness.py @@ -0,0 +1,175 @@ +"""Phase 7b Cluster 1e — readiness aggregator (all 8 checks). 
+ +Checks implemented: + database — SELECT 1 latency (critical) + alembic — alembic_version at head (critical) + template_seed — templates table non-empty (critical) + printer_runtime — app.state.printer_id set (non-critical) + printer_db_sync — runtime printer_id has a DB row (non-critical) + snmp_discovery — PrinterStatusCache recency (non-critical) + print_queue — print_queue in app.state (non-critical) + sse_bus — subscriber capacity (non-critical) + +F4 wires the FastAPI route + HTTP status mapping. +""" + +from __future__ import annotations + +import time +from datetime import UTC, datetime +from typing import Any, Literal + +from sqlalchemy import func, select, text +from sqlalchemy.ext.asyncio import AsyncSession + +from app.config import Settings +from app.models.printer import Printer +from app.models.printer_status_cache import PrinterStatusCache +from app.models.template import Template +from app.schemas.readiness import CheckStatus, ReadinessResponse + +_CRITICAL_CHECKS = ("database", "alembic", "template_seed") + + +async def _check_database(session: AsyncSession) -> CheckStatus: + try: + t0 = time.monotonic() + await session.execute(text("SELECT 1")) + latency_ms = round((time.monotonic() - t0) * 1000, 2) + return CheckStatus(status="ok", metric={"latency_ms": latency_ms}) + except Exception as exc: + return CheckStatus(status="fail", detail=str(exc)) + + +async def _check_alembic(settings: Settings) -> CheckStatus: + from app.db.lifespan import verify_alembic_at_head + + try: + await verify_alembic_at_head(settings) + return CheckStatus(status="ok") + except Exception as exc: + return CheckStatus(status="fail", detail=str(exc)) + + +async def _check_template_seed(session: AsyncSession) -> CheckStatus: + count = await session.scalar(select(func.count()).select_from(Template)) + cnt = int(count or 0) + if cnt >= 1: + return CheckStatus(status="ok", metric={"templates_in_db": cnt}) + return CheckStatus( + status="fail", + detail="Templates table is 
empty — lifespan init-order regression?", + metric={"templates_in_db": cnt}, + ) + + +def _check_printer_runtime(app_state: Any) -> CheckStatus: + pid = getattr(app_state, "printer_id", None) + if pid is None: + return CheckStatus(status="fail", detail="app.state.printer_id is None") + return CheckStatus(status="ok", metric={"printer_id": str(pid)}) + + +async def _check_printer_db_sync(session: AsyncSession, app_state: Any) -> CheckStatus: + pid = getattr(app_state, "printer_id", None) + if pid is None: + return CheckStatus(status="skipped", detail="No runtime printer") + row = await session.get(Printer, pid) + if row is None: + return CheckStatus( + status="fail", + detail=f"app.state.printer_id={pid} has no matching DB row", + ) + return CheckStatus(status="ok") + + +async def _check_snmp_discovery(session: AsyncSession, app_state: Any) -> CheckStatus: + pid = getattr(app_state, "printer_id", None) + if pid is None: + return CheckStatus(status="skipped", detail="No runtime printer") + row = await session.get(PrinterStatusCache, pid) + if row is None or row.captured_at is None: + return CheckStatus(status="fail", detail="No SNMP probe recorded yet") + captured = row.captured_at + if captured.tzinfo is None: + captured = captured.replace(tzinfo=UTC) + age_s = int((datetime.now(UTC) - captured).total_seconds()) + metric: dict[str, Any] = {"last_probe_age_s": age_s} + if age_s < 90: + return CheckStatus(status="ok", metric=metric) + if age_s < 600: + return CheckStatus(status="stale", detail=f"{age_s}s ago (>90s)", metric=metric) + return CheckStatus( + status="fail", + detail=f"{age_s}s ago (>600s) — printer offline?", + metric=metric, + ) + + +def _check_print_queue(app_state: Any) -> CheckStatus: + queue = getattr(app_state, "print_queue", None) + if queue is None: + return CheckStatus(status="fail", detail="print_queue not in app.state") + worker_count_fn = getattr(queue, "worker_count", lambda: 1) + return CheckStatus(status="ok", metric={"worker_count": 
worker_count_fn()}) + + +def _check_sse_bus(app_state: Any, settings: Settings) -> CheckStatus: + """Check SSE bus subscriber capacity. + + Supports both the real :class:`~app.services.event_bus.EventBus` + (which exposes ``distinct_subscriber_count()``) and the lightweight + ``types.SimpleNamespace`` fakes used in unit tests (which expose + ``subscriber_count()`` and ``max_subscribers``). + """ + bus = getattr(app_state, "event_bus", None) + if bus is None: + return CheckStatus(status="skipped", detail="event_bus not configured") + # Prefer distinct_subscriber_count (real EventBus) — fall back to + # subscriber_count (unit-test fakes that lack the real method). + if hasattr(bus, "distinct_subscriber_count"): + subs = bus.distinct_subscriber_count() + else: + subs = getattr(bus, "subscriber_count", lambda: 0)() + # max_subscribers comes from Settings on the real bus; fakes expose it + # directly as an attribute for hermetic unit tests. + max_subs = getattr(bus, "max_subscribers", None) or settings.sse_max_subscribers + metric: dict[str, Any] = {"subscribers": subs, "max": max_subs} + if subs >= max_subs: + return CheckStatus(status="fail", detail="subscriber pool exhausted", metric=metric) + return CheckStatus(status="ok", metric=metric) + + +def _aggregate(checks: dict[str, CheckStatus]) -> Literal["ready", "degraded", "not-ready"]: + if any(checks[name].status == "fail" for name in _CRITICAL_CHECKS if name in checks): + return "not-ready" + if any(c.status == "fail" for c in checks.values()): + return "degraded" + return "ready" + + +async def build_readiness_response( + session: AsyncSession, + app_state: Any, + settings: Settings, + *, + version: str, + revision: str, +) -> ReadinessResponse: + """Run all 8 readiness checks and aggregate the result.""" + checks: dict[str, CheckStatus] = { + "database": await _check_database(session), + "alembic": await _check_alembic(settings), + "template_seed": await _check_template_seed(session), + "printer_runtime": 
_check_printer_runtime(app_state), + "printer_db_sync": await _check_printer_db_sync(session, app_state), + "snmp_discovery": await _check_snmp_discovery(session, app_state), + "print_queue": _check_print_queue(app_state), + "sse_bus": _check_sse_bus(app_state, settings), + } + return ReadinessResponse( + status=_aggregate(checks), + checks=checks, + version=version, + revision=revision, + ) diff --git a/backend/tests/db/test_lifespan.py b/backend/tests/db/test_lifespan.py index 7ee3e56..78a3033 100644 --- a/backend/tests/db/test_lifespan.py +++ b/backend/tests/db/test_lifespan.py @@ -43,6 +43,9 @@ class _MockLoader: Avoids loading real YAML files (which would require IntegrationRegistry to have plugins registered) and keeps the test self-contained. + + ``_cache`` is populated on construction to satisfy the D1 defensive check + in seed_templates (which raises RuntimeError when the cache is empty). """ def __init__(self, count: int = 3) -> None: @@ -50,6 +53,9 @@ def __init__(self, count: int = 3) -> None: self._templates = { f"tpl-{i}": _schema_stub(f"tpl-{i}", f"Template {i}") for i in range(count) } + # Mirror _templates in _cache so the D1 check passes — this mock + # represents a loader that has already called load_dir(). 
+ self._cache = dict(self._templates) def all(self) -> dict: return dict(self._templates) @@ -102,6 +108,20 @@ def _schema_stub(id_: str, name: str): # --------------------------------------------------------------------------- +@pytest.mark.asyncio +async def test_seed_templates_raises_on_empty_loader_cache(): + """Cluster 1a defensive check: empty TemplateLoader cache → RuntimeError, no silent no-op.""" + from app.services.template_loader import TemplateLoader + + original_cache = dict(TemplateLoader._cache) + TemplateLoader._cache.clear() + try: + with pytest.raises(RuntimeError, match="empty"): + await seed_templates(None, TemplateLoader) # type: ignore[arg-type] + finally: + TemplateLoader._cache = original_cache + + @pytest.mark.asyncio async def test_recover_marks_inflight_as_failed_restart(session): """recover_inflight_jobs sweeps QUEUED jobs to FAILED_RESTART.""" diff --git a/backend/tests/integration/api/__init__.py b/backend/tests/integration/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/integration/api/conftest.py b/backend/tests/integration/api/conftest.py new file mode 100644 index 0000000..37a1655 --- /dev/null +++ b/backend/tests/integration/api/conftest.py @@ -0,0 +1,56 @@ +"""Shared fixtures for API integration tests.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest_asyncio +from httpx import ASGITransport, AsyncClient + +# TODO(#22): simplify this fixture once Phase 7b Task D2 lands — +# the lifespan re-order (load_dir BEFORE seed_templates) will let us drop +# the manual TemplateLoader.load_dir() + seed_templates() pre-seeding here. + +_SEED_DIR = Path(__file__).parents[3] / "app" / "seed" / "templates" + + +@pytest_asyncio.fixture +async def api_client_with_seed(): + """AsyncClient against the full app with templates seeded. 
+ + Propagates the autouse _temp_db_engine patch from + tests/integration/conftest.py into app.db.session (which holds a + name-bound `async_session` snapshot taken at import time and is NOT + updated automatically when engine.py's namespace gets monkey-patched). + """ + import app.db.engine as _engine_module + import app.db.session as _session_module + from app.db.lifespan import seed_templates + from app.main import create_app + from app.services.template_loader import TemplateLoader + + _session_module.async_session = _engine_module.async_session + + # Re-run integration plugin discovery when the lifespan from a previous + # test has cleared IntegrationRegistry (see main.py lifespan shutdown). + # TemplateLoader.load_dir validates template.app against IntegrationRegistry, + # so we must ensure the registry is populated before calling load_dir. + from app.integrations import ( # type: ignore[attr-defined] + IntegrationRegistry, + _discover_plugins, + ) + + if not IntegrationRegistry.names(): + _discover_plugins() + + original_cache = dict(TemplateLoader._cache) + TemplateLoader.load_dir(_SEED_DIR) + try: + async with _engine_module.async_session() as s: + await seed_templates(s, TemplateLoader) + + app = create_app() + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://t") as c: + yield c + finally: + TemplateLoader._cache = original_cache diff --git a/backend/tests/integration/api/test_api_datetime_format.py b/backend/tests/integration/api/test_api_datetime_format.py new file mode 100644 index 0000000..c6272c7 --- /dev/null +++ b/backend/tests/integration/api/test_api_datetime_format.py @@ -0,0 +1,76 @@ +"""Phase 7b Cluster 1c contract test — every datetime field in the API +response must include a timezone suffix (Z or +HH:MM).""" + +from __future__ import annotations + +from datetime import datetime + +import pytest + +pytestmark = pytest.mark.asyncio + + +def _has_tz_suffix(s: str) -> bool: + """True if string ends with Z or contains an 
explicit +/- TZ offset (skip date dashes).""" + return s.endswith("Z") or "+" in s or "-" in s[10:] + + +async def test_template_read_has_tz_suffix(api_client_with_seed): + """GET /api/templates returns datetimes with TZ info that fromisoformat can parse.""" + resp = await api_client_with_seed.get("/api/templates") + assert resp.status_code == 200 + body = resp.json() + assert body, "expected at least one seeded template" + for t in body: + for field in ("created_at", "updated_at"): + assert _has_tz_suffix(t[field]), ( + f"template {t.get('key', '?')}: {field}={t[field]!r} missing TZ suffix" + ) + datetime.fromisoformat(t[field].replace("Z", "+00:00")) + + +async def test_printer_read_has_tz_suffix(api_client_with_seed): + """GET /api/printers returns datetimes with TZ info. + + TODO: Task C2 (upsert_runtime_printer) will auto-seed a printer at startup, + making this test always exercise the assertion block. Until then, the test + skips gracefully when no printers exist in the test DB. + """ + resp = await api_client_with_seed.get("/api/printers") + assert resp.status_code == 200 + body = resp.json() + if not body: + pytest.skip("No printers seeded — will be re-enabled after Task C2 auto-seeds a printer") + for p in body: + for field in ("created_at", "updated_at"): + assert _has_tz_suffix(p[field]), ( + f"printer {p.get('id', '?')}: {field}={p[field]!r} missing TZ suffix" + ) + datetime.fromisoformat(p[field].replace("Z", "+00:00")) + + +async def test_job_read_has_tz_suffix(api_client_with_seed): + """GET /api/jobs returns datetimes with TZ info on all datetime fields. + + TODO: Task C2 (upsert_runtime_printer) will auto-seed a printer; an explicit + print invocation will create jobs. Until then, the test skips gracefully when + no jobs exist in the test DB. 
+ """ + resp = await api_client_with_seed.get("/api/jobs") + assert resp.status_code == 200 + body = resp.json() + if not body: + pytest.skip("No jobs seeded — will be re-enabled after Task C2 auto-seeds printer+jobs") + for j in body: + for field in ("created_at", "updated_at"): + assert _has_tz_suffix(j[field]), ( + f"job {j.get('id', '?')}: {field}={j[field]!r} missing TZ suffix" + ) + datetime.fromisoformat(j[field].replace("Z", "+00:00")) + # nullable datetime fields — only assert when present + for field in ("started_at", "finished_at"): + if j[field] is not None: + assert _has_tz_suffix(j[field]), ( + f"job {j.get('id', '?')}: {field}={j[field]!r} missing TZ suffix" + ) + datetime.fromisoformat(j[field].replace("Z", "+00:00")) diff --git a/backend/tests/integration/api/test_readiness_endpoint.py b/backend/tests/integration/api/test_readiness_endpoint.py new file mode 100644 index 0000000..53f8a35 --- /dev/null +++ b/backend/tests/integration/api/test_readiness_endpoint.py @@ -0,0 +1,36 @@ +"""Phase 7b Cluster 1e — /readiness deep-check endpoint.""" + +from __future__ import annotations + +import pytest + +pytestmark = pytest.mark.asyncio + + +async def test_readiness_returns_200_when_ready(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + body = resp.json() + # template_seed will be ok (the fixture seeds), other critical checks ok → + # printer_runtime may fail (no PT-P750W env) but that's non-critical, so degraded. + # Both ready and degraded should be 200. 
+ assert resp.status_code == 200 + assert body["status"] in {"ready", "degraded"} + assert "checks" in body + for required in ( + "database", + "alembic", + "template_seed", + "printer_runtime", + "printer_db_sync", + "snmp_discovery", + "print_queue", + "sse_bus", + ): + assert required in body["checks"], f"missing check: {required}" + + +async def test_readiness_returns_503_when_not_ready(api_client_with_broken_db): + resp = await api_client_with_broken_db.get("/readiness") + assert resp.status_code == 503 + body = resp.json() + assert body["status"] == "not-ready" diff --git a/backend/tests/integration/conftest.py b/backend/tests/integration/conftest.py index 6ae0d3d..c83c650 100644 --- a/backend/tests/integration/conftest.py +++ b/backend/tests/integration/conftest.py @@ -62,6 +62,11 @@ async def _temp_db_engine(monkeypatch: pytest.MonkeyPatch, tmp_path) -> None: # # on _lifespan_module is patched. The lifespan() function in main.py calls # its locally-bound `run_migrations` directly, so we must patch that name too. monkeypatch.setattr(_main_module, "run_migrations", _noop_migrations) + # verify_alembic_at_head checks the alembic_version table which does not + # exist in the create_all() schema (only Alembic populates it). Patch it + # to a no-op for the same reason run_migrations is patched above. + monkeypatch.setattr(_lifespan_module, "verify_alembic_at_head", _noop_verify) + monkeypatch.setattr(_main_module, "verify_alembic_at_head", _noop_verify) yield await eng.dispose() @@ -76,6 +81,90 @@ async def _noop_migrations() -> None: """ +async def _noop_verify(*_args, **_kwargs) -> None: + """Drop-in replacement for verify_alembic_at_head() in integration fixtures. + + The _temp_db_engine fixture builds the schema via SQLModel.metadata.create_all() + which does not populate the alembic_version table. Patching out the verify + step avoids a spurious RuntimeError ("drift detected") — the same rationale + as patching run_migrations to a no-op. 
+ """ + + +async def _noop_seed_templates(*_args, **_kwargs) -> int: # type: ignore[no-untyped-def] + """Drop-in replacement for seed_templates() in integration test fixtures. + + The D1 defensive check raises RuntimeError when TemplateLoader._cache is + empty. Integration tests exercise the lifespan for other purposes (printer + startup, SSE, healthz) and do not need templates seeded. Patching this + no-op avoids a spurious failure until D2 fixes the load_dir ordering in + main.py lifespan. + """ + return 0 + + +@pytest_asyncio.fixture +async def api_client_with_broken_db(tmp_path): + """AsyncClient whose DB has never been alembic-upgraded. + + The alembic_version table is absent, so _check_alembic() returns fail + which makes build_readiness_response() return status=not-ready. + /readiness should therefore respond 503. + + /healthz MUST still respond 200 — it never touches the DB. + """ + + import app.db.engine as _eng + import app.db.session as _sess + from app.main import create_app + from httpx import ASGITransport, AsyncClient + + # Point at an empty SQLite file — create_all() gives it the schema + # tables but NOT the alembic_version row, so verify_alembic_at_head fails. + db_path = tmp_path / "broken.db" + url = f"sqlite+aiosqlite:///{db_path}" + eng = create_async_engine(url, echo=False, connect_args={"check_same_thread": False}) + event.listen(eng.sync_engine, "connect", _apply_pragmas) + async with eng.begin() as conn: + await conn.run_sync(SQLModel.metadata.create_all) + sess = async_sessionmaker(bind=eng, expire_on_commit=False) + + # Patch the session but do NOT patch verify_alembic_at_head — we want + # that check to fail so the readiness probe returns not-ready. + _sess.async_session = sess + + # Patch engine references so create_app() finds the right session. 
+ from unittest.mock import patch + + with ( + patch.object(_eng, "engine", eng), + patch.object(_eng, "async_session", sess), + patch.object(_main_module, "engine", eng), + patch.object(_main_module, "async_session", sess), + # run_migrations uses alembic.ini URL — patch to no-op so lifespan + # doesn't crash before the readiness endpoint is called. + patch.object(_lifespan_module, "run_migrations", _noop_migrations), + patch.object(_main_module, "run_migrations", _noop_migrations), + # seed_templates needs at least one cached template; patch to no-op + # since we only test /readiness and /healthz here. + patch.object(_lifespan_module, "seed_templates", _noop_seed_templates), + patch.object(_main_module, "seed_templates", _noop_seed_templates), + ): + from app.integrations import ( # type: ignore[attr-defined] + IntegrationRegistry, + _discover_plugins, + ) + + if not IntegrationRegistry.names(): + _discover_plugins() + + app = create_app() + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://t") as c: + yield c + + await eng.dispose() + + @pytest.fixture(autouse=True) def _mock_backend_env(monkeypatch: pytest.MonkeyPatch) -> None: """Ensure integration tests use the mock backend and a known model. diff --git a/backend/tests/integration/db/__init__.py b/backend/tests/integration/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/integration/db/conftest.py b/backend/tests/integration/db/conftest.py new file mode 100644 index 0000000..ecfd83a --- /dev/null +++ b/backend/tests/integration/db/conftest.py @@ -0,0 +1,48 @@ +"""Fixtures for DB-level integration tests (tests/integration/db/). + +Provides ``async_session_empty`` — an AsyncSession against a fresh SQLite DB +migrated to alembic head. This is intentionally independent of the +integration-level autouse fixtures (which monkeypatch the engine in main.py) +so that DB-helper tests can run in isolation. 
+""" + +from __future__ import annotations + +import asyncio +from pathlib import Path + +import pytest_asyncio +from alembic import command +from alembic.config import Config +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine + +_ALEMBIC_INI = Path(__file__).parents[3] / "alembic.ini" + + +@pytest_asyncio.fixture +async def async_session_empty(tmp_path): + """AsyncSession backed by a fresh per-test SQLite DB at alembic head. + + Phase 7b B6 learning: set ``configure_logger = False`` so alembic's + fileConfig does not call ``disable_existing_loggers`` and break caplog + assertions in subsequently-run tests. + + alembic's env.py calls ``asyncio.run()`` internally; to avoid the + "cannot be called from a running event loop" error, we run the upgrade + in a thread via asyncio.to_thread (same technique as + ``app.db.lifespan.run_migrations``). + """ + db = tmp_path / "phase7b_c2.db" + + def _upgrade() -> None: + cfg = Config(str(_ALEMBIC_INI)) + cfg.set_main_option("sqlalchemy.url", f"sqlite+aiosqlite:///{db}") + cfg.attributes["configure_logger"] = False + command.upgrade(cfg, "head") + + await asyncio.to_thread(_upgrade) + + engine = create_async_engine(f"sqlite+aiosqlite:///{db}", echo=False) + async with AsyncSession(engine, expire_on_commit=False) as session: + yield session + await engine.dispose() diff --git a/backend/tests/integration/db/test_alembic_phase7b_migration.py b/backend/tests/integration/db/test_alembic_phase7b_migration.py new file mode 100644 index 0000000..fa883aa --- /dev/null +++ b/backend/tests/integration/db/test_alembic_phase7b_migration.py @@ -0,0 +1,113 @@ +"""Phase 7b — datetime data migration normalises naive rows to tz-aware UTC. + +The migration must be idempotent: running it twice on the same row must NOT +result in `2026-05-17T12:00:00+00:00+00:00`. 
+""" + +from __future__ import annotations + +from pathlib import Path + +from alembic import command +from alembic.config import Config +from sqlalchemy import create_engine, text + +_ALEMBIC_INI = Path(__file__).parents[3] / "alembic.ini" + + +def _alembic_config(db_path: Path) -> Config: + cfg = Config(str(_ALEMBIC_INI)) + # env.py uses async_engine_from_config, so the aiosqlite async driver is required. + cfg.set_main_option("sqlalchemy.url", f"sqlite+aiosqlite:///{db_path}") + # Prevent alembic from calling logging.config.fileConfig() which invokes + # disable_existing_loggers=True and marks loggers such as `app.integrations` + # as disabled. When those loggers are disabled their records are silently + # dropped, breaking pytest caplog assertions in tests that run AFTER these + # migration tests. The same guard is already present in app/db/lifespan.py + # for the same reason. + cfg.attributes["configure_logger"] = False + return cfg + + +def test_migration_adds_tz_to_naive_template_row(tmp_path): + db = tmp_path / "phase7b_data.db" + sync_url = f"sqlite:///{db}" + cfg = _alembic_config(db) + + # Walk schema forward to head (gives us tables with the new column types). + command.upgrade(cfg, "head") + + # Roll back to the migration BEFORE this one so we can simulate a legacy + # DB with naive datetime rows, then upgrade forward and check the result. 
+ command.downgrade(cfg, "-1") + + sync_engine = create_engine(sync_url) + with sync_engine.begin() as conn: + conn.execute( + text( + "INSERT INTO templates (id, key, name, app, printer_model, " + "tape_width_mm, schema_version, definition, source, " + "created_at, updated_at) " + "VALUES ('11111111-1111-1111-1111-111111111111', 'k', 'n', NULL, " + "'pt-series', 12, 1, '{}', 'seed', " + "'2026-05-17T12:00:00', '2026-05-17T12:00:00')" + ) + ) + + command.upgrade(cfg, "head") + + with sync_engine.begin() as conn: + row = conn.execute( + text( + "SELECT created_at, updated_at FROM templates " + "WHERE id = '11111111-1111-1111-1111-111111111111'" + ) + ).first() + assert row is not None + for value in row: + assert value.endswith("+00:00") or value.endswith("Z"), ( + f"datetime not normalised: {value!r}" + ) + + sync_engine.dispose() + + +def test_migration_is_idempotent(tmp_path): + db = tmp_path / "phase7b_idempotent.db" + cfg = _alembic_config(db) + command.upgrade(cfg, "head") + command.upgrade(cfg, "head") # second run must be a no-op + + +def test_migration_does_not_touch_already_tz_aware_rows(tmp_path): + db = tmp_path / "phase7b_already_tz.db" + sync_url = f"sqlite:///{db}" + cfg = _alembic_config(db) + command.upgrade(cfg, "head") + command.downgrade(cfg, "-1") + + sync_engine = create_engine(sync_url) + with sync_engine.begin() as conn: + conn.execute( + text( + "INSERT INTO templates (id, key, name, app, printer_model, " + "tape_width_mm, schema_version, definition, source, " + "created_at, updated_at) " + "VALUES ('22222222-2222-2222-2222-222222222222', 'k2', 'n', NULL, " + "'pt-series', 12, 1, '{}', 'seed', " + "'2026-05-17T12:00:00+00:00', '2026-05-17T12:00:00+00:00')" + ) + ) + + command.upgrade(cfg, "head") + + with sync_engine.begin() as conn: + row = conn.execute( + text( + "SELECT created_at FROM templates WHERE id = '22222222-2222-2222-2222-222222222222'" + ) + ).first() + # Must not be '2026-05-17T12:00:00+00:00+00:00' + assert 
row[0].count("+00:00") == 1, f"double-suffix detected: {row[0]!r}" + + sync_engine.dispose() diff --git a/backend/tests/integration/db/test_lifespan_printer_upsert.py b/backend/tests/integration/db/test_lifespan_printer_upsert.py new file mode 100644 index 0000000..e4ad161 --- /dev/null +++ b/backend/tests/integration/db/test_lifespan_printer_upsert.py @@ -0,0 +1,93 @@ +"""Phase 7b Cluster 1b — upsert_runtime_printer materialises one Printer row +from env config, idempotent across restarts, returns None when env is silent.""" + +from __future__ import annotations + +import pytest +from app.config import Settings +from app.db.lifespan import upsert_runtime_printer +from app.models.printer import Printer +from app.services.printer_identity import derive_printer_id +from sqlmodel import select + +pytestmark = pytest.mark.asyncio + +_PT750W_HOST = "192.0.2.50" +_PT750W_PORT = 9100 +_PT750W_MODEL = "PT-P750W" + + +def _settings_with_pt750w() -> Settings: + """Settings with PT-P750W printer configured at a stable test address.""" + return Settings( + _env_file=None, + pt750w_host=_PT750W_HOST, + pt750w_port=_PT750W_PORT, + printer_model=_PT750W_MODEL, + printer_backend="ptouch", + printer_discover_via_snmp=False, + printer_snmp_community="public", + ) + + +def _settings_with_mock_backend() -> Settings: + """Settings without any printer host — mock/test backend, no row expected.""" + return Settings( + _env_file=None, + pt750w_host="", + ql820_host="", + printer_model="PT-P750W", + printer_backend="mock", + printer_discover_via_snmp=False, + ) + + +async def test_upsert_creates_row_when_db_empty(async_session_empty): + settings = _settings_with_pt750w() + expected_id = derive_printer_id(_PT750W_MODEL, _PT750W_HOST, _PT750W_PORT) + + returned_id = await upsert_runtime_printer(async_session_empty, settings) + + assert returned_id == expected_id + result = await async_session_empty.execute(select(Printer)) + rows = list(result.scalars()) + assert len(rows) == 1 + assert 
rows[0].id == expected_id + + +async def test_upsert_is_idempotent(async_session_empty): + settings = _settings_with_pt750w() + first = await upsert_runtime_printer(async_session_empty, settings) + second = await upsert_runtime_printer(async_session_empty, settings) + assert first == second + result = await async_session_empty.execute(select(Printer)) + assert len(list(result.scalars())) == 1 + + +async def test_upsert_refreshes_fields_when_row_exists(async_session_empty): + """Re-running upsert updates the row's name + connection + enabled fields.""" + settings = _settings_with_pt750w() + pid = await upsert_runtime_printer(async_session_empty, settings) + assert pid is not None + + # Mutate the existing row in-DB so we can verify upsert overwrites it. + row = await async_session_empty.get(Printer, pid) + assert row is not None + row.enabled = False + row.name = "stale name" + await async_session_empty.flush() + + # Second upsert with same settings must restore the fields. + await upsert_runtime_printer(async_session_empty, settings) + refreshed = await async_session_empty.get(Printer, pid) + assert refreshed is not None + assert refreshed.enabled is True + assert refreshed.name == f"{_PT750W_MODEL} ({_PT750W_HOST})" + + +async def test_upsert_returns_none_when_no_env_printer(async_session_empty): + settings = _settings_with_mock_backend() + result_id = await upsert_runtime_printer(async_session_empty, settings) + assert result_id is None + result = await async_session_empty.execute(select(Printer)) + assert len(list(result.scalars())) == 0 diff --git a/backend/tests/integration/test_healthz_minimal.py b/backend/tests/integration/test_healthz_minimal.py new file mode 100644 index 0000000..5515de4 --- /dev/null +++ b/backend/tests/integration/test_healthz_minimal.py @@ -0,0 +1,19 @@ +"""Phase 7b Cluster 1e — /healthz never queries the database. 
+ +Locks in the liveness/readiness contract: /healthz must answer 200 +even when the DB is unreachable, otherwise Docker autoheal would +restart-loop on transient DB failures. Deep checks belong to /readiness. +""" + +from __future__ import annotations + +import pytest + +pytestmark = pytest.mark.asyncio + + +async def test_healthz_returns_200_even_with_broken_db(api_client_with_broken_db): + resp = await api_client_with_broken_db.get("/healthz") + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" or body.get("ok") is True diff --git a/backend/tests/integration/test_lifespan_seeds_and_upserts.py b/backend/tests/integration/test_lifespan_seeds_and_upserts.py new file mode 100644 index 0000000..9c3a10c --- /dev/null +++ b/backend/tests/integration/test_lifespan_seeds_and_upserts.py @@ -0,0 +1,67 @@ +"""Phase 7b Cluster 1a + 1b end-to-end test: a fresh DB after lifespan +startup contains the seed templates AND one deterministic-id printer, +and app.state.printer_id matches the DB printer.id.""" + +from __future__ import annotations + +import app.db.engine as _engine_module +import pytest +from app.models.printer import Printer +from app.models.template import Template +from httpx import ASGITransport, AsyncClient +from sqlmodel import select + +pytestmark = pytest.mark.asyncio + + +async def test_fresh_lifespan_seeds_templates_and_creates_printer( + _temp_db_engine, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """After lifespan startup, templates are seeded AND printer is upserted, + and app.state.printer_id matches the one Printer row in the DB.""" + # _mock_backend_env (autouse) sets PRINTER_HUB_PRINTER_MODEL=PT-P750W and + # PRINTER_HUB_PRINTER_BACKEND=mock. We additionally set a host+port so + # upsert_runtime_printer() finds all three required fields (model, host, port). 
+ monkeypatch.setenv("PRINTER_HUB_PT750W_HOST", "192.0.2.50") + monkeypatch.setenv("PRINTER_HUB_PT750W_PORT", "9100") + + from app.config import get_settings + from app.main import create_app + + get_settings.cache_clear() + + test_app = create_app() + + async with AsyncClient(transport=ASGITransport(app=test_app), base_url="http://test") as client: + # Trigger the lifespan by making any request; the lifespan runs at + # ASGI startup inside ASGITransport. + resp = await client.get("/healthz") + assert resp.status_code == 200, f"healthz failed: {resp.text}" + + # Inspect the DB state while the lifespan is active. + # Use the attribute on _engine_module (patched by _temp_db_engine fixture), + # not the name bound at test-module import time. + async with _engine_module.async_session() as s: + templates = list((await s.execute(select(Template))).scalars()) + printers = list((await s.execute(select(Printer))).scalars()) + + assert len(templates) >= 1, ( + f"Expected at least one seeded template, got {len(templates)}. " + "Check that TemplateLoader.load_dir() runs BEFORE seed_templates() " + "in the lifespan." + ) + assert len(printers) == 1, ( + f"Expected exactly one upserted Printer row, got {len(printers)}. " + "Check that upsert_runtime_printer() is wired in the lifespan." + ) + # The deterministic id produced by upsert_runtime_printer must be the + # same id that make_queue_printer received and exposed via app.state.printer_id. + # create_app() returns a _LifespanManager; the FastAPI state is on ._app. + inner_app_state = test_app._app.state # type: ignore[attr-defined] + assert inner_app_state.printer_id == printers[0].id, ( + f"app.state.printer_id={inner_app_state.printer_id!r} != " + f"DB Printer.id={printers[0].id!r}. " + "The DB uuid from upsert_runtime_printer must be plumbed into " + "make_queue_printer(printer_id=db_printer_id)." 
+ ) diff --git a/backend/tests/integration/test_status_cache_writer.py b/backend/tests/integration/test_status_cache_writer.py new file mode 100644 index 0000000..0bbd087 --- /dev/null +++ b/backend/tests/integration/test_status_cache_writer.py @@ -0,0 +1,165 @@ +"""Phase 7b Cluster 1f — StatusProbeProducer writes printer_status_cache.""" + +from __future__ import annotations + +import pytest +import pytest_asyncio + +pytestmark = pytest.mark.asyncio + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest_asyncio.fixture +async def _printer_id(): + """Insert one Printer row into the autouse temp-DB and return its UUID.""" + import app.db.engine as _eng + from app.models.printer import Printer + + async with _eng.async_session() as s: + p = Printer( + name="cache-writer-test", + model="PT-P750W", + backend="ptouch", + connection={"host": "127.0.0.1", "port": 9100}, + ) + s.add(p) + await s.commit() + await s.refresh(p) + return p.id + + +def _make_producer(printer_id): + """Build a StatusProbeProducer with a stub EventBus.""" + from app.services.event_bus import EventBus + from app.services.producers.status_probe_producer import StatusProbeProducer + + return StatusProbeProducer( + bus=EventBus(), + printer_id=str(printer_id), + host="127.0.0.1", + ) + + +# --------------------------------------------------------------------------- +# Test 1 — successful probe writes online=True cache row +# --------------------------------------------------------------------------- + + +async def test_successful_probe_writes_cache(_printer_id, monkeypatch): + """_probe_once writes online=True with loaded_tape_mm when probe succeeds.""" + from app.printer_backends.snmp_helper import PreflightStatus + + ok_status = PreflightStatus( + hr_printer_status="idle", + loaded_tape_mm=12, + error_flags=[], + ) + + async def _fake_query(host, *, community="public", 
timeout_s=3.0): + return ok_status + + monkeypatch.setattr( + "app.services.producers.status_probe_producer.query_preflight", + _fake_query, + ) + + producer = _make_producer(_printer_id) + await producer._probe_once() + + import app.db.engine as _eng + from app.models.printer_status_cache import PrinterStatusCache + + async with _eng.async_session() as s: + row = await s.get(PrinterStatusCache, _printer_id) + + assert row is not None, "cache row must be written after successful probe" + assert row.captured_at is not None + assert row.parsed is not None + assert row.parsed["online"] is True + assert row.parsed["loaded_tape_mm"] == 12 + + +# --------------------------------------------------------------------------- +# Test 2 — failed probe writes online=False cache row +# --------------------------------------------------------------------------- + + +async def test_probe_failure_marks_offline(_printer_id, monkeypatch): + """_probe_once writes online=False + last_error when SNMP raises.""" + + async def _failing_query(host, *, community="public", timeout_s=3.0): + raise OSError("timed out") + + monkeypatch.setattr( + "app.services.producers.status_probe_producer.query_preflight", + _failing_query, + ) + + producer = _make_producer(_printer_id) + await producer._probe_once() + + import app.db.engine as _eng + from app.models.printer_status_cache import PrinterStatusCache + + async with _eng.async_session() as s: + row = await s.get(PrinterStatusCache, _printer_id) + + assert row is not None, "cache row must be written even on probe failure" + assert row.captured_at is not None + assert row.parsed is not None + assert row.parsed["online"] is False + assert "last_error" in row.parsed + + +# --------------------------------------------------------------------------- +# Test 3 — failure after success preserves prior parsed data +# --------------------------------------------------------------------------- + + +async def 
test_probe_failure_preserves_prior_parsed_data(_printer_id, monkeypatch): + """After a prior success, a failing probe keeps loaded_tape_mm but flips online=False.""" + from app.printer_backends.snmp_helper import PreflightStatus + + # First call: success + async def _ok_query(host, *, community="public", timeout_s=3.0): + return PreflightStatus( + hr_printer_status="idle", + loaded_tape_mm=24, + error_flags=[], + ) + + monkeypatch.setattr( + "app.services.producers.status_probe_producer.query_preflight", + _ok_query, + ) + + producer = _make_producer(_printer_id) + await producer._probe_once() # success — should write loaded_tape_mm=24 + + # Second call: failure + async def _fail_query(host, *, community="public", timeout_s=3.0): + raise OSError("gone offline") + + monkeypatch.setattr( + "app.services.producers.status_probe_producer.query_preflight", + _fail_query, + ) + + await producer._probe_once() # failure — should flip online=False, keep tape data + + import app.db.engine as _eng + from app.models.printer_status_cache import PrinterStatusCache + + async with _eng.async_session() as s: + row = await s.get(PrinterStatusCache, _printer_id) + + assert row is not None + assert row.parsed is not None + assert row.parsed["online"] is False + assert "last_error" in row.parsed + # Prior tape data must be preserved + assert row.parsed.get("loaded_tape_mm") == 24 diff --git a/backend/tests/integration/test_status_endpoint_cached.py b/backend/tests/integration/test_status_endpoint_cached.py new file mode 100644 index 0000000..9a3a9f2 --- /dev/null +++ b/backend/tests/integration/test_status_endpoint_cached.py @@ -0,0 +1,177 @@ +"""Phase 7b Cluster 1f — /api/printers/{id}/status reads cache, never blocks.""" + +from __future__ import annotations + +import time +from datetime import UTC, datetime + +import pytest +import pytest_asyncio + +pytestmark = pytest.mark.asyncio + + +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- + + +async def _insert_printer(session_factory): + """Insert one Printer row and return its UUID.""" + from app.models.printer import Printer + + async with session_factory() as s: + p = Printer( + name="cache-endpoint-test", + model="PT-P750W", + backend="ptouch", + connection={"host": "127.0.0.1", "port": 9100}, + ) + s.add(p) + await s.commit() + await s.refresh(p) + return p.id + + +async def _insert_cache(session_factory, printer_id, parsed: dict): + """Insert a PrinterStatusCache row for the given printer.""" + from app.models.printer_status_cache import PrinterStatusCache + + async with session_factory() as s: + row = PrinterStatusCache( + printer_id=printer_id, + parsed=parsed, + captured_at=datetime.now(UTC), + ) + s.add(row) + await s.commit() + + +def _build_test_app(session): + """Return a minimal FastAPI app with only the printers router.""" + from collections.abc import AsyncIterator + + from app.api.routes.printers import router + from app.db.session import get_session + from fastapi import FastAPI + from sqlalchemy.ext.asyncio import AsyncSession + + app = FastAPI() + app.include_router(router) + + async def _override() -> AsyncIterator[AsyncSession]: + yield session + + app.dependency_overrides[get_session] = _override + return app + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest_asyncio.fixture +async def api_client_with_printer_no_cache(): + """AsyncClient + printer UUID; no cache row exists.""" + import app.db.engine as _eng + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + + factory = _eng.async_session + printer_id = await _insert_printer(factory) + + # Open a session that lives for the whole test + session_factory: async_sessionmaker[AsyncSession] = factory + async with session_factory() as s: + from httpx 
import ASGITransport, AsyncClient + + app = _build_test_app(s) + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://t") as c: + yield c, printer_id + + +@pytest_asyncio.fixture +async def api_client_with_warm_cache(): + """AsyncClient + printer UUID; cache row with online=True exists.""" + import app.db.engine as _eng + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + + factory = _eng.async_session + printer_id = await _insert_printer(factory) + await _insert_cache( + factory, + printer_id, + { + "online": True, + "loaded_tape_mm": 12, + "hr_printer_status": "idle", + "error_flags": [], + }, + ) + + session_factory: async_sessionmaker[AsyncSession] = factory + async with session_factory() as s: + from httpx import ASGITransport, AsyncClient + + app = _build_test_app(s) + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://t") as c: + yield c, printer_id + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +async def test_status_endpoint_returns_pending_when_cache_empty( + api_client_with_printer_no_cache, +): + """When no cache row exists the endpoint returns online=None and a note.""" + client, pid = api_client_with_printer_no_cache + resp = await client.get(f"/api/printers/{pid}/status") + assert resp.status_code == 200 + body = resp.json() + assert body["online"] is None + assert body["note"] is not None + assert "no probe yet" in body["note"].lower() + + +async def test_status_endpoint_returns_under_100ms(api_client_with_warm_cache): + """Even with no live SNMP path, the endpoint answers from cache in <100ms.""" + client, pid = api_client_with_warm_cache + t0 = time.monotonic() + resp = await client.get(f"/api/printers/{pid}/status") + elapsed_ms = (time.monotonic() - t0) * 1000 + assert resp.status_code == 200 + assert elapsed_ms < 100, f"endpoint blocked {elapsed_ms:.1f}ms" 
+ body = resp.json() + assert body["online"] is True + + +async def test_status_endpoint_returns_404_for_unknown_printer( + api_client_with_printer_no_cache, +): + """Unknown printer UUID returns 404.""" + from uuid import uuid4 + + client, _ = api_client_with_printer_no_cache + resp = await client.get(f"/api/printers/{uuid4()}/status") + assert resp.status_code == 404 + + +async def test_status_endpoint_returns_cached_tape_data(api_client_with_warm_cache): + """Cached loaded_tape_mm + error_flags surface as PrinterStatus.tape_loaded + and PrinterStatus.error_state respectively (bot-review finding on PR #75).""" + client, pid = api_client_with_warm_cache + resp = await client.get(f"/api/printers/{pid}/status") + assert resp.status_code == 200 + body = resp.json() + assert body["online"] is True + # last_probe_age_s should be present and non-negative + assert body.get("last_probe_age_s") is not None + assert body["last_probe_age_s"] >= 0 + # Cached parsed JSON is rendered into the documented schema fields, + # not silently dropped: loaded_tape_mm=12 → tape_loaded="12mm", + # error_flags=[] → error_state=None. 
+ assert body["tape_loaded"] == "12mm" + assert body["error_state"] is None diff --git a/backend/tests/unit/api/test_events_route.py b/backend/tests/unit/api/test_events_route.py index 8d0df89..9876dc4 100644 --- a/backend/tests/unit/api/test_events_route.py +++ b/backend/tests/unit/api/test_events_route.py @@ -67,7 +67,7 @@ def test_settings_sse_max_subscribers_honoured() -> None: printer_id = uuid.uuid4() fake_printer = MagicMock() - fake_printer.id = str(printer_id) + fake_printer.id = printer_id # Build a dedicated test app with a cap of 2 test_app = FastAPI() @@ -127,7 +127,7 @@ def test_429_when_subscriber_limit_exceeded(client_with_bus: TestClient) -> None """ printer_id = uuid.uuid4() fake_printer = MagicMock() - fake_printer.id = str(printer_id) + fake_printer.id = printer_id bus: EventBus = _inner.state.event_bus # Register 100 distinct subscriber IDs, each on all 3 channels diff --git a/backend/tests/unit/api/test_print_routes.py b/backend/tests/unit/api/test_print_routes.py index 768a9ed..f9870d5 100644 --- a/backend/tests/unit/api/test_print_routes.py +++ b/backend/tests/unit/api/test_print_routes.py @@ -2,6 +2,7 @@ from datetime import UTC, datetime from unittest.mock import AsyncMock, MagicMock +from uuid import UUID import pytest from app.api.routes.print import router @@ -13,6 +14,9 @@ from fastapi import FastAPI from httpx import ASGITransport, AsyncClient +_PRINTER_ID = UUID("dddddddd-0000-0000-0000-000000000001") +_PRINTER_ID_STR = str(_PRINTER_ID) + @pytest.fixture def fake_service(): @@ -101,7 +105,7 @@ async def test_post_print_lookup_failed_is_502(fake_service, fake_queue) -> None async def test_get_jobs_returns_status_with_live_block( fake_service, fake_queue, monkeypatch ) -> None: - job = Job(id="job-1", printer_id="p", image_payload=b"", tape_mm=24, options={}) + job = Job(id="job-1", printer_id=_PRINTER_ID, image_payload=b"", tape_mm=24, options={}) job.state = JobState.PRINTING job.submitted_at = datetime.now(UTC) fake_queue.get = 
AsyncMock(return_value=job) @@ -124,7 +128,7 @@ async def fake_live(host: str, *, community: str = "public", timeout_s: float = async def test_get_jobs_no_live_block_when_not_printing(fake_service, fake_queue) -> None: - job = Job(id="job-1", printer_id="p", image_payload=b"", tape_mm=24, options={}) + job = Job(id="job-1", printer_id=_PRINTER_ID, image_payload=b"", tape_mm=24, options={}) job.state = JobState.COMPLETED job.submitted_at = datetime.now(UTC) fake_queue.get = AsyncMock(return_value=job) @@ -137,7 +141,7 @@ async def test_get_jobs_no_live_block_when_not_printing(fake_service, fake_queue async def test_get_jobs_live_snmp_failure_is_non_fatal( fake_service, fake_queue, monkeypatch ) -> None: - job = Job(id="job-1", printer_id="p", image_payload=b"", tape_mm=24, options={}) + job = Job(id="job-1", printer_id=_PRINTER_ID, image_payload=b"", tape_mm=24, options={}) job.state = JobState.PRINTING job.submitted_at = datetime.now(UTC) fake_queue.get = AsyncMock(return_value=job) @@ -260,13 +264,13 @@ async def test_post_print_cover_open_is_409(fake_service, fake_queue) -> None: async def test_resume_printer_endpoint(fake_service, fake_queue) -> None: fake_queue.resume_printer = AsyncMock(return_value=None) app = _app(fake_service, fake_queue) - app.state.printer_id = "pt@x" + app.state.printer_id = _PRINTER_ID async with _client(app) as c: r = await c.post("/printer/resume") assert r.status_code == 200 body = r.json() - assert body == {"printer_id": "pt@x", "state": "active"} - fake_queue.resume_printer.assert_awaited_once_with("pt@x") + assert body == {"printer_id": _PRINTER_ID_STR, "state": "active"} + fake_queue.resume_printer.assert_awaited_once_with(_PRINTER_ID) async def test_resume_printer_404_when_no_printer_configured(fake_service, fake_queue) -> None: @@ -284,7 +288,7 @@ async def test_resume_printer_404_when_no_printer_configured(fake_service, fake_ async def test_resume_job_transitions_paused_to_queued(fake_service, fake_queue) -> None: """Resume a 
PAUSED job → 200 with state=queued and cleared error metadata.""" - job = Job(id="job-1", printer_id="p", image_payload=b"", tape_mm=24, options={}) + job = Job(id="job-1", printer_id=_PRINTER_ID, image_payload=b"", tape_mm=24, options={}) # Manually set PAUSED state (as PrintService would after tape mismatch+queue) from app.services.job_lifecycle import JobStateMachine @@ -330,7 +334,7 @@ async def test_resume_job_completed_is_409(fake_service, fake_queue) -> None: """Resuming a COMPLETED job returns 409.""" from app.services.job_lifecycle import JobStateMachine - job = Job(id="job-1", printer_id="p", image_payload=b"", tape_mm=24, options={}) + job = Job(id="job-1", printer_id=_PRINTER_ID, image_payload=b"", tape_mm=24, options={}) # Transition to COMPLETED via PRINTING JobStateMachine.transition(job, JobState.PRINTING) JobStateMachine.transition(job, JobState.COMPLETED) @@ -354,9 +358,9 @@ async def test_resume_printer_already_active_returns_409(fake_service, fake_queu """ from app.services.print_queue import PrinterAlreadyActiveError - fake_queue.resume_printer = AsyncMock(side_effect=PrinterAlreadyActiveError("pt@x")) + fake_queue.resume_printer = AsyncMock(side_effect=PrinterAlreadyActiveError(_PRINTER_ID)) app = _app(fake_service, fake_queue) - app.state.printer_id = "pt@x" + app.state.printer_id = _PRINTER_ID async with _client(app) as c: r = await c.post("/printer/resume") assert r.status_code == 409 @@ -368,11 +372,11 @@ async def test_resume_printer_paused_returns_200(fake_service, fake_queue) -> No """Calling resume_printer on a PAUSED printer must still return 200 (control).""" fake_queue.resume_printer = AsyncMock(return_value=None) app = _app(fake_service, fake_queue) - app.state.printer_id = "pt@x" + app.state.printer_id = _PRINTER_ID async with _client(app) as c: r = await c.post("/printer/resume") assert r.status_code == 200 - assert r.json() == {"printer_id": "pt@x", "state": "active"} + assert r.json() == {"printer_id": _PRINTER_ID_STR, "state": 
"active"} # --------------------------------------------------------------------------- @@ -384,7 +388,7 @@ async def test_resume_job_not_paused_returns_409_with_error_code(fake_service, f """When resuming a non-PAUSED job the response must include error_code='invalid_state' (structured ProblemDetail, not a plain detail string). """ - job = Job(id="job-1", printer_id="p", image_payload=b"", tape_mm=24, options={}) + job = Job(id="job-1", printer_id=_PRINTER_ID, image_payload=b"", tape_mm=24, options={}) # job.state is QUEUED by default — not PAUSED job.submitted_at = datetime.now(UTC) fake_queue.get = AsyncMock(return_value=job) diff --git a/backend/tests/unit/api/test_printers_routes.py b/backend/tests/unit/api/test_printers_routes.py index a06e0ef..3ff8dc1 100644 --- a/backend/tests/unit/api/test_printers_routes.py +++ b/backend/tests/unit/api/test_printers_routes.py @@ -163,47 +163,26 @@ async def test_list_printers_returns_printers_with_paused_flag(session) -> None: @pytest.mark.asyncio -async def test_get_printer_status_calls_probe_and_upserts_cache(session, monkeypatch) -> None: - """get_printer_status wraps the probe in asyncio.to_thread and upserts cache.""" - from app.services.status_block import ( - MediaType, - NotificationCode, - PhaseType, - PrinterError, - StatusBlock, - StatusType, - TapeColor, - TextColor, - ) +async def test_get_printer_status_reads_cache_and_returns_online(session) -> None: + """get_printer_status reads from printer_status_cache; returns online=True.""" + # Phase 7b: the endpoint no longer probes inline — it reads the cache. 
+ from datetime import UTC, datetime printer = await _make_printer(session) - # Build a fake parsed StatusBlock (12mm laminated tape, no errors) - fake_block = StatusBlock( - raw=b"\x80" + b"\x00" * 31, - print_head_mark=0x80, - size=32, - brother_code=ord("B"), - series_code=0x30, - model_code=0x00, - country_code=0xFF, - media_width_mm=12, - media_type=MediaType.LAMINATED, - media_length_mm=0, - mode=0, - status_type=StatusType.REPLY, - phase_type=PhaseType.EDITING, - phase_number=0, - notification=NotificationCode.NOT_AVAILABLE, - tape_color=TapeColor.BLACK, - text_color=TextColor.WHITE, - errors=PrinterError.NONE, + # Pre-populate the cache as the background probe worker would. + cache = PrinterStatusCache( + printer_id=printer.id, + parsed={ + "online": True, + "loaded_tape_mm": 12, + "hr_printer_status": "idle", + "error_flags": [], + }, + captured_at=datetime.now(UTC), ) - - def fake_probe(host: str, port: int = 9100) -> dict[str, Any]: - return {"raw": b"\x80" + b"\x00" * 31, "block": fake_block} - - monkeypatch.setattr("app.api.routes.printers._probe_status_sync", fake_probe) + session.add(cache) + await session.commit() app = _build_app(session) client = TestClient(app, raise_server_exceptions=True) @@ -213,9 +192,8 @@ def fake_probe(host: str, port: int = 9100) -> dict[str, Any]: body = r.json() assert body["printer_id"] == str(printer.id) assert body["online"] is True - assert "12mm" in (body["tape_loaded"] or "") - assert body["error_state"] is None assert "captured_at" in body + assert body["last_probe_age_s"] is not None # --------------------------------------------------------------------------- @@ -382,30 +360,30 @@ async def test_get_printer_status_unknown_id_returns_404(session) -> None: # --------------------------------------------------------------------------- -# Test 9: GET /api/printers/{id}/status — probe raises OSError → 503 +# Test 9: GET /api/printers/{id}/status — no cache row → 200 + pending # 
--------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_get_printer_status_probe_oserror_returns_503(session, monkeypatch) -> None: - """GET /api/printers/{id}/status returns 503 when TCP probe raises OSError. +async def test_get_printer_status_no_cache_returns_pending(session) -> None: + """GET /api/printers/{id}/status returns online=None when no cache row exists. - Exercises the ``except OSError`` branch in get_printer_status - (lines 194-198 of printers.py). + Phase 7b: the endpoint reads printer_status_cache instead of probing + inline. A missing cache row means the probe worker has not run yet; + the endpoint returns HTTP 200 with online=null and a descriptive note. """ printer = await _make_printer(session) - - def _failing_probe(host: str, port: int = 9100) -> dict[str, Any]: - raise OSError("Connection refused") - - monkeypatch.setattr("app.api.routes.printers._probe_status_sync", _failing_probe) + # Deliberately omit any PrinterStatusCache row. 
app = _build_app(session) client = TestClient(app, raise_server_exceptions=True) r = client.get(f"/api/printers/{printer.id}/status") - assert r.status_code == 503 - assert "unreachable" in r.json()["detail"] + assert r.status_code == 200 + body = r.json() + assert body["online"] is None + assert body["note"] is not None + assert "no probe yet" in body["note"].lower() # --------------------------------------------------------------------------- @@ -492,15 +470,17 @@ async def test_get_printer_tape_zero_width_returns_404(session) -> None: # --------------------------------------------------------------------------- -# Test 13: GET /api/printers/{id}/status — no host in connection → 422 +# Test 13: GET /api/printers/{id}/status — no connection config → still works # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_get_printer_status_no_host_returns_422(session) -> None: - """GET /api/printers/{id}/status returns 422 when printer has no host. +async def test_get_printer_status_no_host_returns_pending(session) -> None: + """GET /api/printers/{id}/status returns 200+pending for printer without host. - Exercises lines 184-188 of printers.py (missing host guard). + Phase 7b: the endpoint reads the cache; it no longer inspects + ``connection.host``. A printer with no connection config and no cache + row still yields HTTP 200 with online=null. 
""" printer = await _make_printer(session, connection={}) # no 'host' key @@ -508,8 +488,9 @@ async def test_get_printer_status_no_host_returns_422(session) -> None: client = TestClient(app, raise_server_exceptions=True) r = client.get(f"/api/printers/{printer.id}/status") - assert r.status_code == 422 - assert "no 'host'" in r.json()["detail"] + assert r.status_code == 200 + body = r.json() + assert body["online"] is None # --------------------------------------------------------------------------- @@ -724,57 +705,38 @@ async def test_list_printers_returns_printer_with_state(session) -> None: @pytest.mark.asyncio -async def test_get_printer_status_direct_probe_success(session, monkeypatch) -> None: - """get_printer_status called directly returns PrinterStatus on probe success. +async def test_get_printer_status_direct_reads_cache(session) -> None: + """get_printer_status called directly reads from printer_status_cache. - Exercises lines 177-228 (get_printer_status body) in the pytest loop. + Phase 7b: the endpoint reads the cache written by StatusProbeProducer + instead of probing inline. Pre-populate the cache and verify the result. """ + from datetime import UTC, datetime + from app.api.routes.printers import get_printer_status - from app.services.status_block import ( - MediaType, - NotificationCode, - PhaseType, - PrinterError, - StatusBlock, - StatusType, - TapeColor, - TextColor, - ) printer = await _make_printer(session) - fake_block = StatusBlock( - raw=b"\x80" + b"\x00" * 31, - print_head_mark=0x80, - size=32, - brother_code=ord("B"), - series_code=0x30, - model_code=0x00, - country_code=0xFF, - media_width_mm=12, - media_type=MediaType.LAMINATED, - media_length_mm=0, - mode=0, - status_type=StatusType.REPLY, - phase_type=PhaseType.EDITING, - phase_number=0, - notification=NotificationCode.NOT_AVAILABLE, - tape_color=TapeColor.BLACK, - text_color=TextColor.WHITE, - errors=PrinterError.NONE, + # Pre-populate cache as the probe worker would. 
+ cache = PrinterStatusCache( + printer_id=printer.id, + parsed={ + "online": True, + "loaded_tape_mm": 12, + "hr_printer_status": "idle", + "error_flags": [], + }, + captured_at=datetime.now(UTC), ) - - def _fake_probe(host: str, port: int = 9100) -> dict[str, Any]: - return {"raw": b"\x80" + b"\x00" * 31, "block": fake_block} - - monkeypatch.setattr("app.api.routes.printers._probe_status_sync", _fake_probe) + session.add(cache) + await session.commit() result = await get_printer_status(printer_id=printer.id, session=session) assert result.printer_id == printer.id assert result.online is True - assert result.tape_loaded is not None - assert "12mm" in result.tape_loaded + assert result.captured_at is not None + assert result.last_probe_age_s is not None @pytest.mark.asyncio diff --git a/backend/tests/unit/models/__init__.py b/backend/tests/unit/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/unit/models/test_datetime_columns.py b/backend/tests/unit/models/test_datetime_columns.py new file mode 100644 index 0000000..a088dd3 --- /dev/null +++ b/backend/tests/unit/models/test_datetime_columns.py @@ -0,0 +1,30 @@ +"""Phase 7b Cluster 1c — every datetime column must be timezone-aware.""" + +import pytest +from app.models.job import Job +from app.models.preset import Preset +from app.models.printer import Printer +from app.models.printer_state import PrinterState +from app.models.printer_status_cache import PrinterStatusCache +from app.models.template import Template +from sqlalchemy import DateTime + + +@pytest.mark.parametrize( + "model,columns", + [ + (Template, ["created_at", "updated_at"]), + (Printer, ["created_at", "updated_at"]), + (Job, ["created_at", "updated_at", "started_at", "finished_at"]), + (Preset, ["created_at", "updated_at"]), + (PrinterState, ["updated_at"]), + (PrinterStatusCache, ["captured_at", "updated_at"]), + ], +) +def test_datetime_columns_are_timezone_aware(model, columns): + for col_name in columns: 
+ col = model.__table__.columns[col_name] + assert isinstance(col.type, DateTime), f"{model.__name__}.{col_name} is not DateTime" + assert col.type.timezone is True, ( + f"{model.__name__}.{col_name} must be DateTime(timezone=True)" + ) diff --git a/backend/tests/unit/printer_models/test_make_queue_printer_id_param.py b/backend/tests/unit/printer_models/test_make_queue_printer_id_param.py new file mode 100644 index 0000000..2fc212f --- /dev/null +++ b/backend/tests/unit/printer_models/test_make_queue_printer_id_param.py @@ -0,0 +1,52 @@ +"""Phase 7b Cluster 1b — driver.make_queue_printer(...) accepts an optional +printer_id so lifespan can plumb the deterministic UUIDv5 from +upsert_runtime_printer() into the runtime printer.""" + +from __future__ import annotations + +from uuid import UUID, uuid4 + +import pytest +from app.printer_backends.mock_backend import MockPrinterBackend +from app.printer_models.pt import PTP750WDriver +from app.services.tape_registry import TapeRegistry + + +@pytest.fixture +def backend() -> MockPrinterBackend: + return MockPrinterBackend(host="192.0.2.99") + + +@pytest.fixture +def tape_registry() -> TapeRegistry: + return TapeRegistry() + + +def test_make_queue_printer_accepts_explicit_printer_id( + backend: MockPrinterBackend, + tape_registry: TapeRegistry, +) -> None: + driver = PTP750WDriver(backend=backend) + custom = uuid4() + queue_printer = driver.make_queue_printer(tape_registry, printer_id=custom) + assert queue_printer.id == custom + + +def test_make_queue_printer_generates_uuid_when_omitted( + backend: MockPrinterBackend, + tape_registry: TapeRegistry, +) -> None: + driver = PTP750WDriver(backend=backend) + queue_printer = driver.make_queue_printer(tape_registry) + assert isinstance(queue_printer.id, UUID) + + +def test_make_queue_printer_two_omitted_calls_get_different_ids( + backend: MockPrinterBackend, + tape_registry: TapeRegistry, +) -> None: + """Sanity: omitting the param defaults to a fresh uuid4 each time, not a 
shared sentinel.""" + driver = PTP750WDriver(backend=backend) + a = driver.make_queue_printer(tape_registry) + b = driver.make_queue_printer(tape_registry) + assert a.id != b.id diff --git a/backend/tests/unit/printer_models/test_pt_driver.py b/backend/tests/unit/printer_models/test_pt_driver.py index 66c45cc..251f418 100644 --- a/backend/tests/unit/printer_models/test_pt_driver.py +++ b/backend/tests/unit/printer_models/test_pt_driver.py @@ -1,5 +1,7 @@ from __future__ import annotations +from uuid import UUID + import pytest from app.models.tape import TapeSpec from app.printer_backends.mock_backend import MockPrinterBackend @@ -85,7 +87,7 @@ def test_make_queue_printer_returns_printer_like( driver = PTP750WDriver(backend=backend) qp = driver.make_queue_printer(tape_registry) assert isinstance(qp, _PrinterLike) - assert qp.id == "PT-P750W@192.0.2.10" + assert isinstance(qp.id, UUID) async def test_queue_printer_print_calls_backend( diff --git a/backend/tests/unit/schemas/test_datetime_serializer.py b/backend/tests/unit/schemas/test_datetime_serializer.py new file mode 100644 index 0000000..ff21ba8 --- /dev/null +++ b/backend/tests/unit/schemas/test_datetime_serializer.py @@ -0,0 +1,20 @@ +import datetime + +from app.schemas._datetime import serialize_datetime_utc + + +def test_naive_datetime_gets_utc_tz_and_z_suffix(): + naive = datetime.datetime(2026, 5, 17, 12, 0, 0) + assert serialize_datetime_utc(naive, None) == "2026-05-17T12:00:00Z" + + +def test_utc_aware_datetime_serialised_with_z_suffix(): + aware = datetime.datetime(2026, 5, 17, 12, 0, 0, tzinfo=datetime.UTC) + assert serialize_datetime_utc(aware, None) == "2026-05-17T12:00:00Z" + + +def test_non_utc_aware_datetime_kept_with_offset(): + plus_two = datetime.datetime( + 2026, 5, 17, 14, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(hours=2)) + ) + assert serialize_datetime_utc(plus_two, None) == "2026-05-17T14:00:00+02:00" diff --git a/backend/tests/unit/schemas/test_printer_status_fields.py 
b/backend/tests/unit/schemas/test_printer_status_fields.py new file mode 100644 index 0000000..cb67b92 --- /dev/null +++ b/backend/tests/unit/schemas/test_printer_status_fields.py @@ -0,0 +1,71 @@ +"""Phase 7b Cluster 1f G2 — PrinterStatus carries cache freshness fields.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from uuid import uuid4 + +from app.schemas.printer import PrinterStatus + + +def test_printer_status_pending_when_no_probe(): + """online=None and captured_at=None signals no probe has run yet.""" + s = PrinterStatus( + printer_id=uuid4(), + online=None, + captured_at=None, + note="No probe yet — wait up to 30s for first probe cycle", + ) + assert s.online is None + assert s.note is not None + assert s.note.startswith("No probe yet") + + +def test_printer_status_full_fields(): + """All four new fields round-trip through the model.""" + pid = uuid4() + now = datetime.now(UTC) + s = PrinterStatus( + printer_id=pid, + online=True, + tape_loaded="12mm laminated black/white", + captured_at=now, + last_probe_age_s=15, + last_error=None, + note=None, + ) + assert s.online is True + assert s.last_probe_age_s == 15 + assert s.last_error is None + assert s.note is None + + +def test_printer_status_serialises_captured_at_with_z_suffix(): + """captured_at is emitted as RFC3339 with Z suffix (not +00:00).""" + s = PrinterStatus( + printer_id=uuid4(), + online=True, + captured_at=datetime(2026, 5, 17, 12, 0, 0, tzinfo=UTC), + ) + dumped = s.model_dump_json() + assert '"captured_at":"2026-05-17T12:00:00Z"' in dumped + + +def test_printer_status_serialises_none_captured_at_as_null(): + """When captured_at is None the field serialises to JSON null.""" + s = PrinterStatus(printer_id=uuid4(), online=None, captured_at=None) + dumped = s.model_dump_json() + assert '"captured_at":null' in dumped + + +def test_printer_status_last_error_round_trips(): + """last_error string is preserved in model_dump.""" + s = PrinterStatus( + 
printer_id=uuid4(), + online=False, + captured_at=datetime.now(UTC), + last_error="timed out after 5s", + ) + data = s.model_dump() + assert data["last_error"] == "timed out after 5s" + assert data["online"] is False diff --git a/backend/tests/unit/schemas/test_readiness_schema.py b/backend/tests/unit/schemas/test_readiness_schema.py new file mode 100644 index 0000000..2f744ab --- /dev/null +++ b/backend/tests/unit/schemas/test_readiness_schema.py @@ -0,0 +1,31 @@ +"""Phase 7b Cluster 1e — ReadinessResponse + CheckStatus schema tests.""" + +from app.schemas.readiness import CheckStatus, ReadinessResponse + + +def test_check_status_minimum_fields(): + c = CheckStatus(status="ok") + assert c.status == "ok" + assert c.detail is None + assert c.metric is None + + +def test_check_status_accepts_all_statuses(): + for s in ("ok", "fail", "skipped", "stale"): + assert CheckStatus(status=s).status == s + + +def test_readiness_response_aggregate(): + body = ReadinessResponse( + status="ready", + checks={"database": CheckStatus(status="ok", metric={"latency_ms": 0.8})}, + version="dev", + revision="abc", + ) + assert body.status == "ready" + assert body.checks["database"].metric == {"latency_ms": 0.8} + + +def test_readiness_response_status_values(): + for s in ("ready", "degraded", "not-ready"): + assert ReadinessResponse(status=s, checks={}, version="v", revision="r").status == s diff --git a/backend/tests/unit/services/conftest.py b/backend/tests/unit/services/conftest.py new file mode 100644 index 0000000..10a5958 --- /dev/null +++ b/backend/tests/unit/services/conftest.py @@ -0,0 +1,90 @@ +"""Phase 7b Cluster 1e — fixtures for readiness builder unit tests. 
+ +Provides: + async_session_empty — fresh migrated SQLite DB (no rows) + async_session_with_one_template — same but with one seed Template row + settings_at_head — Settings pointing at the migrated DB + runtime_printer_id — stable UUID literal for printer_runtime check +""" + +from __future__ import annotations + +import asyncio +from pathlib import Path +from uuid import UUID + +import pytest_asyncio +from alembic import command +from alembic.config import Config +from app.config import Settings +from app.models.template import Template +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine + +_ALEMBIC_INI = Path(__file__).parents[3] / "alembic.ini" + +# Stable test UUID — any UUID is fine for printer_runtime (no DB validation). +_TEST_PRINTER_ID = UUID("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") + + +def _make_db_url(tmp_path, name: str) -> str: + db = tmp_path / name + return f"sqlite+aiosqlite:///{db}" + + +def _run_migrations(db_url: str) -> None: + """Run alembic upgrade head in a thread (avoids event-loop nesting).""" + cfg = Config(str(_ALEMBIC_INI)) + cfg.set_main_option("sqlalchemy.url", db_url) + cfg.attributes["configure_logger"] = False + command.upgrade(cfg, "head") + + +@pytest_asyncio.fixture +async def async_session_empty(tmp_path): + """AsyncSession backed by a fresh per-test SQLite DB at alembic head (no rows).""" + url = _make_db_url(tmp_path, "readiness_empty.db") + await asyncio.to_thread(_run_migrations, url) + engine = create_async_engine(url, echo=False) + async with AsyncSession(engine, expire_on_commit=False) as session: + yield session + await engine.dispose() + + +@pytest_asyncio.fixture +async def async_session_with_one_template(tmp_path): + """AsyncSession backed by a fresh DB with one seed Template row.""" + url = _make_db_url(tmp_path, "readiness_one_tpl.db") + await asyncio.to_thread(_run_migrations, url) + engine = create_async_engine(url, echo=False) + async with AsyncSession(engine, expire_on_commit=False) as 
session: + tpl = Template( + key="test-label", + name="Test Label", + printer_model="pt-series", + tape_width_mm=12, + definition={}, + source="seed", + ) + session.add(tpl) + await session.commit() + yield session + await engine.dispose() + + +@pytest_asyncio.fixture +async def settings_at_head(tmp_path): + """Settings whose database_url points at a migrated SQLite DB. + + verify_alembic_at_head() uses Settings.database_url to open the DB via a + sync engine. We need the DB to actually be at head so the check passes. + """ + db_path = tmp_path / "readiness_settings.db" + url = f"sqlite+aiosqlite:///{db_path}" + await asyncio.to_thread(_run_migrations, url) + return Settings(_env_file=None, database_url=url) + + +@pytest_asyncio.fixture +def runtime_printer_id() -> UUID: + """Stable UUID used as app.state.printer_id in printer_runtime check tests.""" + return _TEST_PRINTER_ID diff --git a/backend/tests/unit/services/test_job_lifecycle.py b/backend/tests/unit/services/test_job_lifecycle.py index 3d55e95..7936a7f 100644 --- a/backend/tests/unit/services/test_job_lifecycle.py +++ b/backend/tests/unit/services/test_job_lifecycle.py @@ -1,4 +1,5 @@ from datetime import UTC, datetime +from uuid import UUID import pytest from app.services.job_lifecycle import ( @@ -8,9 +9,11 @@ JobStateMachine, ) +_P = UUID("cccccccc-0000-0000-0000-000000000001") + def test_job_queued_to_printing() -> None: - job = Job(id="abc", printer_id="pt750w", state=JobState.QUEUED) + job = Job(id="abc", printer_id=_P, state=JobState.QUEUED) JobStateMachine.transition(job, JobState.PRINTING) assert job.state == JobState.PRINTING assert job.started_at is not None @@ -19,7 +22,7 @@ def test_job_queued_to_printing() -> None: def test_job_printing_to_completed() -> None: job = Job( id="abc", - printer_id="pt750w", + printer_id=_P, state=JobState.PRINTING, started_at=datetime.now(), ) @@ -29,46 +32,46 @@ def test_job_printing_to_completed() -> None: def test_invalid_transition_completed_to_printing() -> 
None: - job = Job(id="abc", printer_id="pt750w", state=JobState.COMPLETED) + job = Job(id="abc", printer_id=_P, state=JobState.COMPLETED) with pytest.raises(InvalidStateTransitionError, match="completed"): JobStateMachine.transition(job, JobState.PRINTING) def test_cancel_only_from_queued_or_paused() -> None: """Brother Raster Spec: no mid-print cancel.""" - job = Job(id="abc", printer_id="pt750w", state=JobState.PRINTING) + job = Job(id="abc", printer_id=_P, state=JobState.PRINTING) with pytest.raises(InvalidStateTransitionError, match="printing"): JobStateMachine.transition(job, JobState.CANCELLED) def test_pause_from_queued_to_paused() -> None: - job = Job(id="abc", printer_id="pt750w", state=JobState.QUEUED) + job = Job(id="abc", printer_id=_P, state=JobState.QUEUED) JobStateMachine.transition(job, JobState.PAUSED) assert job.state == JobState.PAUSED def test_resume_from_paused_to_queued() -> None: - job = Job(id="abc", printer_id="pt750w", state=JobState.PAUSED) + job = Job(id="abc", printer_id=_P, state=JobState.PAUSED) JobStateMachine.transition(job, JobState.QUEUED) assert job.state == JobState.QUEUED def test_cancel_from_paused() -> None: - job = Job(id="abc", printer_id="pt750w", state=JobState.PAUSED) + job = Job(id="abc", printer_id=_P, state=JobState.PAUSED) JobStateMachine.transition(job, JobState.CANCELLED) assert job.state == JobState.CANCELLED def test_pause_printing_not_allowed() -> None: """Brother Raster Spec: no mid-print pause.""" - job = Job(id="abc", printer_id="pt750w", state=JobState.PRINTING) + job = Job(id="abc", printer_id=_P, state=JobState.PRINTING) with pytest.raises(InvalidStateTransitionError, match="printing"): JobStateMachine.transition(job, JobState.PAUSED) def test_done_event_set_on_terminal_state() -> None: """Terminal transitions must signal the _done_event for wait_for_job().""" - job = Job(id="abc", printer_id="pt750w", state=JobState.PRINTING) + job = Job(id="abc", printer_id=_P, state=JobState.PRINTING) assert not 
job._done_event.is_set() JobStateMachine.transition(job, JobState.COMPLETED) assert job._done_event.is_set() @@ -76,14 +79,14 @@ def test_done_event_set_on_terminal_state() -> None: def test_done_event_not_set_on_pause() -> None: """Non-terminal transitions must NOT signal the _done_event.""" - job = Job(id="abc", printer_id="pt750w", state=JobState.QUEUED) + job = Job(id="abc", printer_id=_P, state=JobState.QUEUED) JobStateMachine.transition(job, JobState.PAUSED) assert not job._done_event.is_set() def test_done_event_set_on_failed() -> None: """Transition to FAILED must also set _done_event (parity with COMPLETED).""" - job = Job(id="abc", printer_id="pt750w", state=JobState.PRINTING) + job = Job(id="abc", printer_id=_P, state=JobState.PRINTING) JobStateMachine.transition(job, JobState.FAILED) assert job._done_event.is_set() assert job.finished_at is not None @@ -91,7 +94,7 @@ def test_done_event_set_on_failed() -> None: def test_done_event_set_on_cancelled() -> None: """Transition to CANCELLED must also set _done_event (parity with COMPLETED).""" - job = Job(id="abc", printer_id="pt750w", state=JobState.QUEUED) + job = Job(id="abc", printer_id=_P, state=JobState.QUEUED) JobStateMachine.transition(job, JobState.CANCELLED) assert job._done_event.is_set() assert job.finished_at is not None @@ -99,7 +102,7 @@ def test_done_event_set_on_cancelled() -> None: def test_timestamps_are_utc_aware() -> None: """submitted_at, started_at and finished_at must carry UTC tzinfo.""" - job = Job(id="abc", printer_id="pt750w", state=JobState.QUEUED) + job = Job(id="abc", printer_id=_P, state=JobState.QUEUED) JobStateMachine.transition(job, JobState.PRINTING) JobStateMachine.transition(job, JobState.COMPLETED) assert job.submitted_at.tzinfo is UTC @@ -110,7 +113,7 @@ def test_timestamps_are_utc_aware() -> None: def test_terminal_states_absorb_no_outgoing_transitions() -> None: """FAILED and CANCELLED behave like COMPLETED — no further transitions allowed.""" for terminal in 
(JobState.FAILED, JobState.CANCELLED): - job = Job(id="abc", printer_id="pt750w", state=terminal) + job = Job(id="abc", printer_id=_P, state=terminal) for target in JobState: if target == terminal: continue @@ -119,14 +122,14 @@ def test_terminal_states_absorb_no_outgoing_transitions() -> None: def test_job_has_error_code_default_none() -> None: - job = Job(id="j", printer_id="p") + job = Job(id="j", printer_id=_P) assert job.error_code is None assert job.error_message is None assert job.error_detail is None def test_job_error_fields_writable() -> None: - job = Job(id="j", printer_id="p") + job = Job(id="j", printer_id=_P) job.error_code = "tape_mismatch" job.error_message = "expected 24mm, loaded 12mm" job.error_detail = {"expected_mm": 24, "loaded_mm": 12} diff --git a/backend/tests/unit/services/test_print_queue.py b/backend/tests/unit/services/test_print_queue.py index 875a8c8..e63605c 100644 --- a/backend/tests/unit/services/test_print_queue.py +++ b/backend/tests/unit/services/test_print_queue.py @@ -1,6 +1,7 @@ import asyncio import uuid from unittest.mock import AsyncMock, MagicMock +from uuid import UUID import pytest from app.services.job_lifecycle import ( @@ -10,15 +11,19 @@ from app.services.print_queue import PrintQueue from PIL import Image +# Stable UUIDs for fake printers used throughout this test module. 
+_PT750W_ID = UUID("aaaaaaaa-0000-0000-0000-000000000001") +_P1_ID = UUID("aaaaaaaa-0000-0000-0000-000000000002") + @pytest.mark.asyncio async def test_queue_submit_returns_job_id() -> None: fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer]) img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) assert isinstance(job_id, str) uuid.UUID(job_id) # raises ValueError if not a valid UUID @@ -27,15 +32,15 @@ async def test_queue_submit_returns_job_id() -> None: async def test_queue_serial_per_printer() -> None: """Two jobs on the same printer execute serially, not in parallel.""" fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID fake_printer.print_image = AsyncMock(return_value=None) queue = PrintQueue([fake_printer]) await queue.start() try: img = Image.new("1", (300, 76)) - job_id_1 = await queue.submit("pt750w", img, tape_mm=12) - job_id_2 = await queue.submit("pt750w", img, tape_mm=12) + job_id_1 = await queue.submit(_PT750W_ID, img, tape_mm=12) + job_id_2 = await queue.submit(_PT750W_ID, img, tape_mm=12) await queue.wait_for_job(job_id_1, timeout_s=5) await queue.wait_for_job(job_id_2, timeout_s=5) @@ -48,12 +53,12 @@ async def test_queue_serial_per_printer() -> None: @pytest.mark.asyncio async def test_queue_pause_and_resume_job() -> None: fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID fake_printer.print_image = AsyncMock() queue = PrintQueue([fake_printer]) img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) assert (await queue.pause_job(job_id)) is True assert (await queue.get(job_id)).state == JobState.PAUSED assert (await queue.resume_job(job_id)) is True @@ -63,13 +68,13 @@ async def test_queue_pause_and_resume_job() -> None: 
@pytest.mark.asyncio async def test_queue_clear_cancels_all_pending() -> None: fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer]) img = Image.new("1", (300, 76)) - j1 = await queue.submit("pt750w", img, tape_mm=12) - j2 = await queue.submit("pt750w", img, tape_mm=12) - cancelled = await queue.clear_queue("pt750w") + j1 = await queue.submit(_PT750W_ID, img, tape_mm=12) + j2 = await queue.submit(_PT750W_ID, img, tape_mm=12) + cancelled = await queue.clear_queue(_PT750W_ID) assert cancelled == 2 assert (await queue.get(j1)).state == JobState.CANCELLED assert (await queue.get(j2)).state == JobState.CANCELLED @@ -79,25 +84,25 @@ async def test_queue_clear_cancels_all_pending() -> None: async def test_queue_pause_printer_blocks_worker() -> None: """When a printer is paused, the worker must not pick further jobs.""" fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID fake_printer.print_image = AsyncMock() queue = PrintQueue([fake_printer]) await queue.start() try: - await queue.pause_printer("pt750w", reason="manual pause") + await queue.pause_printer(_PT750W_ID, reason="manual pause") img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) # Deterministic check: worker is paused and must not start printing. # With the post-get pause loop, the worker pops the job and then blocks — # qsize() drops to 0 but the job state remains QUEUED (not PRINTING). 
await asyncio.sleep(0) # yield to event loop; worker should not proceed - assert queue._worker_states["pt750w"].value == "paused" + assert queue._worker_states[_PT750W_ID].value == "paused" assert (await queue.get(job_id)).state == JobState.QUEUED assert fake_printer.print_image.await_count == 0 - await queue.resume_printer("pt750w") + await queue.resume_printer(_PT750W_ID) await queue.wait_for_job(job_id, timeout_s=5) assert (await queue.get(job_id)).state == JobState.COMPLETED finally: @@ -108,7 +113,7 @@ async def test_queue_pause_printer_blocks_worker() -> None: async def test_queue_pause_after_idle_worker_is_respected() -> None: """Pausing while the worker is idle at queue.get() must still block the next pop.""" fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID fake_printer.print_image = AsyncMock() queue = PrintQueue([fake_printer]) await queue.start() @@ -118,11 +123,11 @@ async def test_queue_pause_after_idle_worker_is_respected() -> None: await asyncio.sleep(0) # Pause AFTER the worker has entered queue.get(). - await queue.pause_printer("pt750w", reason="race test") + await queue.pause_printer(_PT750W_ID, reason="race test") # Submit a job. The pause must hold. img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) # Yield a few times — worker would print here if pause was ignored. for _ in range(5): @@ -131,7 +136,7 @@ async def test_queue_pause_after_idle_worker_is_respected() -> None: assert fake_printer.print_image.await_count == 0 # Resume — job should complete now. 
- await queue.resume_printer("pt750w") + await queue.resume_printer(_PT750W_ID) await queue.wait_for_job(job_id, timeout_s=5) assert (await queue.get(job_id)).state == JobState.COMPLETED finally: @@ -150,13 +155,13 @@ async def slow_print(image, *, tape_mm, **kw): finished.set() fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID fake_printer.print_image = AsyncMock(side_effect=slow_print) queue = PrintQueue([fake_printer]) await queue.start() img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) await started.wait() # printer is in the middle of printing await queue.stop(timeout_s=5.0) @@ -168,11 +173,11 @@ async def slow_print(image, *, tape_mm, **kw): @pytest.mark.asyncio async def test_queue_retry_failed_creates_new_job() -> None: fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer]) img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) # Drive job to FAILED manually (no worker running) job = await queue.get(job_id) JobStateMachine.transition(job, JobState.PRINTING) @@ -201,7 +206,7 @@ async def test_worker_pauses_printer_on_recoverable_error() -> None: from app.services.print_queue import PrinterWorkerState class _EmptyPrinter: - id = "p1" + id = _P1_ID async def print_image(self, image, *, tape_mm, **_options): raise TapeEmptyError() @@ -210,12 +215,12 @@ async def print_image(self, image, *, tape_mm, **_options): await queue.start() try: image = Image.new("1", (200, 128)) - job_id = await queue.submit("p1", image, tape_mm=24) + job_id = await queue.submit(_P1_ID, image, tape_mm=24) job = await queue.wait_for_job(job_id, timeout_s=2.0) assert job.state == JobState.FAILED assert job.error_code == "tape_empty" # The printer is now PAUSED - assert queue._worker_states["p1"] == 
PrinterWorkerState.PAUSED + assert queue._worker_states[_P1_ID] == PrinterWorkerState.PAUSED finally: await queue.stop(timeout_s=2.0) @@ -229,7 +234,7 @@ async def test_worker_does_not_pause_on_fatal_error() -> None: from app.services.print_queue import PrinterWorkerState class _FailPrinter: - id = "p1" + id = _P1_ID async def print_image(self, image, *, tape_mm, **_options): raise PrintFailedError("bad raster") @@ -238,12 +243,12 @@ async def print_image(self, image, *, tape_mm, **_options): await queue.start() try: image = Image.new("1", (200, 128)) - job_id = await queue.submit("p1", image, tape_mm=24) + job_id = await queue.submit(_P1_ID, image, tape_mm=24) job = await queue.wait_for_job(job_id, timeout_s=2.0) assert job.state == JobState.FAILED assert job.error_code == "print_failed" # Printer NOT paused — fatal error doesn't halt the queue - assert queue._worker_states["p1"] == PrinterWorkerState.ACTIVE + assert queue._worker_states[_P1_ID] == PrinterWorkerState.ACTIVE finally: await queue.stop(timeout_s=2.0) @@ -254,7 +259,7 @@ async def print_image(self, image, *, tape_mm, **_options): class _MismatchPrinter: - id = "p1" + id = _P1_ID async def print_image(self, image: Image.Image, *, tape_mm: int, **_options: object) -> None: from app.printer_backends.exceptions import TapeMismatchError @@ -268,7 +273,7 @@ async def test_worker_records_printer_error_fields() -> None: await queue.start() try: image = Image.new("1", (200, 128)) - job_id = await queue.submit("p1", image, tape_mm=24) + job_id = await queue.submit(_P1_ID, image, tape_mm=24) job = await queue.wait_for_job(job_id, timeout_s=2.0) assert job.state == JobState.FAILED assert job.error_code == "tape_mismatch" @@ -303,11 +308,11 @@ def _cb(job, from_state, to_state, queue_depth=0): transitions.append((from_state.value, to_state.value)) fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer], on_state_change=_cb) img = Image.new("1", (300, 
76)) - await queue.submit("pt750w", img, tape_mm=12) + await queue.submit(_PT750W_ID, img, tape_mm=12) # submit() must NOT fire the callback — no real state transition happens assert not transitions, ( @@ -325,11 +330,11 @@ def _cb(job, from_state, to_state, queue_depth=0): transitions.append((from_state.value, to_state.value)) fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer], on_state_change=_cb) img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) transitions.clear() # ignore submit() callback result = await queue.pause_job(job_id) @@ -348,11 +353,11 @@ def _cb(job, from_state, to_state, queue_depth=0): transitions.append((from_state.value, to_state.value)) fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer], on_state_change=_cb) img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) await queue.pause_job(job_id) transitions.clear() # ignore previous callbacks @@ -372,11 +377,11 @@ def _cb(job, from_state, to_state, queue_depth=0): transitions.append((from_state.value, to_state.value)) fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer], on_state_change=_cb) img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) transitions.clear() result = await queue.cancel(job_id) @@ -395,15 +400,15 @@ def _cb(job, from_state, to_state, queue_depth=0): transitions.append((from_state.value, to_state.value)) fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer], on_state_change=_cb) img = Image.new("1", (300, 76)) - await 
queue.submit("pt750w", img, tape_mm=12) - await queue.submit("pt750w", img, tape_mm=12) + await queue.submit(_PT750W_ID, img, tape_mm=12) + await queue.submit(_PT750W_ID, img, tape_mm=12) transitions.clear() - count = await queue.clear_queue("pt750w") + count = await queue.clear_queue(_PT750W_ID) assert count == 2 cancelled_transitions = [t for t in transitions if t[1] == "cancelled"] assert len(cancelled_transitions) == 2, ( @@ -426,12 +431,12 @@ async def test_resume_printer_raises_when_already_active() -> None: from app.services.print_queue import PrinterAlreadyActiveError fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer]) # Printer starts ACTIVE — resume again should raise with pytest.raises(PrinterAlreadyActiveError): - await queue.resume_printer("pt750w") + await queue.resume_printer(_PT750W_ID) @pytest.mark.asyncio @@ -440,16 +445,16 @@ async def test_resume_printer_succeeds_when_paused() -> None: from app.services.print_queue import PrinterWorkerState fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID queue = PrintQueue([fake_printer]) # Pause first - await queue.pause_printer("pt750w", reason="test") - assert queue._worker_states["pt750w"] == PrinterWorkerState.PAUSED + await queue.pause_printer(_PT750W_ID, reason="test") + assert queue._worker_states[_PT750W_ID] == PrinterWorkerState.PAUSED # Resume must not raise - await queue.resume_printer("pt750w") - assert queue._worker_states["pt750w"] == PrinterWorkerState.ACTIVE + await queue.resume_printer(_PT750W_ID) + assert queue._worker_states[_PT750W_ID] == PrinterWorkerState.ACTIVE # --------------------------------------------------------------------------- @@ -479,14 +484,14 @@ async def _blocking_print(image, *, tape_mm, **kw): await stop_print.wait() # blocks until we signal or task is cancelled fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID 
fake_printer.print_image = AsyncMock(side_effect=_blocking_print) queue = PrintQueue([fake_printer]) await queue.start() img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) # Wait until the worker has actually entered print_image (PRINTING state). await printing_started.wait() @@ -520,14 +525,14 @@ async def _blocking_print(image, *, tape_mm, **kw): await asyncio.get_event_loop().create_future() fake_printer = MagicMock() - fake_printer.id = "pt750w" + fake_printer.id = _PT750W_ID fake_printer.print_image = AsyncMock(side_effect=_blocking_print) queue = PrintQueue([fake_printer]) await queue.start() img = Image.new("1", (300, 76)) - job_id = await queue.submit("pt750w", img, tape_mm=12) + job_id = await queue.submit(_PT750W_ID, img, tape_mm=12) await printing_started.wait() assert (await queue.get(job_id)).state == JobState.PRINTING diff --git a/backend/tests/unit/services/test_print_service.py b/backend/tests/unit/services/test_print_service.py index ca6dee2..3a5c6ce 100644 --- a/backend/tests/unit/services/test_print_service.py +++ b/backend/tests/unit/services/test_print_service.py @@ -1,6 +1,7 @@ from __future__ import annotations from unittest.mock import AsyncMock, MagicMock +from uuid import UUID import pytest from app.printer_backends.exceptions import ( @@ -85,13 +86,16 @@ def backend(): return m +_PRINTER_ID = UUID("bbbbbbbb-0000-0000-0000-000000000001") + + def _service(loader, renderer, queue, lookup_service, backend): return PrintService( template_loader=loader, renderer=renderer, print_queue=queue, lookup_service=lookup_service, - printer_id="pt@x", + printer_id=_PRINTER_ID, backend=backend, ) @@ -261,7 +265,7 @@ async def test_preflight_mismatch_queue_creates_paused_job( ) # submit_paused() returns the job_id; queue.get returns the job object. # The job starts PAUSED (submit_paused transitions it before registering). 
- job = Job(id="job-1", printer_id="pt@x", image_payload=b"", tape_mm=24, options={}) + job = Job(id="job-1", printer_id=_PRINTER_ID, image_payload=b"", tape_mm=24, options={}) from app.services.job_lifecycle import JobStateMachine JobStateMachine.transition(job, JobState.PAUSED) @@ -294,7 +298,7 @@ async def test_preflight_mismatch_queue_none_tape_loaded( loaded_tape_mm=None, error_flags=[], ) - job = Job(id="job-1", printer_id="pt@x", image_payload=b"", tape_mm=24, options={}) + job = Job(id="job-1", printer_id=_PRINTER_ID, image_payload=b"", tape_mm=24, options={}) from app.services.job_lifecycle import JobStateMachine JobStateMachine.transition(job, JobState.PAUSED) diff --git a/backend/tests/unit/services/test_printer_identity.py b/backend/tests/unit/services/test_printer_identity.py new file mode 100644 index 0000000..2a22e54 --- /dev/null +++ b/backend/tests/unit/services/test_printer_identity.py @@ -0,0 +1,48 @@ +"""Phase 7b Cluster 1b — derive_printer_id is a stable UUIDv5 for (model, host, port).""" + +from __future__ import annotations + +from uuid import UUID + +from app.services.printer_identity import derive_printer_id + + +def test_same_inputs_produce_same_uuid(): + a = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + b = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + assert a == b + + +def test_host_change_produces_different_uuid(): + a = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + b = derive_printer_id("PT-P750W", "192.0.2.51", 9100) + assert a != b + + +def test_port_change_produces_different_uuid(): + a = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + b = derive_printer_id("PT-P750W", "192.0.2.50", 9101) + assert a != b + + +def test_model_change_produces_different_uuid(): + a = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + b = derive_printer_id("QL-820NWB", "192.0.2.50", 9100) + assert a != b + + +def test_returns_uuid_v5(): + out = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + assert isinstance(out, UUID) + 
assert out.version == 5 + + +def test_model_case_insensitive(): + """Mixed-case model names hash to the same UUID. + + Environment may supply ``'PT-P750W'`` or ``'pt-p750w'``; both must resolve + identically. + """ + a = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + b = derive_printer_id("pt-p750w", "192.0.2.50", 9100) + assert a == b diff --git a/backend/tests/unit/services/test_readiness_builder.py b/backend/tests/unit/services/test_readiness_builder.py new file mode 100644 index 0000000..ad93f25 --- /dev/null +++ b/backend/tests/unit/services/test_readiness_builder.py @@ -0,0 +1,306 @@ +"""Phase 7b Cluster 1e — all 8 readiness checks: database, alembic, +template_seed, printer_runtime, printer_db_sync, snmp_discovery, +print_queue, sse_bus.""" + +from __future__ import annotations + +import types +from datetime import UTC, datetime, timedelta +from uuid import uuid4 + +import pytest +from app.models.printer import Printer +from app.models.printer_status_cache import PrinterStatusCache +from app.schemas.readiness import ReadinessResponse + +pytestmark = pytest.mark.asyncio + + +class _FakeState: + """Minimal stand-in for app.state with a printer_id.""" + + def __init__(self, printer_id=None): + self.printer_id = printer_id + + +# --------------------------------------------------------------------------- +# Helper for states that include print_queue + event_bus +# --------------------------------------------------------------------------- + + +def _state_with_queue_and_bus(printer_id=None, subs=0, max_subs=100): + state = _FakeState(printer_id=printer_id) + state.print_queue = types.SimpleNamespace(worker_count=lambda: 1) + state.event_bus = types.SimpleNamespace( + subscriber_count=lambda: subs, + max_subscribers=max_subs, + ) + return state + + +async def test_build_readiness_with_all_ok( + async_session_with_one_template, settings_at_head, runtime_printer_id +): + from app.services.readiness import build_readiness_response + + # Use a state that includes 
print_queue + event_bus so the queue and bus checks have objects to inspect. + state = _state_with_queue_and_bus(printer_id=runtime_printer_id) + body = await build_readiness_response( + async_session_with_one_template, + state, + settings_at_head, + version="dev", + revision="abc", + ) + assert isinstance(body, ReadinessResponse) + # printer_db_sync will fail (runtime_printer_id has no DB row) → degraded, + # but all critical checks pass. + for name in ("database", "alembic", "template_seed", "printer_runtime"): + assert body.checks[name].status == "ok", f"{name} not ok: {body.checks[name]}" + assert body.status in {"ready", "degraded"} + + +async def test_build_readiness_template_seed_fails_when_empty( + async_session_empty, settings_at_head +): + from app.services.readiness import build_readiness_response + + state = _FakeState(printer_id=None) + body = await build_readiness_response( + async_session_empty, + state, + settings_at_head, + version="dev", + revision="abc", + ) + assert body.checks["template_seed"].status == "fail" + # template_seed is critical → aggregate is not-ready + assert body.status == "not-ready" + + +async def test_build_readiness_printer_runtime_fails_when_no_id( + async_session_with_one_template, settings_at_head +): + from app.services.readiness import build_readiness_response + + state = _FakeState(printer_id=None) + body = await build_readiness_response( + async_session_with_one_template, + state, + settings_at_head, + version="dev", + revision="abc", + ) + assert body.checks["printer_runtime"].status == "fail" + # printer_runtime is non-critical → aggregate is degraded + assert body.status == "degraded" + + +# --------------------------------------------------------------------------- +# F3: printer_db_sync +# --------------------------------------------------------------------------- + + +async def test_check_printer_db_sync_skipped_when_no_runtime_id( + async_session_with_one_template, settings_at_head +): + from app.services.readiness import
build_readiness_response + + body = await build_readiness_response( + async_session_with_one_template, + _state_with_queue_and_bus(printer_id=None), + settings_at_head, + version="v", + revision="r", + ) + assert body.checks["printer_db_sync"].status == "skipped" + + +async def test_check_printer_db_sync_fail_when_id_has_no_row( + async_session_with_one_template, settings_at_head +): + from app.services.readiness import build_readiness_response + + body = await build_readiness_response( + async_session_with_one_template, + _state_with_queue_and_bus(printer_id=uuid4()), # any uuid; not in DB + settings_at_head, + version="v", + revision="r", + ) + assert body.checks["printer_db_sync"].status == "fail" + + +async def test_check_printer_db_sync_ok_when_row_exists( + async_session_with_one_template, settings_at_head +): + pid = uuid4() + # Insert a Printer row matching the runtime id + async_session_with_one_template.add( + Printer( + id=pid, + name="x", + model="pt-p750w", + backend="mock", + connection={"host": "192.0.2.50", "port": 9100}, + enabled=True, + ) + ) + await async_session_with_one_template.flush() + + from app.services.readiness import build_readiness_response + + body = await build_readiness_response( + async_session_with_one_template, + _state_with_queue_and_bus(printer_id=pid), + settings_at_head, + version="v", + revision="r", + ) + assert body.checks["printer_db_sync"].status == "ok" + + +# --------------------------------------------------------------------------- +# F3: snmp_discovery +# --------------------------------------------------------------------------- + + +async def test_check_snmp_discovery_fail_when_no_probe_yet( + async_session_with_one_template, settings_at_head +): + pid = uuid4() + async_session_with_one_template.add( + Printer( + id=pid, + name="x", + model="pt-p750w", + backend="mock", + connection={"host": "h", "port": 9100}, + enabled=True, + ) + ) + await async_session_with_one_template.flush() + from app.services.readiness 
import build_readiness_response + + body = await build_readiness_response( + async_session_with_one_template, + _state_with_queue_and_bus(printer_id=pid), + settings_at_head, + version="v", + revision="r", + ) + assert body.checks["snmp_discovery"].status == "fail" + + +async def test_check_snmp_discovery_ok_when_fresh( + async_session_with_one_template, settings_at_head +): + pid = uuid4() + async_session_with_one_template.add( + Printer( + id=pid, + name="x", + model="pt-p750w", + backend="mock", + connection={"host": "h", "port": 9100}, + enabled=True, + ) + ) + async_session_with_one_template.add( + PrinterStatusCache( + printer_id=pid, + captured_at=datetime.now(UTC), + parsed={"online": True, "tape_width_mm": 12}, + raw_block=None, + ) + ) + await async_session_with_one_template.flush() + from app.services.readiness import build_readiness_response + + body = await build_readiness_response( + async_session_with_one_template, + _state_with_queue_and_bus(printer_id=pid), + settings_at_head, + version="v", + revision="r", + ) + assert body.checks["snmp_discovery"].status == "ok" + assert "last_probe_age_s" in body.checks["snmp_discovery"].metric + + +async def test_check_snmp_discovery_stale_between_90_and_600( + async_session_with_one_template, settings_at_head +): + pid = uuid4() + async_session_with_one_template.add( + Printer( + id=pid, + name="x", + model="pt-p750w", + backend="mock", + connection={"host": "h", "port": 9100}, + enabled=True, + ) + ) + async_session_with_one_template.add( + PrinterStatusCache( + printer_id=pid, + captured_at=datetime.now(UTC) - timedelta(seconds=200), + parsed={"online": True}, + raw_block=None, + ) + ) + await async_session_with_one_template.flush() + from app.services.readiness import build_readiness_response + + body = await build_readiness_response( + async_session_with_one_template, + _state_with_queue_and_bus(printer_id=pid), + settings_at_head, + version="v", + revision="r", + ) + assert 
body.checks["snmp_discovery"].status == "stale" + + +# --------------------------------------------------------------------------- +# F3: print_queue +# --------------------------------------------------------------------------- + + +async def test_check_print_queue_fail_when_missing( + async_session_with_one_template, settings_at_head +): + state = _FakeState(printer_id=uuid4()) + # NO print_queue attribute + state.event_bus = types.SimpleNamespace(subscriber_count=lambda: 0, max_subscribers=100) + from app.services.readiness import build_readiness_response + + body = await build_readiness_response( + async_session_with_one_template, + state, + settings_at_head, + version="v", + revision="r", + ) + assert body.checks["print_queue"].status == "fail" + + +# --------------------------------------------------------------------------- +# F3: sse_bus +# --------------------------------------------------------------------------- + + +async def test_check_sse_bus_fail_when_subscribers_at_max( + async_session_with_one_template, settings_at_head +): + from app.services.readiness import build_readiness_response + + body = await build_readiness_response( + async_session_with_one_template, + _state_with_queue_and_bus(printer_id=uuid4(), subs=100, max_subs=100), + settings_at_head, + version="v", + revision="r", + ) + assert body.checks["sse_bus"].status == "fail" diff --git a/backend/tests/unit/test_alembic_verify.py b/backend/tests/unit/test_alembic_verify.py new file mode 100644 index 0000000..7368ddf --- /dev/null +++ b/backend/tests/unit/test_alembic_verify.py @@ -0,0 +1,66 @@ +"""Phase 7b Cluster 1d — verify_alembic_at_head fails fast on revision drift.""" + +from __future__ import annotations + +import asyncio +from pathlib import Path + +import pytest +from alembic import command +from alembic.config import Config +from app.config import Settings +from app.db.lifespan import verify_alembic_at_head + +pytestmark = pytest.mark.asyncio + + +_ALEMBIC_INI = 
Path(__file__).resolve().parents[2] / "alembic.ini" + + +def _alembic_cfg(db_url_async: str) -> Config: + cfg = Config(str(_ALEMBIC_INI)) + cfg.set_main_option("sqlalchemy.url", db_url_async) + cfg.attributes["configure_logger"] = False # Phase 7b B6 learning + return cfg + + +def _settings(db_url_async: str) -> Settings: + return Settings( + database_url=db_url_async, + printer_backend="mock", + _env_file=None, # type: ignore[call-arg] + ) + + +async def test_verify_passes_when_db_at_head(tmp_path: Path) -> None: + db = tmp_path / "atomic_e1_head.db" + async_url = f"sqlite+aiosqlite:///{db}" + # command.upgrade calls asyncio.run() via env.py — must run in a thread + # to avoid "asyncio.run() cannot be called from a running event loop". + await asyncio.to_thread(command.upgrade, _alembic_cfg(async_url), "head") + settings = _settings(async_url) + # Should not raise + await verify_alembic_at_head(settings) + + +async def test_verify_raises_on_stale_db(tmp_path: Path) -> None: + db = tmp_path / "atomic_e1_stale.db" + async_url = f"sqlite+aiosqlite:///{db}" + cfg = _alembic_cfg(async_url) + # Both alembic commands call asyncio.run() in env.py — run in threads. 
+ await asyncio.to_thread(command.upgrade, cfg, "head") + await asyncio.to_thread(command.downgrade, cfg, "-1") + + settings = _settings(async_url) + with pytest.raises(RuntimeError, match="drift"): + await verify_alembic_at_head(settings) + + +async def test_verify_raises_on_empty_db(tmp_path: Path) -> None: + """Brand-new DB with no alembic_version table → not at head → must raise.""" + db = tmp_path / "atomic_e1_empty.db" + async_url = f"sqlite+aiosqlite:///{db}" + # NO alembic upgrade — DB is completely empty + settings = _settings(async_url) + with pytest.raises(RuntimeError, match="drift"): + await verify_alembic_at_head(settings) diff --git a/backend/tests/unit/test_lifespan.py b/backend/tests/unit/test_lifespan.py index 1a20c1d..f12b44c 100644 --- a/backend/tests/unit/test_lifespan.py +++ b/backend/tests/unit/test_lifespan.py @@ -28,6 +28,26 @@ async def _noop_migrations() -> None: """ +async def _noop_verify(*_args, **_kwargs) -> None: + """Drop-in for verify_alembic_at_head() in unit lifespan tests. + + The clean_registries fixture builds the schema via create_all() which does + not populate alembic_version. Patching out verify avoids a spurious + RuntimeError — same rationale as patching run_migrations to a no-op. + """ + + +async def _noop_seed_templates(*_args, **_kwargs) -> int: # type: ignore[no-untyped-def] + """Drop-in for seed_templates() in unit lifespan tests. + + The D1 defensive check raises RuntimeError when TemplateLoader._cache is + empty. These tests exercise printer backend / SNMP discovery paths and do + not require templates; patching avoids the spurious failure until D2 reorders + load_dir before seed_templates in main.py lifespan. + """ + return 0 + + @pytest_asyncio.fixture(autouse=True) async def clean_registries(monkeypatch: pytest.MonkeyPatch, tmp_path): # type: ignore[misc] """Reset registries and swap the module-level engine for a temp DB. 
@@ -62,6 +82,10 @@ async def clean_registries(monkeypatch: pytest.MonkeyPatch, tmp_path): # type: # run_migrations`. Patching _lifespan_module alone does not update that # local binding; we must also patch the name on _main_module. monkeypatch.setattr(_main_module, "run_migrations", _noop_migrations) + # verify_alembic_at_head checks alembic_version which is not created by + # create_all() — patch it for the same reason run_migrations is patched. + monkeypatch.setattr(_lifespan_module, "verify_alembic_at_head", _noop_verify) + monkeypatch.setattr(_main_module, "verify_alembic_at_head", _noop_verify) BackendRegistry._factories.clear() BackendRegistry._discovered = False diff --git a/docs/superpowers/plans/2026-05-17-phase-7b-foundation.md b/docs/superpowers/plans/2026-05-17-phase-7b-foundation.md new file mode 100644 index 0000000..686d05c --- /dev/null +++ b/docs/superpowers/plans/2026-05-17-phase-7b-foundation.md @@ -0,0 +1,2580 @@ +# Phase 7b Foundation Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Harden the label-printer-hub lifespan, datetime handling, health surface, and SNMP polling so the next production deploy is reproducible end-to-end and observable through `/readiness`. + +**Architecture:** Nine focused clusters layered in dependency order — datetime-TZ first (touches every model), then printer identity, then lifespan init-order, then alembic verify, then the new `/readiness` endpoint, then status-cache plumbing, then the frontend proxy widening, last the README + a final production smoke check. Each cluster is a sequence of TDD red→green→commit cycles. + +**Tech Stack:** Python 3.12 + FastAPI + SQLModel (async) + aiosqlite + Alembic + pytest-asyncio. Frontend Go + chi/v5 router + `net/http/httputil` reverse proxy + oapi-codegen client. 
Strict TDD per repo policy (`docs/policies/contributing.md`). Conventional Commits enforced by `commitlint.config.cjs` — type from `{feat, fix, refactor, test, docs, chore, ci}`, scope from `{api, queue, status, webhook, docker, ci, examples, docs, integration, security}`. + +**Spec:** `docs/superpowers/specs/2026-05-17-phase-7b-foundation-design.md` (Merged via PR #74, 2026-05-17). + +**Tracking:** `Refs #22` at the end of every commit body. + +--- + +## File Structure + +| File | Responsibility | Phase | +|---|---|---| +| `backend/app/schemas/_datetime.py` (NEW) | Pydantic field-serializer that coerces naive datetimes to UTC and emits RFC3339-with-Z | B | +| `backend/app/models/{template,printer,job,preset,printer_state,printer_status_cache}.py` (MODIFY) | Add `DateTime(timezone=True)` columns + UTC `default_factory` | B | +| `backend/app/schemas/{template_read,printer,job}.py` (MODIFY) | Apply `@field_serializer("created_at","updated_at",...)` | B | +| `backend/alembic/versions/20260517_phase7b_datetime_tz.py` (NEW) | Idempotent data migration: existing rows get `+00:00` suffix | B | +| `backend/app/services/printer_identity.py` (NEW) | `derive_printer_id(model, host, port)` → deterministic UUIDv5 | C | +| `backend/app/db/lifespan.py` (MODIFY) | Add `upsert_runtime_printer`, defensive check in `seed_templates`, `verify_alembic_at_head`; correct docstring | C, D, E | +| `backend/app/printer_backends/_queue_factory.py` (MODIFY)¹ | Driver `.make_queue_printer(tape_registry, printer_id: UUID \| None = None)` | C | +| `backend/app/main.py` (MODIFY lines 235–245, 270–276) | Re-ordered lifespan + plumb `db_printer_id` to driver | D | +| `backend/app/schemas/readiness.py` (NEW) | `CheckStatus`, `ReadinessResponse` Pydantic models | F | +| `backend/app/services/readiness.py` (NEW) | `build_readiness_response(session, app_state)` aggregator | F | +| `backend/app/api/routes/meta.py` (MODIFY)² | Add `GET /readiness` route | F | +| 
`backend/app/services/producers/status_probe_producer.py` (MODIFY) | `_upsert_cache(snmp_result)`, `_mark_offline(exc)` | G | +| `backend/app/schemas/printer.py` (MODIFY) | Extend `PrinterStatus` with `captured_at, last_probe_age_s, last_error, note` | G | +| `backend/app/api/routes/printers.py` (MODIFY) | `GET /api/printers/{id}/status` reads from cache, not sync SNMP | G | +| `frontend/cmd/server/main.go` (MODIFY lines 137–144) | `r.Mount("/docs",prx); r.Mount("/openapi.json",prx); r.Mount("/redoc",prx)` | H | +| `frontend/cmd/server/main_test.go` (MODIFY) | Assert the 3 new proxy mounts forward to backend | H | +| `README.md` (MODIFY) | Document `/readiness` endpoint + link to spec | I | + +¹ Exact file/class name depends on existing driver layout; the implementer must `grep` for `make_queue_printer` and modify the single implementation. +² If `meta.py` doesn't exist, add the route to `backend/app/main.py` next to `/healthz` (currently inline there). + +--- + +## Phase A — Setup + +### Task A1: Create feature branch from main + +**Files:** none (git only) + +- [ ] **Step 1: Confirm clean main and pull** + +```bash +cd /opt/repos/label-printer-hub +git checkout main +git pull --ff-only origin main +git log -1 --oneline +``` + +Expected: latest commit is the PR #74 squash-merge of the Phase 7b spec. + +- [ ] **Step 2: Create branch** + +```bash +git checkout -b feat/phase-7b-foundation +``` + +- [ ] **Step 3: Confirm baseline tests pass** + +```bash +cd backend && uv run pytest -q +``` + +Expected: all tests pass (existing baseline). + +No commit. Branch is the workspace for all subsequent tasks. 
+ +--- + +## Phase B — Cluster 1c: Datetime-TZ Serialisation + +### Task B1: Test serialize_datetime_utc helper + +**Files:** +- Create: `backend/app/schemas/_datetime.py` +- Create: `backend/tests/unit/schemas/test_datetime_serializer.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/unit/schemas/test_datetime_serializer.py +from datetime import datetime, timezone, timedelta + +from app.schemas._datetime import serialize_datetime_utc + + +def test_naive_datetime_gets_utc_tz_and_z_suffix(): + naive = datetime(2026, 5, 17, 12, 0, 0) + assert serialize_datetime_utc(naive, None) == "2026-05-17T12:00:00Z" + + +def test_utc_aware_datetime_serialised_with_z_suffix(): + aware = datetime(2026, 5, 17, 12, 0, 0, tzinfo=timezone.utc) + assert serialize_datetime_utc(aware, None) == "2026-05-17T12:00:00Z" + + +def test_non_utc_aware_datetime_kept_with_offset(): + plus_two = datetime(2026, 5, 17, 14, 0, 0, tzinfo=timezone(timedelta(hours=2))) + assert serialize_datetime_utc(plus_two, None) == "2026-05-17T14:00:00+02:00" +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/unit/schemas/test_datetime_serializer.py -v +``` + +Expected: FAIL with `ImportError: cannot import name 'serialize_datetime_utc'`. + +- [ ] **Step 3: Implement minimal helper** + +```python +# backend/app/schemas/_datetime.py +"""Helpers for datetime serialisation in Pydantic schemas. + +The Go frontend's oapi-codegen client uses strict RFC3339 parsing which +rejects naive datetimes (no `Z` or `+HH:MM` suffix). This helper normalises +every datetime to a timezone-aware UTC value before serialisation. +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + + +def serialize_datetime_utc(dt: datetime, _info: Any) -> str: + """Pydantic field-serializer: emit RFC3339 with `Z` for UTC values. 
+ + - naive datetimes are treated as UTC (matches SQLite legacy behaviour) + - UTC-aware datetimes are emitted with `Z` + - non-UTC-aware datetimes keep their explicit offset + """ + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return dt.isoformat().replace("+00:00", "Z") +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd backend && uv run pytest tests/unit/schemas/test_datetime_serializer.py -v +``` + +Expected: 3 passed. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/schemas/_datetime.py backend/tests/unit/schemas/test_datetime_serializer.py +git commit -m "$(cat <<'EOF' +feat(api): add serialize_datetime_utc helper for RFC3339 with Z + +Go frontend oapi-codegen rejects naive datetimes. Helper normalises any +datetime to a timezone-aware ISO string before serialisation. + +Refs #22 +EOF +)" +``` + +### Task B2: Apply field_serializer to TemplateRead + +**Files:** +- Modify: `backend/app/schemas/template_read.py` +- Create: `backend/tests/integration/api/test_api_datetime_format.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/integration/api/test_api_datetime_format.py +"""Contract test for Phase 7b Cluster 1c — every datetime field in the API +response must include a timezone suffix (Z or +HH:MM).""" + +import pytest +from datetime import datetime + +pytestmark = pytest.mark.asyncio + + +def _has_tz_suffix(s: str) -> bool: + return s.endswith("Z") or "+" in s or "-" in s[10:] # skip date dashes + + +async def test_template_read_has_tz_suffix(api_client_with_seed): + """GET /api/templates returns datetimes with TZ info.""" + resp = await api_client_with_seed.get("/api/templates") + assert resp.status_code == 200 + body = resp.json() + assert body, "expected at least one seeded template" + for t in body: + for field in ("created_at", "updated_at"): + assert _has_tz_suffix(t[field]), \ + f"{field}={t[field]!r} missing TZ suffix" + datetime.fromisoformat(t[field].replace("Z", "+00:00")) +``` 
+ +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/integration/api/test_api_datetime_format.py::test_template_read_has_tz_suffix -v +``` + +Expected: FAIL — current TemplateRead returns naive datetime. + +- [ ] **Step 3: Add field_serializer** + +Insert in `backend/app/schemas/template_read.py` (near the bottom of the `TemplateRead` class): + +```python +from pydantic import field_serializer +from app.schemas._datetime import serialize_datetime_utc + +class TemplateRead(BaseModel): + # ... existing fields ... + created_at: datetime + updated_at: datetime + + @field_serializer("created_at", "updated_at") + def _serialise_datetimes(self, dt: datetime, _info): + return serialize_datetime_utc(dt, _info) +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd backend && uv run pytest tests/integration/api/test_api_datetime_format.py::test_template_read_has_tz_suffix -v +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/schemas/template_read.py backend/tests/integration/api/test_api_datetime_format.py +git commit -m "$(cat <<'EOF' +fix(api): TemplateRead emits RFC3339 datetimes with Z suffix + +Go oapi-codegen client rejected naive datetimes from /api/templates +with `parsing time "..." cannot parse "" as "Z07:00"`. Apply the new +serialize_datetime_utc helper via @field_serializer. 
+ +Refs #22 +EOF +)" +``` + +### Task B3: Apply field_serializer to PrinterRead, JobRead, PresetRead + +**Files:** +- Modify: `backend/app/schemas/printer.py` +- Modify: `backend/app/schemas/job.py` +- Modify: `backend/app/schemas/preset.py` (if it exists; otherwise skip) +- Modify: `backend/tests/integration/api/test_api_datetime_format.py` (add three more tests) + +- [ ] **Step 1: Extend the test file** + +Append to `backend/tests/integration/api/test_api_datetime_format.py`: + +```python +async def test_printer_read_has_tz_suffix(api_client_with_seed): + resp = await api_client_with_seed.get("/api/printers") + assert resp.status_code == 200 + body = resp.json() + assert body + for p in body: + for field in ("created_at", "updated_at"): + assert _has_tz_suffix(p[field]) + + +async def test_job_read_has_tz_suffix(api_client_with_completed_job): + resp = await api_client_with_completed_job.get("/api/jobs?limit=1") + body = resp.json() + assert body + for j in body: + for field in ("created_at", "updated_at"): + assert _has_tz_suffix(j[field]) + if j.get("printed_at"): + assert _has_tz_suffix(j["printed_at"]) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd backend && uv run pytest tests/integration/api/test_api_datetime_format.py -v +``` + +Expected: 2 FAIL on `printer` and `job`, 1 PASS on `template`. + +- [ ] **Step 3: Apply field_serializer** + +Add the same `@field_serializer("created_at", "updated_at")` pattern from Task B2 to `PrinterRead` in `backend/app/schemas/printer.py` and to `JobRead` in `backend/app/schemas/job.py`. 
For `JobRead.printed_at` use a separate serialiser that handles `None`: + +```python +@field_serializer("printed_at") +def _serialise_printed_at(self, dt: datetime | None, _info): + return serialize_datetime_utc(dt, _info) if dt is not None else None +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/integration/api/test_api_datetime_format.py -v +``` + +Expected: 3 passed. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/schemas/printer.py backend/app/schemas/job.py backend/tests/integration/api/test_api_datetime_format.py +git commit -m "$(cat <<'EOF' +fix(api): PrinterRead + JobRead emit RFC3339 datetimes with Z suffix + +Same Go-oapi-codegen contract fix as TemplateRead. Job.printed_at +keeps None handling. + +Refs #22 +EOF +)" +``` + +### Task B4: SQLAlchemy models use DateTime(timezone=True) + UTC default + +**Files:** +- Modify: `backend/app/models/template.py` +- Modify: `backend/app/models/printer.py` +- Modify: `backend/app/models/job.py` +- Modify: `backend/app/models/preset.py` +- Modify: `backend/app/models/printer_state.py` +- Modify: `backend/app/models/printer_status_cache.py` +- Create: `backend/tests/unit/models/test_datetime_columns.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/unit/models/test_datetime_columns.py +"""Phase 7b Cluster 1c — every datetime column must be timezone-aware.""" + +import pytest +from sqlalchemy import DateTime + +from app.models.template import Template +from app.models.printer import Printer +from app.models.job import Job +from app.models.printer_state import PrinterState +from app.models.printer_status_cache import PrinterStatusCache + + +@pytest.mark.parametrize("model,columns", [ + (Template, ["created_at", "updated_at"]), + (Printer, ["created_at", "updated_at"]), + (Job, ["created_at", "updated_at", "printed_at"]), + (PrinterState, ["created_at", "updated_at"]), + (PrinterStatusCache, ["captured_at", "updated_at"]), +]) +def 
test_datetime_columns_are_timezone_aware(model, columns): + for col_name in columns: + col = model.__table__.columns[col_name] + assert isinstance(col.type, DateTime), f"{model.__name__}.{col_name} is not DateTime" + assert col.type.timezone is True, \ + f"{model.__name__}.{col_name} must be DateTime(timezone=True)" +``` + +(If a `preset` model has datetimes, add it to the parametrize list. If not, skip.) + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/unit/models/test_datetime_columns.py -v +``` + +Expected: FAIL — most columns are `DateTime()` without `timezone=True`. + +- [ ] **Step 3: Update each model** + +For every datetime column in the listed models, replace the existing column declaration with: + +```python +from datetime import datetime, timezone +from sqlalchemy import Column, DateTime +from sqlmodel import Field + +class Template(SQLModel, table=True): + # ... other columns ... + created_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + sa_column=Column(DateTime(timezone=True), nullable=False), + ) + updated_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + sa_column=Column( + DateTime(timezone=True), + nullable=False, + onupdate=lambda: datetime.now(timezone.utc), + ), + ) +``` + +For `Job.printed_at` (nullable): + +```python +printed_at: datetime | None = Field( + default=None, + sa_column=Column(DateTime(timezone=True), nullable=True), +) +``` + +For `PrinterStatusCache.captured_at` (nullable): + +```python +captured_at: datetime | None = Field( + default=None, + sa_column=Column(DateTime(timezone=True), nullable=True), +) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/unit/models/test_datetime_columns.py -v +cd backend && uv run pytest tests/integration/api/test_api_datetime_format.py -v +``` + +Expected: all green. 
+ +- [ ] **Step 5: Commit** + +```bash +git add backend/app/models/ backend/tests/unit/models/test_datetime_columns.py +git commit -m "$(cat <<'EOF' +refactor(api): SQLAlchemy datetime columns are timezone-aware UTC + +Every model column (templates/printers/jobs/presets/printer_state/ +printer_status_cache) now uses DateTime(timezone=True) with +default_factory=lambda: datetime.now(timezone.utc). Fresh inserts +write tz-aware values that survive the SQLite roundtrip. + +Refs #22 +EOF +)" +``` + +### Task B5: Alembic data migration for existing rows + +**Files:** +- Create: `backend/alembic/versions/20260517_phase7b_datetime_tz.py` +- Create: `backend/tests/integration/db/test_alembic_phase7b_migration.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/integration/db/test_alembic_phase7b_migration.py +"""Phase 7b — datetime data migration is idempotent and adds +00:00 to naive rows.""" + +import pytest +from sqlalchemy import text + +from alembic import command +from alembic.config import Config + + +pytestmark = pytest.mark.asyncio + + +def _alembic_config(db_url: str) -> Config: + cfg = Config("backend/alembic.ini") + cfg.set_main_option("sqlalchemy.url", db_url) + cfg.attributes["configure_logger"] = False + return cfg + + +async def test_migration_adds_tz_to_naive_rows(empty_sqlite_db, async_engine): + """Insert naive datetimes, run migration, assert all rows have +00:00.""" + # 1. upgrade to head-1 (pre-7b) + cfg = _alembic_config(empty_sqlite_db.url) + command.upgrade(cfg, "head") + # 2. insert a naive datetime row + async with async_engine.begin() as conn: + await conn.execute(text( + "INSERT INTO templates (id, key, name, app, printer_model, tape_width_mm, " + "schema_version, definition, source, created_at, updated_at) " + "VALUES ('11111111-1111-1111-1111-111111111111', 'k', 'n', NULL, 'pt-series', " + "12, 1, '{}', 'seed', '2026-05-17T12:00:00', '2026-05-17T12:00:00')" + )) + # 3. 
patch the row to look like a pre-7b naive timestamp (alembic upgrade did not + # create this — the row was inserted manually above; we just want to verify the + # migration is idempotent on already-correct rows). Run migration a second time. + command.upgrade(cfg, "head") + async with async_engine.begin() as conn: + result = await conn.execute(text("SELECT created_at FROM templates")) + row = result.first() + assert "+00:00" in row[0] or row[0].endswith("Z"), \ + f"created_at not normalised: {row[0]!r}" + + +async def test_migration_idempotent_on_already_tz_aware_rows(async_engine): + """Running the migration twice does not append +00:00 twice.""" + cfg = _alembic_config("sqlite:///:memory:") + command.upgrade(cfg, "head") + command.upgrade(cfg, "head") # idempotent + # no exception, no doubled suffix +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/integration/db/test_alembic_phase7b_migration.py -v +``` + +Expected: FAIL with `Can't locate revision identified by '20260517_phase7b_datetime_tz'`. + +- [ ] **Step 3: Write the migration** + +```python +# backend/alembic/versions/20260517_phase7b_datetime_tz.py +"""Phase 7b — normalise existing datetime rows to timezone-aware ISO strings. + +Existing rows in templates/printers/jobs/presets/printer_state/printer_status_cache +were inserted with naive datetimes when DateTime() lacked timezone=True. The Go +frontend's oapi-codegen client rejects them with `cannot parse "" as "Z07:00"`. + +This migration is idempotent: it only updates rows whose datetime strings do NOT +already contain `+` or `Z`. + +Revision ID: 20260517_phase7b_datetime_tz +Revises: <PREVIOUS_HEAD> +Create Date: 2026-05-17 +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "20260517_phase7b_datetime_tz" +down_revision = "<PREVIOUS_HEAD>" # implementer: run `cd backend && uv run alembic heads` +branch_labels = None +depends_on = None + +_TABLES_DT = [ + ("templates", ["created_at", "updated_at"]), + ("printers", ["created_at", "updated_at"]), + ("jobs", ["created_at", "updated_at", "printed_at"]), + ("presets", ["created_at", "updated_at"]), + ("printer_state", ["created_at", "updated_at"]), + ("printer_status_cache", ["captured_at", "updated_at"]), +] + + +def upgrade() -> None: + for table, cols in _TABLES_DT: + for col in cols: + op.execute( + f"UPDATE {table} SET {col} = {col} || '+00:00' " + f"WHERE {col} IS NOT NULL " + f"AND {col} NOT LIKE '%+%' " + f"AND {col} NOT LIKE '%Z'" + ) + + +def downgrade() -> None: + # Datetime suffix-stripping is risky and the prior naive behaviour is + # the bug being fixed — downgrade is a no-op. + pass +``` + +The implementer must replace `<PREVIOUS_HEAD>` with the result of `cd backend && uv run alembic heads` BEFORE running the test. List `presets` only if a `presets` table exists in the schema; otherwise remove that line. + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run alembic upgrade head +cd backend && uv run pytest tests/integration/db/test_alembic_phase7b_migration.py -v +``` + +Expected: 2 passed. + +- [ ] **Step 5: Commit** + +```bash +git add backend/alembic/versions/20260517_phase7b_datetime_tz.py backend/tests/integration/db/test_alembic_phase7b_migration.py +git commit -m "$(cat <<'EOF' +fix(api): alembic data migration normalises naive datetimes to UTC + +Existing rows from Phase 5 inserts contain naive datetimes that break +the Go frontend's RFC3339 parser. Migration appends '+00:00' to any +value without an explicit TZ marker. Idempotent via WHERE NOT LIKE. 
+ +Refs #22 +EOF +)" +``` + +--- + +## Phase C — Cluster 1b: Printer Identity + +### Task C1: derive_printer_id helper + +**Files:** +- Create: `backend/app/services/printer_identity.py` +- Create: `backend/tests/unit/services/test_printer_identity.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/unit/services/test_printer_identity.py +from uuid import UUID + +from app.services.printer_identity import derive_printer_id + + +def test_same_inputs_produce_same_uuid(): + a = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + b = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + assert a == b + + +def test_host_change_produces_different_uuid(): + a = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + b = derive_printer_id("PT-P750W", "192.0.2.51", 9100) + assert a != b + + +def test_returns_uuid_v5(): + out = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + assert isinstance(out, UUID) + assert out.version == 5 + + +def test_case_insensitive_model_normalised(): + """Model is upper/lower case but identity stays stable.""" + a = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + b = derive_printer_id("pt-p750w", "192.0.2.50", 9100) + assert a == b +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/unit/services/test_printer_identity.py -v +``` + +Expected: FAIL with `ModuleNotFoundError`. + +- [ ] **Step 3: Implement** + +```python +# backend/app/services/printer_identity.py +"""Deterministic printer UUIDv5 from environment configuration. + +Lifespan derives a printer.id from `(model, host, port)` so that the +runtime printer and the DB row share the same id across restarts. +The namespace UUID is a constant committed to the repo; identical +env values always produce the same printer.id. +""" + +from __future__ import annotations + +from uuid import UUID, uuid5 + +# Constant namespace for printer identity derivation. 
Do not change without +# a coordinated DB migration — would orphan all existing printer rows. +_PRINTER_NAMESPACE = UUID("6f1b3c7e-9d6a-4f48-9a8c-d4e0e1c5a3b2") + + +def derive_printer_id(model: str, host: str, port: int) -> UUID: + """Return a stable UUIDv5 for the (model, host, port) triple.""" + return uuid5(_PRINTER_NAMESPACE, f"{model.lower()}|{host}|{port}") +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd backend && uv run pytest tests/unit/services/test_printer_identity.py -v +``` + +Expected: 4 passed. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/services/printer_identity.py backend/tests/unit/services/test_printer_identity.py +git commit -m "$(cat <<'EOF' +feat(api): derive_printer_id helper for deterministic UUIDv5 + +Lifespan can now compute a stable printer.id from env config so +runtime printer and DB row share the same id across restarts. + +Refs #22 +EOF +)" +``` + +### Task C2: upsert_runtime_printer lifespan helper + +**Files:** +- Modify: `backend/app/db/lifespan.py` +- Create: `backend/tests/integration/db/test_lifespan_printer_upsert.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/integration/db/test_lifespan_printer_upsert.py +import pytest +from sqlmodel import select + +from app.db.lifespan import upsert_runtime_printer +from app.models.printer import Printer +from app.services.printer_identity import derive_printer_id +from app.config import Settings + + +pytestmark = pytest.mark.asyncio + + +def _settings_with_pt750w() -> Settings: + return Settings( + printer_backend="ptouch", + printer_model="PT-P750W", + pt750w_host="192.0.2.50", + pt750w_port=9100, + printer_discover_via_snmp=False, + printer_snmp_community="public", + webhook_api_key="x" * 32, + ) + + +async def test_upsert_creates_row_when_db_empty(async_session_empty): + settings = _settings_with_pt750w() + expected_id = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + + returned_id = await 
upsert_runtime_printer(async_session_empty, settings) + + assert returned_id == expected_id + result = await async_session_empty.execute(select(Printer)) + rows = list(result.scalars()) + assert len(rows) == 1 + assert rows[0].id == expected_id + assert rows[0].connection["host"] == "192.0.2.50" + + +async def test_upsert_is_idempotent(async_session_empty): + settings = _settings_with_pt750w() + a = await upsert_runtime_printer(async_session_empty, settings) + b = await upsert_runtime_printer(async_session_empty, settings) + assert a == b + result = await async_session_empty.execute(select(Printer)) + assert len(list(result.scalars())) == 1 + + +async def test_upsert_returns_none_when_no_env_printer(async_session_empty): + settings = Settings( + printer_backend="mock", + printer_model="", + pt750w_host=None, + ql820_host=None, + webhook_api_key="x" * 32, + ) + assert await upsert_runtime_printer(async_session_empty, settings) is None +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/integration/db/test_lifespan_printer_upsert.py -v +``` + +Expected: FAIL — `upsert_runtime_printer` not defined. + +- [ ] **Step 3: Implement** + +Append to `backend/app/db/lifespan.py`: + +```python +from uuid import UUID + +from app.config import Settings +from app.models.printer import Printer +from app.services.printer_identity import derive_printer_id + + +async def upsert_runtime_printer( + session: AsyncSession, + settings: Settings, +) -> UUID | None: + """Upsert one Printer row from env config. Idempotent. Returns the UUID + or None when the env does not declare a printer (e.g. mock backend). + + Lifespan calls this between `seed_templates` and `ensure_printer_state` + so that ensure_printer_state can create a printer_state row for the + upserted printer. 
+ """ + model = settings.printer_model + host = settings.pt750w_host or getattr(settings, "ql820_host", None) or "" + port = ( + settings.pt750w_port + if settings.pt750w_host + else getattr(settings, "ql820_port", 0) + ) + if not (model and host and port): + return None + + printer_id = derive_printer_id(model, host, port) + existing = await session.get(Printer, printer_id) + connection = { + "host": host, + "port": port, + "snmp": settings.printer_discover_via_snmp, + "snmp_community": settings.printer_snmp_community, + } + if existing is not None: + existing.name = f"{model} ({host})" + existing.connection = connection + existing.enabled = True + else: + session.add( + Printer( + id=printer_id, + name=f"{model} ({host})", + model=model.lower(), + backend=settings.printer_backend, + connection=connection, + enabled=True, + ) + ) + await session.flush() + return printer_id +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/integration/db/test_lifespan_printer_upsert.py -v +``` + +Expected: 3 passed. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/db/lifespan.py backend/tests/integration/db/test_lifespan_printer_upsert.py +git commit -m "$(cat <<'EOF' +feat(api): upsert_runtime_printer lifespan helper + +Creates or refreshes one DB Printer row from env config using the +deterministic UUIDv5 derived in C1. Idempotent across restarts. + +Refs #22 +EOF +)" +``` + +### Task C3: Driver `make_queue_printer` accepts optional printer_id + +**Files:** +- Modify: the file that defines `make_queue_printer` (grep `def make_queue_printer` in `backend/app/`) +- Create: `backend/tests/unit/printer_backends/test_make_queue_printer_id_param.py` + +- [ ] **Step 1: Locate the implementation** + +```bash +cd backend && grep -rn "def make_queue_printer" app/ +``` + +Expected: one or two hits in `app/printer_backends/` or `app/printer_models/`. Note the exact file/class. 
+ +- [ ] **Step 2: Write failing test** + +```python +# backend/tests/unit/printer_backends/test_make_queue_printer_id_param.py +from uuid import UUID, uuid4 + +# adjust the import to match the file located in Step 1 +from app.printer_models.pt_series import PtSeriesDriver # EXAMPLE PATH + + +class _MockBackend: + pass + + +def test_make_queue_printer_accepts_optional_printer_id(): + driver = PtSeriesDriver(backend=_MockBackend()) + custom_id = uuid4() + queue_printer = driver.make_queue_printer(tape_registry=None, printer_id=custom_id) + assert queue_printer.id == custom_id + + +def test_make_queue_printer_generates_uuid_when_id_omitted(): + driver = PtSeriesDriver(backend=_MockBackend()) + queue_printer = driver.make_queue_printer(tape_registry=None) + assert isinstance(queue_printer.id, UUID) +``` + +- [ ] **Step 3: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/unit/printer_backends/test_make_queue_printer_id_param.py -v +``` + +Expected: FAIL — `TypeError: unexpected keyword argument 'printer_id'`. + +- [ ] **Step 4: Add `printer_id` param** + +Modify the driver method: + +```python +from uuid import UUID, uuid4 + +def make_queue_printer(self, tape_registry, printer_id: UUID | None = None): + pid = printer_id if printer_id is not None else uuid4() + return _QueuePrinter(id=pid, driver=self, tape_registry=tape_registry) +``` + +(Apply identical change to the QL-series driver if it has its own `make_queue_printer`.) + +- [ ] **Step 5: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/unit/printer_backends/test_make_queue_printer_id_param.py -v +``` + +Expected: 2 passed. 
+ +- [ ] **Step 6: Commit** + +```bash +git add backend/app/printer_models/ backend/tests/unit/printer_backends/test_make_queue_printer_id_param.py +git commit -m "$(cat <<'EOF' +refactor(api): driver.make_queue_printer accepts optional printer_id + +Lifespan can now hand the DB-deterministic UUID to the in-memory +queue printer so app.state.printer_id matches the DB row id. + +Refs #22 +EOF +)" +``` + +--- + +## Phase D — Cluster 1a: Lifespan Init-Order + +### Task D1: Defensive check in seed_templates + +**Files:** +- Modify: `backend/app/db/lifespan.py` +- Modify: `backend/tests/unit/test_lifespan.py` (add a test) + +- [ ] **Step 1: Write failing test** + +Add to `backend/tests/unit/test_lifespan.py`: + +```python +async def test_seed_templates_raises_on_empty_loader_cache(): + """Defensive check — empty TemplateLoader cache must abort, not silently no-op.""" + from app.db.lifespan import seed_templates + from app.services.template_loader import TemplateLoader + + TemplateLoader._cache.clear() + with pytest.raises(RuntimeError, match="empty TemplateLoader cache"): + async with async_session() as s: + await seed_templates(s, TemplateLoader) +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/unit/test_lifespan.py::test_seed_templates_raises_on_empty_loader_cache -v +``` + +Expected: FAIL (currently it silently upserts 0 rows). 
+ +- [ ] **Step 3: Add defensive check** + +In `backend/app/db/lifespan.py`, modify `seed_templates`: + +```python +async def seed_templates(session: AsyncSession, loader: type[TemplateLoader]) -> int: + """Idempotent YAML → DB upsert, delegated to ``loader.seed_db(session)``.""" + if not loader._cache: + raise RuntimeError( + "seed_templates called with empty TemplateLoader cache — " + "lifespan must call TemplateLoader.load_dir() before seed_templates()" + ) + return await loader.seed_db(session) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/unit/test_lifespan.py -v +``` + +Expected: green. Other lifespan tests must not break — if they do, those tests need to call `TemplateLoader.load_dir()` before `seed_templates`. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/db/lifespan.py backend/tests/unit/test_lifespan.py +git commit -m "$(cat <<'EOF' +fix(api): seed_templates aborts on empty loader cache instead of silent no-op + +Prevents the Phase 7a bug where lifespan called seed_templates +before load_dir — cache empty, 0 rows upserted, no error, UI shows +no templates. Defensive RuntimeError surfaces the misordering loudly. 
+ +Refs #22 +EOF +)" +``` + +### Task D2: Re-order lifespan + wire upsert_runtime_printer + plumb printer_id + +**Files:** +- Modify: `backend/app/main.py` +- Modify: `backend/app/db/lifespan.py` (docstring at top of file) +- Create: `backend/tests/integration/test_lifespan_seeds_and_upserts.py` + +- [ ] **Step 1: Write failing E2E test** + +```python +# backend/tests/integration/test_lifespan_seeds_and_upserts.py +"""Phase 7b Cluster 1a + 1b end-to-end: a fresh DB after lifespan contains +12 templates and 1 deterministic-id printer, and app.state.printer_id matches +the DB printer.id.""" + +import pytest +from sqlmodel import select +from httpx import ASGITransport, AsyncClient + +from app.main import app, lifespan +from app.db.engine import async_session +from app.models.printer import Printer +from app.models.template import Template +from app.services.printer_identity import derive_printer_id + + +pytestmark = pytest.mark.asyncio + + +async def test_fresh_lifespan_seeds_templates_and_creates_printer(empty_sqlite_db): + async with lifespan(app): + async with async_session() as s: + templates = list((await s.execute(select(Template))).scalars()) + printers = list((await s.execute(select(Printer))).scalars()) + assert len(templates) >= 12, f"expected >=12 seed templates, got {len(templates)}" + assert len(printers) == 1 + expected_id = derive_printer_id("PT-P750W", "192.0.2.50", 9100) + assert printers[0].id == expected_id + assert app.state.printer_id == expected_id +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/integration/test_lifespan_seeds_and_upserts.py -v +``` + +Expected: FAIL — current lifespan seeds 0 templates and runtime id != DB id. + +- [ ] **Step 3: Re-order lifespan** + +In `backend/app/main.py` replace the existing block (currently around lines 235–290): + +```python + settings = get_settings() + + # 1. 
DB schema first + await run_migrations() + await verify_alembic_at_head(settings) # added in Task E1 + + # 2. Application state BEFORE DB writes + if _SEED_TEMPLATES_DIR.exists(): + TemplateLoader.load_dir(_SEED_TEMPLATES_DIR) + else: + raise RuntimeError(f"Seed templates dir missing: {_SEED_TEMPLATES_DIR}") + + # 3. Plugin registry (idempotent) + if not IntegrationRegistry.names(): + _integrations_init._discover_plugins() + ModelRegistry.ensure_discovered() + + # 4. DB-bound init (cache is populated, plugins are loaded) + async with async_session() as s: + await recover_inflight_jobs(s) + await seed_templates(s, TemplateLoader) + db_printer_id = await upsert_runtime_printer(s, settings) + await ensure_printer_state(s) + await s.commit() + + # 5. Discovery hardware + runtime printer + discovery_host = settings.pt750w_host or "" + if discovery_host and settings.printer_discover_via_snmp: + model_id = await _resolve_model_id(settings, discovery_host) + else: + model_id = settings.printer_model + if not model_id: + raise ValueError( + "printer_model is empty and SNMP discovery is disabled. " + "Set PRINTER_HUB_PRINTER_MODEL or enable SNMP discovery." + ) + + backend = _build_backend(settings) + driver_cls = ModelRegistry.find_by_model_id(model_id) + driver = driver_cls(backend=backend) + + tape_registry = TapeRegistry() + printer = driver.make_queue_printer(tape_registry, printer_id=db_printer_id) + # ... rest unchanged (EventBus, producers, app.state, …) +``` + +(The `verify_alembic_at_head` function itself lands in Phase E, Task E1, so it does not exist yet. This task adds the call; until E1 lands, either leave that one line out or satisfy the import with a stub.) + +To keep this task self-contained: add a temporary stub at the top of `lifespan.py` if needed, then replace it with the real implementation in E1. + +Update the `app/db/lifespan.py` top-of-file docstring to list the 7-step order: + +```python +"""FastAPI startup helpers. + +Call order in main.py lifespan: + 1. run_migrations() — apply alembic upgrade head + 2. 
verify_alembic_at_head() — fail-fast on revision drift (Cluster 1d) + 3. TemplateLoader.load_dir()— populate the in-memory template cache (Cluster 1a) + 4. recover_inflight_jobs() — mark stale jobs as FAILED_RESTART + 5. seed_templates() — YAML → DB upsert (defensive check on cache) + 6. upsert_runtime_printer() — env → DB Printer row (Cluster 1b) + 7. ensure_printer_state() — printer_state row per Printer +""" +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/integration/test_lifespan_seeds_and_upserts.py -v +cd backend && uv run pytest tests/unit/test_lifespan.py -v +``` + +Expected: all green. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/main.py backend/app/db/lifespan.py backend/tests/integration/test_lifespan_seeds_and_upserts.py +git commit -m "$(cat <<'EOF' +fix(api): re-order lifespan — load_dir before seed_templates + upsert printer + +Calls TemplateLoader.load_dir() before seed_templates(), and adds +upsert_runtime_printer(s, settings) between seed_templates and +ensure_printer_state. Hands the resulting DB UUID to +driver.make_queue_printer so app.state.printer_id matches the DB row. + +Closes the Phase 7a bug where a fresh deploy showed 0 templates and 0 +printers in the UI. 
+ +Refs #22 +EOF +)" +``` + +--- + +## Phase E — Cluster 1d: Alembic Verify + +### Task E1: verify_alembic_at_head + +**Files:** +- Modify: `backend/app/db/lifespan.py` +- Create: `backend/tests/unit/test_alembic_verify.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/unit/test_alembic_verify.py +import pytest + +from app.db.lifespan import verify_alembic_at_head +from app.config import Settings + + +pytestmark = pytest.mark.asyncio + + +async def test_verify_passes_when_db_at_head(empty_sqlite_db, settings_at_head): + # alembic upgrade head was run in the fixture + await verify_alembic_at_head(settings_at_head) # no raise + + +async def test_verify_raises_on_stale_db(stale_sqlite_db, settings_at_head): + """DB at one revision behind head → RuntimeError mentioning drift.""" + with pytest.raises(RuntimeError, match="migration drift"): + await verify_alembic_at_head(settings_at_head) +``` + +The two fixtures `empty_sqlite_db` (advances to head) and `stale_sqlite_db` (advances to head-1) live in `backend/tests/conftest.py` — add them there: + +```python +@pytest.fixture +async def empty_sqlite_db(tmp_path): + from alembic import command + from alembic.config import Config + db = tmp_path / "fresh.db" + cfg = Config("backend/alembic.ini") + cfg.set_main_option("sqlalchemy.url", f"sqlite:///{db}") + command.upgrade(cfg, "head") + yield db + + +@pytest.fixture +async def stale_sqlite_db(tmp_path): + from alembic import command + from alembic.config import Config + db = tmp_path / "stale.db" + cfg = Config("backend/alembic.ini") + cfg.set_main_option("sqlalchemy.url", f"sqlite:///{db}") + command.upgrade(cfg, "-1") # head minus 1 + yield db + + +@pytest.fixture +def settings_at_head(tmp_path): + from app.config import Settings + db = tmp_path / "settings_db.db" + return Settings( + database_url=f"sqlite+aiosqlite:///{db}", + printer_backend="mock", + webhook_api_key="x" * 32, + ) +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd 
backend && uv run pytest tests/unit/test_alembic_verify.py -v +``` + +Expected: FAIL — `verify_alembic_at_head` not defined. + +- [ ] **Step 3: Implement** + +Append to `backend/app/db/lifespan.py`: + +```python +import asyncio +from pathlib import Path + +from sqlalchemy import create_engine + +from app.config import Settings + + +async def verify_alembic_at_head(settings: Settings) -> None: + """Raise RuntimeError if DB revision != alembic head. + + Takes Settings explicitly so the function is unit-testable without + depending on the lru_cache'd get_settings() singleton. + """ + from alembic.config import Config + from alembic.script import ScriptDirectory + from alembic.runtime.migration import MigrationContext + + ini_path = Path(__file__).resolve().parents[2] / "alembic.ini" + + def _check() -> tuple[str | None, str | None]: + cfg = Config(str(ini_path)) + script = ScriptDirectory.from_config(cfg) + head_rev = script.get_current_head() + sync_url = settings.database_url.replace("+aiosqlite", "") + engine = create_engine(sync_url) + with engine.connect() as conn: + ctx = MigrationContext.configure(conn) + current_rev = ctx.get_current_revision() + return current_rev, head_rev + + current_rev, head_rev = await asyncio.to_thread(_check) + if current_rev != head_rev: + raise RuntimeError( + f"Alembic migration drift detected: " + f"DB at {current_rev!r}, expected head {head_rev!r}" + ) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/unit/test_alembic_verify.py tests/integration/test_lifespan_seeds_and_upserts.py -v +``` + +Expected: all green (the lifespan integration test now exercises the real `verify_alembic_at_head` implemented here, via the call wired into the lifespan in D2). 
+ +- [ ] **Step 5: Commit** + +```bash +git add backend/app/db/lifespan.py backend/tests/unit/test_alembic_verify.py backend/tests/conftest.py +git commit -m "$(cat <<'EOF' +feat(api): verify_alembic_at_head fails fast on revision drift + +Lifespan calls verify_alembic_at_head(settings) right after +run_migrations(). If the DB revision deviates from the script head +(e.g. partial migration, downgrade, missing script file) the lifespan +raises and the container fails to start with a clear log message. + +Refs #22 +EOF +)" +``` + +--- + +## Phase F — Cluster 1e: /readiness Endpoint + +### Task F1: CheckStatus + ReadinessResponse schema + +**Files:** +- Create: `backend/app/schemas/readiness.py` +- Create: `backend/tests/unit/schemas/test_readiness_schema.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/unit/schemas/test_readiness_schema.py +from app.schemas.readiness import CheckStatus, ReadinessResponse + + +def test_check_status_minimum_fields(): + c = CheckStatus(status="ok") + assert c.status == "ok" + assert c.detail is None + assert c.metric is None + + +def test_readiness_response_aggregate(): + body = ReadinessResponse( + status="ready", + checks={"database": CheckStatus(status="ok", metric={"latency_ms": 0.8})}, + version="dev", + revision="abc", + ) + assert body.status == "ready" + assert body.checks["database"].metric == {"latency_ms": 0.8} +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/unit/schemas/test_readiness_schema.py -v +``` + +Expected: FAIL — module not found. 
+ +- [ ] **Step 3: Implement** + +```python +# backend/app/schemas/readiness.py +"""Phase 7b Cluster 1e — readiness response shape.""" + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import BaseModel, ConfigDict + + +class CheckStatus(BaseModel): + model_config = ConfigDict(frozen=True) + + status: Literal["ok", "fail", "skipped", "stale"] + detail: str | None = None + metric: dict[str, Any] | None = None + + +class ReadinessResponse(BaseModel): + model_config = ConfigDict(frozen=True) + + status: Literal["ready", "degraded", "not-ready"] + checks: dict[str, CheckStatus] + version: str + revision: str +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd backend && uv run pytest tests/unit/schemas/test_readiness_schema.py -v +``` + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/schemas/readiness.py backend/tests/unit/schemas/test_readiness_schema.py +git commit -m "$(cat <<'EOF' +feat(api): readiness response schema (CheckStatus + ReadinessResponse) + +Frozen Pydantic models for the new /readiness deep-check endpoint +introduced by Phase 7b Cluster 1e. 
+ +Refs #22 +EOF +)" +``` + +### Task F2: build_readiness_response — first half (database/alembic/template_seed/printer_runtime) + +**Files:** +- Create: `backend/app/services/readiness.py` +- Create: `backend/tests/integration/test_readiness_endpoint.py` + +- [ ] **Step 1: Write failing test (first 4 checks only)** + +```python +# backend/tests/integration/test_readiness_endpoint.py +"""Phase 7b Cluster 1e — /readiness deep check.""" + +import pytest + +pytestmark = pytest.mark.asyncio + + +async def test_readiness_database_check_ok(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + body = resp.json() + assert body["checks"]["database"]["status"] == "ok" + assert "latency_ms" in body["checks"]["database"]["metric"] + + +async def test_readiness_alembic_check_ok(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + body = resp.json() + assert body["checks"]["alembic"]["status"] == "ok" + + +async def test_readiness_template_seed_check_ok(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + body = resp.json() + assert body["checks"]["template_seed"]["status"] == "ok" + assert body["checks"]["template_seed"]["metric"]["templates_in_db"] >= 1 + + +async def test_readiness_printer_runtime_check_ok(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + body = resp.json() + assert body["checks"]["printer_runtime"]["status"] == "ok" +``` + +(The `/readiness` endpoint itself lands in Task F4; for now these tests fail with 404. That is the expected RED state.) + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd backend && uv run pytest tests/integration/test_readiness_endpoint.py -v +``` + +Expected: 4 FAIL with 404. 
+ +- [ ] **Step 3: Implement first 4 checks** + +```python +# backend/app/services/readiness.py +"""Phase 7b Cluster 1e — readiness aggregator.""" + +from __future__ import annotations + +import time +from datetime import datetime, timezone +from typing import Any + +from sqlalchemy import select, func, text +from sqlalchemy.ext.asyncio import AsyncSession + +from app.models.template import Template +from app.schemas.readiness import CheckStatus, ReadinessResponse + + +async def _check_database(session: AsyncSession) -> CheckStatus: + try: + t0 = time.monotonic() + await session.execute(text("SELECT 1")) + latency_ms = round((time.monotonic() - t0) * 1000, 2) + return CheckStatus(status="ok", metric={"latency_ms": latency_ms}) + except Exception as exc: # noqa: BLE001 — surface in the API + return CheckStatus(status="fail", detail=str(exc)) + + +async def _check_alembic(settings) -> CheckStatus: + from app.db.lifespan import verify_alembic_at_head + try: + await verify_alembic_at_head(settings) + return CheckStatus(status="ok") + except Exception as exc: # noqa: BLE001 + return CheckStatus(status="fail", detail=str(exc)) + + +async def _check_template_seed(session: AsyncSession) -> CheckStatus: + count = await session.scalar(select(func.count()).select_from(Template)) + if (count or 0) >= 1: + return CheckStatus(status="ok", metric={"templates_in_db": count}) + return CheckStatus( + status="fail", + detail="Templates table is empty — lifespan init-order regression?", + metric={"templates_in_db": 0}, + ) + + +def _check_printer_runtime(app_state) -> CheckStatus: + pid = getattr(app_state, "printer_id", None) + if pid is None: + return CheckStatus(status="fail", detail="app.state.printer_id is None") + return CheckStatus(status="ok", metric={"printer_id": str(pid)}) + + +async def build_readiness_response( + session: AsyncSession, + app_state, + settings, + *, + version: str, + revision: str, +) -> ReadinessResponse: + checks: dict[str, CheckStatus] = { + "database": 
await _check_database(session), + "alembic": await _check_alembic(settings), + "template_seed": await _check_template_seed(session), + "printer_runtime": _check_printer_runtime(app_state), + } + return ReadinessResponse( + status=_aggregate(checks), + checks=checks, + version=version, + revision=revision, + ) + + +def _aggregate(checks: dict[str, CheckStatus]) -> str: + critical = {"database", "alembic", "template_seed"} + if any(checks[name].status == "fail" for name in critical if name in checks): + return "not-ready" + if any(c.status == "fail" for c in checks.values()): + return "degraded" + return "ready" +``` + +- [ ] **Step 4: Tests stay RED for now — endpoint added in F4** + +Skip the run; F4 wires the endpoint. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/services/readiness.py +git commit -m "$(cat <<'EOF' +feat(api): readiness aggregator — database/alembic/templates/printer_runtime + +First four /readiness checks plus the ready/degraded/not-ready aggregation +rule. Endpoint wiring follows in the next task. 
+ +Refs #22 +EOF +)" +``` + +### Task F3: build_readiness_response — second half (printer_db_sync, snmp_discovery, print_queue, sse_bus) + +**Files:** +- Modify: `backend/app/services/readiness.py` +- Modify: `backend/tests/integration/test_readiness_endpoint.py` + +- [ ] **Step 1: Extend test file** + +```python +async def test_readiness_printer_db_sync_ok(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + body = resp.json() + assert body["checks"]["printer_db_sync"]["status"] == "ok" + + +async def test_readiness_snmp_check_stale_when_no_probe_yet(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + body = resp.json() + # The fixture does not run a probe, so the cache is empty → fail/stale acceptable + assert body["checks"]["snmp_discovery"]["status"] in {"stale", "fail", "skipped"} + + +async def test_readiness_aggregate_status_value(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + body = resp.json() + assert body["status"] in {"ready", "degraded", "not-ready"} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd backend && uv run pytest tests/integration/test_readiness_endpoint.py -v +``` + +Expected: the 4 existing tests still FAIL on 404, and the 3 new tests fail as well. 
+ +- [ ] **Step 3: Add remaining 4 checks** + +In `backend/app/services/readiness.py`: + +```python +from app.models.printer import Printer +from app.models.printer_status_cache import PrinterStatusCache + + +async def _check_printer_db_sync(session: AsyncSession, app_state) -> CheckStatus: + pid = getattr(app_state, "printer_id", None) + if pid is None: + return CheckStatus(status="skipped", detail="No runtime printer") + row = await session.get(Printer, pid) + if row is None: + return CheckStatus( + status="fail", + detail=f"app.state.printer_id={pid} has no matching DB row", + ) + return CheckStatus(status="ok") + + +async def _check_snmp_discovery(session: AsyncSession, app_state) -> CheckStatus: + pid = getattr(app_state, "printer_id", None) + if pid is None: + return CheckStatus(status="skipped", detail="No runtime printer") + row = await session.get(PrinterStatusCache, pid) + if row is None or row.captured_at is None: + return CheckStatus(status="fail", detail="No SNMP probe recorded yet") + age_s = (datetime.now(timezone.utc) - row.captured_at).total_seconds() + metric = {"last_probe_age_s": int(age_s)} + if age_s < 90: + return CheckStatus(status="ok", metric=metric) + if age_s < 600: + return CheckStatus(status="stale", detail=f"{int(age_s)}s ago (>90s)", metric=metric) + return CheckStatus(status="fail", detail=f"{int(age_s)}s ago (>600s) — printer offline?", metric=metric) + + +def _check_print_queue(app_state) -> CheckStatus: + queue = getattr(app_state, "print_queue", None) + if queue is None: + return CheckStatus(status="fail", detail="print_queue not in app.state") + worker_count = getattr(queue, "worker_count", lambda: 1)() + return CheckStatus(status="ok", metric={"worker_count": worker_count}) + + +def _check_sse_bus(app_state) -> CheckStatus: + bus = getattr(app_state, "event_bus", None) + if bus is None: + return CheckStatus(status="skipped", detail="event_bus not configured") + subs = getattr(bus, "subscriber_count", lambda: 0)() + max_subs = 
getattr(bus, "max_subscribers", 100) + metric = {"subscribers": subs, "max": max_subs} + if subs >= max_subs: + return CheckStatus(status="fail", detail="subscriber pool exhausted", metric=metric) + return CheckStatus(status="ok", metric=metric) +``` + +Extend `build_readiness_response`: + +```python +checks = { + "database": await _check_database(session), + "alembic": await _check_alembic(settings), + "template_seed": await _check_template_seed(session), + "printer_runtime": _check_printer_runtime(app_state), + "printer_db_sync": await _check_printer_db_sync(session, app_state), + "snmp_discovery": await _check_snmp_discovery(session, app_state), + "print_queue": _check_print_queue(app_state), + "sse_bus": _check_sse_bus(app_state), +} +``` + +- [ ] **Step 4: Wait for F4 to run tests** + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/services/readiness.py backend/tests/integration/test_readiness_endpoint.py +git commit -m "$(cat <<'EOF' +feat(api): readiness aggregator — remaining 4 checks + +printer_db_sync, snmp_discovery (with <90s ok / <600s stale / else fail +thresholds), print_queue worker liveness, sse_bus subscriber capacity. 
+ +Refs #22 +EOF +)" +``` + +### Task F4: /readiness route + HTTP status code mapping + +**Files:** +- Modify: `backend/app/main.py` (add the endpoint near /healthz) +- Modify: `backend/tests/integration/test_readiness_endpoint.py` (add HTTP-status tests) + +- [ ] **Step 1: Add HTTP-status assertions** + +Append to `backend/tests/integration/test_readiness_endpoint.py`: + +```python +async def test_readiness_returns_200_when_ready(api_client_with_seed): + resp = await api_client_with_seed.get("/readiness") + if resp.json()["status"] == "ready": + assert resp.status_code == 200 + + +async def test_readiness_returns_503_when_not_ready(api_client_with_broken_db): + """When the database check fails, status is not-ready → HTTP 503.""" + resp = await api_client_with_broken_db.get("/readiness") + assert resp.status_code == 503 + assert resp.json()["status"] == "not-ready" +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd backend && uv run pytest tests/integration/test_readiness_endpoint.py -v +``` + +Expected: many FAIL on 404. 
+ +- [ ] **Step 3: Add /readiness endpoint in main.py** + +Near the `/healthz` endpoint in `backend/app/main.py`: + +```python +from fastapi import Depends, Response +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.session import get_session +from app.schemas.readiness import ReadinessResponse +from app.services.readiness import build_readiness_response + + +@app.get( + "/readiness", + response_model=ReadinessResponse, + tags=["meta"], + responses={503: {"model": ReadinessResponse}}, +) +async def readiness( + response: Response, + session: AsyncSession = Depends(get_session), +) -> ReadinessResponse: + body = await build_readiness_response( + session, + app.state, + get_settings(), + version=HUB_VERSION, + revision=HUB_REVISION, + ) + if body.status == "not-ready": + response.status_code = 503 + return body +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/integration/test_readiness_endpoint.py -v +``` + +Expected: all green. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/main.py backend/tests/integration/test_readiness_endpoint.py +git commit -m "$(cat <<'EOF' +feat(api): expose /readiness deep-check endpoint + +Returns 200 with body.status in {ready, degraded} or 503 with status +not-ready when database/alembic/template_seed fail. Pangolin can +switch its healthcheck.path to /readiness — Docker keeps polling +/healthz for liveness-only. + +Refs #22 +EOF +)" +``` + +### Task F5: Confirm /healthz remains minimal + +**Files:** +- Modify: `backend/tests/integration/test_healthz_minimal.py` (add if missing) + +- [ ] **Step 1: Write the test** + +```python +# backend/tests/integration/test_healthz_minimal.py +"""Phase 7b Cluster 1e — /healthz never queries the database.""" + +import pytest + +pytestmark = pytest.mark.asyncio + + +async def test_healthz_returns_200_even_with_broken_db(api_client_with_broken_db): + """If DB explodes the liveness probe must still answer 200. 
+ + Otherwise Docker autoheal restart-loops the container on transient DB + failures, which is exactly the opposite of what we want. + """ + resp = await api_client_with_broken_db.get("/healthz") + assert resp.status_code == 200 + assert resp.json()["status"] == "ok" +``` + +- [ ] **Step 2: Run test** + +```bash +cd backend && uv run pytest tests/integration/test_healthz_minimal.py -v +``` + +Expected: PASS — `/healthz` does not depend on the DB. + +- [ ] **Step 3: Commit** + +```bash +git add backend/tests/integration/test_healthz_minimal.py +git commit -m "$(cat <<'EOF' +test(api): regression guard — /healthz must answer 200 even when DB broken + +Locks in the Cluster 1e contract: liveness probe is restart-relevant, +readiness probe owns the deep checks. Prevents accidental DB queries +sneaking back into /healthz. + +Refs #22 +EOF +)" +``` + +--- + +## Phase G — Cluster 1f: Status Cache + +### Task G1: StatusProbeProducer writes printer_status_cache on success + +**Files:** +- Modify: `backend/app/services/producers/status_probe_producer.py` +- Create: `backend/tests/integration/test_status_cache_writer.py` + +- [ ] **Step 1: Locate _probe_once / on_probe_result** + +```bash +cd backend && grep -n "snmp_probe\|probe_once\|on_probe" app/services/producers/status_probe_producer.py +``` + +Note the existing method names so the new `_upsert_cache` call lands in the success path. 
+ +- [ ] **Step 2: Write failing test** + +```python +# backend/tests/integration/test_status_cache_writer.py +"""Phase 7b Cluster 1f — StatusProbeProducer writes the printer_status_cache row.""" + +import pytest +from datetime import datetime, timezone +from uuid import uuid4 + +pytestmark = pytest.mark.asyncio + + +async def test_successful_probe_writes_cache(async_session_with_printer, mock_snmp_ok): + """A probe success path persists raw_block + parsed JSON + captured_at.""" + from app.services.producers.status_probe_producer import StatusProbeProducer + from app.services.event_bus import EventBus + + printer_id = async_session_with_printer.fixture_printer_id # set by fixture + producer = StatusProbeProducer( + bus=EventBus(), + printer_id=str(printer_id), + host="192.0.2.50", + interval_s=30, + community="public", + tape_change_producer=None, + ) + await producer._probe_once() # type: ignore[attr-defined] + + from app.models.printer_status_cache import PrinterStatusCache + row = await async_session_with_printer.get(PrinterStatusCache, printer_id) + assert row is not None + assert row.captured_at is not None + assert row.parsed["online"] is True + assert row.parsed["tape_width_mm"] == 12 + + +async def test_probe_failure_marks_offline(async_session_with_printer, mock_snmp_timeout): + from app.services.producers.status_probe_producer import StatusProbeProducer + from app.services.event_bus import EventBus + + printer_id = async_session_with_printer.fixture_printer_id + producer = StatusProbeProducer( + bus=EventBus(), + printer_id=str(printer_id), + host="192.0.2.50", + interval_s=30, + community="public", + tape_change_producer=None, + ) + await producer._probe_once() + + from app.models.printer_status_cache import PrinterStatusCache + row = await async_session_with_printer.get(PrinterStatusCache, printer_id) + assert row.parsed["online"] is False + assert "timeout" in row.parsed["last_error"].lower() +``` + +The two fixtures `mock_snmp_ok` and 
`mock_snmp_timeout` go in `backend/tests/conftest.py` — they monkeypatch the SNMP call to return a deterministic block (`tape_width_mm=12` etc) or raise `SnmpTimeoutError`. + +- [ ] **Step 3: Run tests to verify they fail** + +```bash +cd backend && uv run pytest tests/integration/test_status_cache_writer.py -v +``` + +Expected: FAIL — no cache writes happen today. + +- [ ] **Step 4: Add `_upsert_cache` and `_mark_offline`** + +In `backend/app/services/producers/status_probe_producer.py`: + +```python +from datetime import datetime, timezone +from app.db.engine import async_session +from app.models.printer_status_cache import PrinterStatusCache + + +async def _upsert_cache(self, snmp_result) -> None: + """Persist a successful SNMP probe into printer_status_cache.""" + parsed = { + "online": True, + "tape_width_mm": getattr(snmp_result, "tape_width_mm", None), + "tape_color": getattr(snmp_result, "tape_color", None), + "text_color": getattr(snmp_result, "text_color", None), + "model_id": getattr(snmp_result, "model_id", None), + } + raw_block = getattr(snmp_result, "raw_block", None) + async with async_session() as s: + row = await s.get(PrinterStatusCache, self._printer_id) + if row is not None: + row.parsed = parsed + row.raw_block = raw_block + row.captured_at = datetime.now(timezone.utc) + else: + s.add( + PrinterStatusCache( + printer_id=self._printer_id, + parsed=parsed, + raw_block=raw_block, + captured_at=datetime.now(timezone.utc), + ) + ) + await s.commit() + + +async def _mark_offline(self, exc: Exception) -> None: + """Persist a failed probe; preserves any previous parsed fields.""" + async with async_session() as s: + row = await s.get(PrinterStatusCache, self._printer_id) + parsed = dict(row.parsed) if (row is not None and row.parsed) else {} + parsed["online"] = False + parsed["last_error"] = str(exc) + if row is not None: + row.parsed = parsed + row.captured_at = datetime.now(timezone.utc) + else: + s.add( + PrinterStatusCache( + 
printer_id=self._printer_id, + parsed=parsed, + captured_at=datetime.now(timezone.utc), + ) + ) + await s.commit() +``` + +Then wire them into the existing `_probe_once`: + +```python +async def _probe_once(self) -> None: + try: + snmp_result = await self._snmp_probe() + except SnmpTimeoutError as exc: + await self._mark_offline(exc) + await self._publish_event_offline(exc) + return + await self._upsert_cache(snmp_result) + await self._publish_event(snmp_result) +``` + +- [ ] **Step 5: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/integration/test_status_cache_writer.py -v +``` + +Expected: 2 passed. + +- [ ] **Step 6: Commit** + +```bash +git add backend/app/services/producers/status_probe_producer.py backend/tests/integration/test_status_cache_writer.py backend/tests/conftest.py +git commit -m "$(cat <<'EOF' +feat(status): StatusProbeProducer persists printer_status_cache rows + +Every probe success writes raw_block + parsed JSON + captured_at; +SNMP timeouts persist online=False + last_error in the parsed JSON. +No schema change — uses existing Phase 5 columns. 
+ +Refs #22 +EOF +)" +``` + +### Task G2: PrinterStatus schema extensions + +**Files:** +- Modify: `backend/app/schemas/printer.py` +- Create: `backend/tests/unit/schemas/test_printer_status_fields.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/unit/schemas/test_printer_status_fields.py +from datetime import datetime, timezone +from uuid import uuid4 +from app.schemas.printer import PrinterStatus + + +def test_printer_status_minimal_fields(): + s = PrinterStatus(printer_id=uuid4(), online=None, captured_at=None) + assert s.online is None + assert s.captured_at is None + + +def test_printer_status_full_fields(): + pid = uuid4() + now = datetime.now(timezone.utc) + s = PrinterStatus( + printer_id=pid, + online=True, + tape_width_mm=12, + captured_at=now, + last_probe_age_s=15, + last_error=None, + note=None, + ) + assert s.online is True + assert s.last_probe_age_s == 15 + + +def test_printer_status_pending_with_note(): + s = PrinterStatus( + printer_id=uuid4(), + online=None, + captured_at=None, + note="No probe yet — wait up to 30s", + ) + assert s.note.startswith("No probe yet") +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && uv run pytest tests/unit/schemas/test_printer_status_fields.py -v +``` + +Expected: FAIL — most fields not present. 
+ +- [ ] **Step 3: Extend PrinterStatus** + +In `backend/app/schemas/printer.py`: + +```python +from datetime import datetime +from uuid import UUID + +from pydantic import BaseModel, ConfigDict, field_serializer + +from app.schemas._datetime import serialize_datetime_utc + + +class PrinterStatus(BaseModel): + """Cached SNMP status surfaced by GET /api/printers/{id}/status.""" + + model_config = ConfigDict(frozen=True) + + printer_id: UUID + online: bool | None = None + tape_width_mm: int | None = None + tape_color: str | None = None + text_color: str | None = None + captured_at: datetime | None = None + last_probe_age_s: int | None = None + last_error: str | None = None + note: str | None = None + + @field_serializer("captured_at") + def _serialise_captured_at(self, dt: datetime | None, _info): + return serialize_datetime_utc(dt, _info) if dt is not None else None +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/unit/schemas/test_printer_status_fields.py -v +``` + +Expected: 3 passed. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/schemas/printer.py backend/tests/unit/schemas/test_printer_status_fields.py +git commit -m "$(cat <<'EOF' +feat(status): PrinterStatus carries cache freshness + offline reason + +Adds captured_at, last_probe_age_s, last_error, note to the response +of /api/printers/{id}/status so the UI can render staleness and the +offline reason instead of guessing. 
+ +Refs #22 +EOF +)" +``` + +### Task G3: REST endpoint reads cache, never blocks on SNMP + +**Files:** +- Modify: `backend/app/api/routes/printers.py` +- Create: `backend/tests/integration/test_status_endpoint_cached.py` + +- [ ] **Step 1: Write failing test** + +```python +# backend/tests/integration/test_status_endpoint_cached.py +"""Phase 7b Cluster 1f — /status returns cache; never blocks on SNMP.""" + +import asyncio +import time + +import pytest + +pytestmark = pytest.mark.asyncio + + +async def test_status_endpoint_returns_pending_when_cache_empty(api_client_with_seed): + """Cold start: cache row absent → 200 + online=null + note hint.""" + resp = await api_client_with_seed.get( + f"/api/printers/{api_client_with_seed.fixture_printer_id}/status" + ) + assert resp.status_code == 200 + body = resp.json() + assert body["online"] is None + assert "No probe yet" in body["note"] + + +async def test_status_endpoint_returns_under_100ms(api_client_with_warm_cache, mock_snmp_blocker): + """Even with SNMP blocked for 10s the endpoint must answer from cache.""" + pid = api_client_with_warm_cache.fixture_printer_id + t0 = time.monotonic() + resp = await api_client_with_warm_cache.get(f"/api/printers/{pid}/status") + elapsed_ms = (time.monotonic() - t0) * 1000 + assert resp.status_code == 200 + assert elapsed_ms < 100, f"endpoint blocked {elapsed_ms:.1f}ms" +``` + +`mock_snmp_blocker` patches `_snmp_probe` to `asyncio.sleep(10)` and would cause failure if the endpoint accidentally tries to invoke it. + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd backend && uv run pytest tests/integration/test_status_endpoint_cached.py -v +``` + +Expected: FAIL — current endpoint does sync SNMP. 
+ +- [ ] **Step 3: Rewrite the endpoint** + +In `backend/app/api/routes/printers.py`, replace the body of the `/status` route: + +```python +from datetime import datetime, timezone +from uuid import UUID + +from fastapi import APIRouter, Depends +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.session import get_session +from app.models.printer_status_cache import PrinterStatusCache +from app.schemas.printer import PrinterStatus + +router = APIRouter() + + +@router.get("/api/printers/{printer_id}/status", response_model=PrinterStatus) +async def get_printer_status( + printer_id: UUID, + session: AsyncSession = Depends(get_session), +) -> PrinterStatus: + """Return cached SNMP status. Never blocks on the printer. + + Fresh data arrives via SSE (Phase 6b) and via the periodic probe worker + every ``settings.sse_probe_interval_s`` seconds (default 30). + """ + row = await session.get(PrinterStatusCache, printer_id) + if row is None or row.captured_at is None: + return PrinterStatus( + printer_id=printer_id, + online=None, + captured_at=None, + note="No probe yet — wait up to 30s for first probe cycle", + ) + parsed = row.parsed or {} + age_s = (datetime.now(timezone.utc) - row.captured_at).total_seconds() + return PrinterStatus( + printer_id=printer_id, + online=parsed.get("online"), + tape_width_mm=parsed.get("tape_width_mm"), + tape_color=parsed.get("tape_color"), + text_color=parsed.get("text_color"), + captured_at=row.captured_at, + last_probe_age_s=int(age_s), + last_error=parsed.get("last_error"), + ) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cd backend && uv run pytest tests/integration/test_status_endpoint_cached.py -v +``` + +Expected: 2 passed. 
+ +- [ ] **Step 5: Commit** + +```bash +git add backend/app/api/routes/printers.py backend/tests/integration/test_status_endpoint_cached.py +git commit -m "$(cat <<'EOF' +fix(status): /api/printers/{id}/status reads from cache, no sync SNMP + +Eliminates the 5-second block when the printer is offline. The +probe worker keeps printer_status_cache fresh in the background; +this endpoint returns whatever is there in <10 ms. + +Refs #22 +EOF +)" +``` + +--- + +## Phase H — Cluster 3: Frontend Proxy + +### Task H1: Mount /docs, /openapi.json, /redoc through the proxy + +**Files:** +- Modify: `frontend/cmd/server/main.go` (around lines 137–144) +- Modify: `frontend/cmd/server/main_test.go` + +- [ ] **Step 1: Write failing test** + +Add to `frontend/cmd/server/main_test.go`: + +```go +func TestProxyMountsBackendDocRoutes(t *testing.T) { + t.Parallel() + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/docs": + w.Header().Set("Content-Type", "text/html") + io.WriteString(w, "Swagger UI") + case "/openapi.json": + w.Header().Set("Content-Type", "application/json") + io.WriteString(w, `{"openapi":"3.1.0"}`) + case "/redoc": + io.WriteString(w, "ReDoc") + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer backend.Close() + + r := newRouter(stubPageHandler(t), proxy.New(backend.URL), testStaticFS) + + for path, want := range map[string]string{ + "/docs": "Swagger UI", + "/openapi.json": `"openapi":"3.1.0"`, + "/redoc": "ReDoc", + } { + rec := httptest.NewRecorder() + r.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, path, nil)) + if rec.Code != http.StatusOK { + t.Fatalf("%s: got %d, want 200", path, rec.Code) + } + if !strings.Contains(rec.Body.String(), want) { + t.Errorf("%s: body = %q, want substring %q", path, rec.Body.String(), want) + } + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd frontend && go test ./cmd/server/ -run 
TestProxyMountsBackendDocRoutes -v +``` + +Expected: FAIL — three 404s. + +- [ ] **Step 3: Add the mounts** + +In `frontend/cmd/server/main.go`, after the existing `r.Mount("/product", prx)` line: + +```go + // FastAPI auto-doc endpoints (Phase 7b Cluster 3). + r.Mount("/docs", prx) + r.Mount("/openapi.json", prx) + r.Mount("/redoc", prx) +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd frontend && go test ./cmd/server/ -v +``` + +Expected: all green. + +- [ ] **Step 5: Commit** + +```bash +git add frontend/cmd/server/main.go frontend/cmd/server/main_test.go +git commit -m "$(cat <<'EOF' +feat(frontend): proxy /docs, /openapi.json, /redoc to the backend + +Swagger UI and the raw OpenAPI document are now reachable under the +public domain (behind Pangolin SSO + the Basic-Auth bypass). Closes +the 404 reported in the production smoke test. + +Refs #22 +EOF +)" +``` + +--- + +## Phase I — Cluster 2: Documentation + +### Task I1: README mentions /readiness and the Phase 7b spec + +**Files:** +- Modify: `README.md` + +- [ ] **Step 1: Identify the section that documents the runtime API** + +```bash +cd /opt/repos/label-printer-hub && grep -n "healthz\|/api/" README.md | head -5 +``` + +- [ ] **Step 2: Add a paragraph just below the `/healthz` mention** + +```markdown +### Health probes + +The backend exposes two HTTP probes with different semantics: + +| Endpoint | Purpose | What it answers | +|---|---|---| +| `GET /healthz` | Liveness — for Docker/Kubernetes container restart | "the process and the event loop are alive" | +| `GET /readiness` | Readiness — for reverse-proxy routing | "the process can serve traffic right now": database connectable, alembic at head, templates seeded, runtime printer matches DB, SNMP probe fresh, queue worker alive | + +`/readiness` returns HTTP 200 with a `status` of `"ready"` or `"degraded"` (still routable), and HTTP 503 with `"not-ready"` when a critical check (database / alembic / template_seed) fails. 
See the Phase 7b foundation design in `docs/superpowers/specs/2026-05-17-phase-7b-foundation-design.md` for the full check list. +``` + +- [ ] **Step 3: Commit** + +```bash +git add README.md +git commit -m "$(cat <<'EOF' +docs(api): document /healthz vs /readiness contract in the README + +Explains the liveness/readiness split introduced in Phase 7b Cluster +1e and links to the spec for the full check list. + +Refs #22 +EOF +)" +``` + +--- + +## Phase J — Verification + +### Task J1: Full test suite + coverage gate + +**Files:** none + +- [ ] **Step 1: Run the full backend suite** + +```bash +cd backend && uv run pytest --cov=app --cov-report=term-missing -q +``` + +Expected: all tests pass and coverage ≥80 (the existing `fail_under = 80` in `pyproject.toml`). If coverage drops, add tests until back at or above the threshold — every cluster has at least one test file from the previous phases; gap-filling tests go into the same directories. + +- [ ] **Step 2: Type and lint checks** + +```bash +cd backend && uv run mypy app && uv run ruff check . && uv run ruff format --check . +``` + +Expected: all clean. + +- [ ] **Step 3: Frontend tests** + +```bash +cd frontend && go test ./... && go vet ./... +``` + +Expected: all clean. + +- [ ] **Step 4: oapi-codegen contract** + +```bash +cd frontend && make oapi-check # or: ./scripts/regen-and-diff-openapi.sh +``` + +Expected: generated client matches checked-in code. If not, re-generate and commit as `chore(frontend): regenerate oapi-codegen client for Phase 7b`. 
+ +- [ ] **Step 5: Commit any regeneration or coverage gap fixes** + +If anything changed: + +```bash +git add -p +git commit -m "$(cat <<'EOF' +chore(ci): Phase 7b verification — regen client / coverage gap-fill + +Refs #22 +EOF +)" +``` + +### Task J2: Production smoke test against labels.example.com + +**Files:** none + +- [ ] **Step 1: Build + push images (CI does this on PR merge — only do it locally if testing pre-merge)** + +```bash +# (CI normally handles this — skip if testing post-merge) +``` + +- [ ] **Step 2: Pull Header-Auth credentials from the vault** + +```bash +# Vault item name documented in docs/policies/secrets.md (or whatever the maintainer points at) +# Result: BASIC_USER=claude-automation, BASIC_PASS=<64-hex> +``` + +- [ ] **Step 3: Hit /healthz and /readiness against the production resource** + +```bash +curl -fsS -u "claude-automation:${BASIC_PASS}" \ + https://labels.example.com/healthz +# expected: HTTP 200, {"status":"ok",...} + +curl -fsS -u "claude-automation:${BASIC_PASS}" \ + https://labels.example.com/readiness | jq +# expected: HTTP 200 with status=ready (printer online) OR status=degraded +# (printer offline, but database/alembic/template_seed all ok) +``` + +- [ ] **Step 4: Hit /docs through the proxy** + +```bash +curl -fsS -u "claude-automation:${BASIC_PASS}" \ + -o /dev/null -w '%{http_code}\n' https://labels.example.com/docs +# expected: 200 +curl -fsS -u "claude-automation:${BASIC_PASS}" \ + https://labels.example.com/openapi.json | jq '.info.title' +# expected: "label-printer-hub" (or whatever HUB_VERSION exposes) +``` + +- [ ] **Step 5: Hit /api/printers/{id}/status (cache fast-path)** + +```bash +PRINTER_ID=$(curl -fsS -u "claude-automation:${BASIC_PASS}" \ + https://labels.example.com/api/printers | jq -r '.[0].id') +time curl -fsS -u "claude-automation:${BASIC_PASS}" \ + https://labels.example.com/api/printers/${PRINTER_ID}/status | jq +# expected: response in well under 100 ms +``` + +- [ ] **Step 6: Hit the UI in a 
real browser** + +Navigate to `https://labels.example.com/` (SSO login via Pangolin), confirm: +- 12 templates render on `/templates` +- 1 printer renders on `/` +- Printer detail page shows live status (online/offline + tape width) +- No 503s + +Note results in the PR description as part of the production-smoke checklist. + +### Task J3: Push the branch and open the PR + +**Files:** none + +- [ ] **Step 1: Push** + +```bash +git push -u origin feat/phase-7b-foundation +``` + +- [ ] **Step 2: Open the PR** + +```bash +gh pr create --base main --head feat/phase-7b-foundation \ + --title "feat(api): Phase 7b foundation — init, datetime-TZ, /readiness, status cache, proxy widening" \ + --body "$(cat <<'EOF' +## Summary + +Implements the merged Phase 7b spec across eight clusters. Closes the +foundation gaps surfaced by the first production deploy. + +Highlights: +- Lifespan re-ordered (load_dir before seed_templates) + defensive check +- Deterministic UUIDv5 printer identity, lifespan auto-upsert +- DateTime(timezone=True) everywhere + Pydantic Z-suffix serialiser + idempotent Alembic data migration +- `verify_alembic_at_head(settings)` fails fast on revision drift +- New `/readiness` endpoint with 8 deep checks (200/503 mapping) +- `printer_status_cache` is now the source of truth for `/api/printers/{id}/status` (no sync SNMP) +- Frontend proxies `/docs`, `/openapi.json`, `/redoc` to the backend +- README documents the /healthz vs /readiness contract + +## Test plan + +- [x] `uv run pytest --cov=app` ≥ 80 +- [x] `uv run mypy app && uv run ruff check . && uv run ruff format --check .` +- [x] `go test ./... && go vet ./...` +- [x] Manual smoke against `labels.example.com` — /healthz, /readiness, /docs, /api/printers/{id}/status, UI in browser + +Refs #22 +EOF +)" +``` + +- [ ] **Step 3: Confirm PR opened, CI green, then hand off** + +Wait for CI + bot reviews. 
Address findings per `.claude/rules/review-feedback-policy.md` (≥15 min after each push, reply + resolve all threads, then squash-merge). + +--- + +## Self-review notes + +- **Spec coverage:** Every cluster (1a–3) has its own task. Cluster 2 = Task I1 (README + spec link). Cluster 3 = Task H1. +- **Ordering rationale:** 1c (datetime) lands first — every model touched downstream gets the correct column type from the outset. 1b (printer identity) lands before 1f (cache reader) so the cache always references a stable id. 1a (lifespan order) lands after 1b so the lifespan re-order already wires `upsert_runtime_printer` in. 1d (alembic verify) sits next to the lifespan changes. 1e and 1f are independent of each other but 1e tests reference the cache writer from 1f for the `snmp_discovery` check. +- **Scope discipline:** No tasks add unrelated features. Removal of the `printer_status_cache.last_error` column from the original spec (no schema change after all) is reflected in B5 — only data migration on existing rows, no DDL. +- **Estimated wall-clock under subagent-driven-development:** ~4.25 h (Phase B: ~60 min, C: ~30, D: ~30, E: ~15, F: ~45, G: ~30, H: ~10, I: ~5, J: ~30; excludes review-loop time). + +--- + +## Execution Handoff + +Plan complete and saved to `docs/superpowers/plans/2026-05-17-phase-7b-foundation.md`. Two execution options: + +**1. Subagent-Driven (recommended)** — I dispatch a fresh subagent per task, two-stage review (spec compliance + code quality) between tasks, fast iteration. + +**2. Inline Execution** — Execute tasks in this session using `superpowers:executing-plans`, batch execution with checkpoints for review. + +Which approach? 
diff --git a/frontend/cmd/server/main.go b/frontend/cmd/server/main.go index 8af73f8..3d93c18 100644 --- a/frontend/cmd/server/main.go +++ b/frontend/cmd/server/main.go @@ -143,6 +143,13 @@ func newRouter(ph *handlers.PageHandler, prx http.Handler, staticSubFS fs.FS) *c r.Mount("/spool", prx) r.Mount("/product", prx) + // FastAPI auto-doc endpoints (Phase 7b Cluster 3). + // r.Handle is used (not r.Mount) so the full path is preserved when + // forwarded to the backend — chi.Mount strips the mount prefix. + r.Handle("/docs", prx) + r.Handle("/openapi.json", prx) + r.Handle("/redoc", prx) + return r } diff --git a/frontend/cmd/server/main_test.go b/frontend/cmd/server/main_test.go index 26ca0c8..c6b5e68 100644 --- a/frontend/cmd/server/main_test.go +++ b/frontend/cmd/server/main_test.go @@ -316,6 +316,65 @@ func min(a, b int) int { return b } +// TestProxyMountsBackendDocRoutes verifies that /docs, /openapi.json and /redoc +// are forwarded to the backend (Phase 7b Cluster 3). +// Without the three r.Handle lines in newRouter, the chi router returns 404 for +// each of these paths. +func TestProxyMountsBackendDocRoutes(t *testing.T) { + // Not parallel at the outer level: we need initBuildInfoForTests to run + // (sync.Once write) before the parallel subtests read the global. + initBuildInfoForTests(t) + + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/docs": + w.Header().Set("Content-Type", "text/html") + fmt.Fprint(w, "Swagger UI") + case "/openapi.json": + w.Header().Set("Content-Type", "application/json") + fmt.Fprint(w, `{"openapi":"3.1.0"}`) + case "/redoc": + w.Header().Set("Content-Type", "text/html") + fmt.Fprint(w, "ReDoc") + default: + http.NotFound(w, r) + } + })) + // t.Cleanup (not defer) ensures the server stays up until all parallel + // subtests have finished — defer fires when the outer function returns, + // which is before t.Parallel subtests execute. 
+ t.Cleanup(backend.Close) + + // Build the router directly against our mock backend so all three doc + // paths are proxied to the server that actually answers them. + ph := handlers.NewPageHandlerFromURL(t, backend.URL) + prx := proxy.New(backend.URL) + sub, err := fs.Sub(staticFS, "web/static") + if err != nil { + t.Fatalf("fs.Sub: %v", err) + } + r := newRouter(ph, prx, sub) + + for path, want := range map[string]string{ + "/docs": "Swagger UI", + "/openapi.json": `"openapi":"3.1.0"`, + "/redoc": "ReDoc", + } { + path, want := path, want // capture loop variables + t.Run(path, func(t *testing.T) { + t.Parallel() + rec := httptest.NewRecorder() + r.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, path, nil)) + if rec.Code != http.StatusOK { + t.Fatalf("GET %s: got status %d, want 200 (body: %q)", path, rec.Code, rec.Body.String()) + } + if !strings.Contains(rec.Body.String(), want) { + t.Errorf("GET %s: body = %q, want substring %q", path, rec.Body.String(), want) + } + }) + } +} + // TestRealTemplatesPerPageContent verifies that each page renders its own // content when using the real embedded templates — not the content of whatever // page file happens to be parsed last.