comppolicylab · jnu · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/bc2/core/common/ontology.py b/bc2/core/common/ontology.py
@@ -66,42 +66,3 @@ class KeyValuePair(TypedDict):
 
 
 Table = list[dict[str, str]]
-
-
-class Palette:
-    Red1 = (0.9, 0.2, 0.1)
-    Red2 = (0.8, 0.1, 0.0)
-    Red3 = (0.7, 0.0, 0.0)
-    Orange1 = (0.9, 0.5, 0.0)
-    Orange2 = (0.8, 0.4, 0.0)
-    Orange3 = (0.7, 0.3, 0.0)
-    Yellow1 = (0.9, 0.9, 0.0)
-    Yellow2 = (0.8, 0.8, 0.0)
-    Yellow3 = (0.7, 0.7, 0.0)
-    Green1 = (0.2, 0.9, 0.1)
-    Green2 = (0.1, 0.8, 0.0)
-    Green3 = (0.0, 0.7, 0.0)
-    Blue1 = (0, 0.5, 1)
-    Blue2 = (0.0, 0.4, 0.8)
-    Blue3 = (0.0, 0.3, 0.7)
-    Purple1 = (0.9, 0.0, 0.9)
-    Purple2 = (0.8, 0.0, 0.8)
-    Purple3 = (0.7, 0.0, 0.7)
-    Pink1 = (0.9, 0.0, 0.9)
-    Pink2 = (0.8, 0.0, 0.8)
-    Pink3 = (0.7, 0.0, 0.7)
-    Brown1 = (0.5, 0.25, 0.0)
-    Brown2 = (0.4, 0.2, 0.0)
-    Brown3 = (0.3, 0.15, 0.0)
-    Cyan1 = (0.0, 0.9, 0.9)
-    Cyan2 = (0.0, 0.8, 0.8)
-    Cyan3 = (0.0, 0.7, 0.7)
-    Lime1 = (0.9, 0.9, 0.0)
-    Lime2 = (0.8, 0.8, 0.0)
-    Lime3 = (0.7, 0.7, 0.0)
-    Maroon1 = (0.5, 0.0, 0.0)
-    Maroon2 = (0.4, 0.0, 0.0)
-    Maroon3 = (0.3, 0.0, 0.0)
-    Gray1 = (0.5, 0.5, 0.5)
-    Gray2 = (0.4, 0.4, 0.4)
-    Gray3 = (0.3, 0.3, 0.3)
diff --git a/bc2/core/common/ontopainter.py b/bc2/core/common/ontopainter.py
@@ -0,0 +1,175 @@
+from enum import Enum
+from typing import Callable
+
+import pymupdf
+from pydantic import BaseModel, model_validator
+
+from .ontology import Cited, PoliceReport, PoliceReportParseResult, SourceChunk
+
+
+class OntoPainterMark(Enum):
+    """Kinds of mark that can be drawn for a field."""
+
+    # Rectangle outline; dispatched to `OntoPainter._paint_rect`.
+    RECT = "RECT"
+
+
+# Callable that extracts the `Cited` values to paint from a `PoliceReport`.
+# `OntoPainterFieldConfig.accessor` takes one as the alternative to naming a
+# report attribute via `field`.
+FieldAccessor = Callable[[PoliceReport], list[Cited]]
+
+
+class OntoPainterFieldConfig(BaseModel):
+    # How a single report field is located and drawn. The values to paint come
+    # from exactly one of `field` (attribute name read off the report) or
+    # `accessor` (callable applied to the report).
+    field: str | None = None  # TODO - validate against PoliceReport fields
+    label: str | None = None
+    mark: OntoPainterMark
+    fill: tuple[float, float, float] | None = None
+    stroke: tuple[float, float, float] | None = None
+    stroke_width: float = 0
+    accessor: FieldAccessor | None = None
+
+    @model_validator(mode="after")
+    def validate_field_accessor(self) -> "OntoPainterFieldConfig":
+        """Require exactly one of `field` and `accessor` to be provided."""
+        has_field = self.field is not None
+        has_accessor = self.accessor is not None
+        # Both set or both missing are equally invalid.
+        if has_field == has_accessor:
+            raise ValueError("Either field or accessor must be set, but not both.")
+        return self
+
+    def get_value(self, report: PoliceReport) -> list[Cited]:
+        """Collect the cited values this config refers to in `report`.
+
+        Reads the attribute named by `field` when it is set; otherwise calls
+        `accessor` with the report. The result is always a list with `None`
+        entries removed.
+
+        Raises:
+            ValueError: If `field` is unset and no accessor is available, or if
+                the resolved value is not `None`, a `Cited`, or a list.
+        """
+        raw: Cited | list[Cited | None] | list[Cited] | None
+        if self.field is not None:
+            raw = getattr(report, self.field)
+        elif self.accessor:
+            raw = self.accessor(report)
+        else:
+            raise ValueError("Accessor is required if field is not set.")
+
+        # Missing value -> nothing to paint.
+        if raw is None:
+            return []
+        # Single value -> one-item list.
+        if isinstance(raw, Cited):
+            return [raw]
+        if not isinstance(raw, list):
+            raise ValueError(f"Unexpected type: {type(raw)}")
+        # List that may contain optional entries -> keep only real values.
+        return [item for item in raw if item is not None]
+
+
+class OntoPainter(BaseModel):
+    # Draws the configured report fields onto the pages of a PDF.
+    fields: list[OntoPainterFieldConfig]
+
+    def paint(
+        self,
+        pdf: str | pymupdf.Document,
+        parse_result: PoliceReportParseResult,
+        pages: str | None = None,
+    ) -> pymupdf.Document:
+        """Paint a document annotated with the parse result.
+
+        Args:
+            pdf: Path to a PDF file, or an already-open document. An open
+                document is page-filtered and drawn on in place.
+            parse_result: Provides the report whose fields are painted and the
+                chunks those fields cite.
+            pages: Optional 1-indexed page spec (e.g. "1-3,5") restricting the
+                document to those pages before painting.
+
+        Returns:
+            The annotated document.
+        """
+        # 1. Load the requested pages from the input path / doc
+        # NOTE(review): region page indices are later used as-is against the
+        # filtered document — confirm they are relative to the selected pages.
+        doc = self._load_pdf(pdf, pages)
+
+        # 2. Loop over field configs and paint each field.
+        for field_config in self.fields:
+            field_values = field_config.get_value(parse_result.report)
+            for i, field_value in enumerate(field_values, start=1):
+                for j, chunk_id in enumerate(field_value.ids, start=1):
+                    chunk = parse_result.chunks[chunk_id]
+                    # Only build a label when one is configured; formatting a
+                    # missing label would paint the literal text "None i-j".
+                    label = (
+                        f"{field_config.label} {i}-{j}"
+                        if field_config.label is not None
+                        else None
+                    )
+                    self._paint_field(doc, field_config, chunk, label=label)
+
+        return doc
+
+    def _paint_field(
+        self,
+        doc: pymupdf.Document,
+        field_config: OntoPainterFieldConfig,
+        chunk: SourceChunk,
+        label: str | None = None,
+    ) -> None:
+        """Paint one chunk of a field on a document, with an optional label.
+
+        Args:
+            doc: Document to draw on.
+            field_config: Supplies the mark type and colors.
+            chunk: Source chunk whose regions are marked.
+            label: Text written just above the first point of the chunk's
+                first region; nothing is written when empty or `None`.
+
+        Raises:
+            ValueError: If the configured mark is not supported.
+        """
+        match field_config.mark:
+            case OntoPainterMark.RECT:
+                self._paint_rect(doc, field_config, chunk)
+            case _:
+                raise ValueError(f"Unsupported mark: {field_config.mark}")
+
+        # A chunk without regions has nowhere to anchor a label (and nothing
+        # was drawn for it above), so skip labelling instead of failing.
+        if not label or not chunk.regions:
+            return
+
+        anchor = chunk.regions[0]
+        page = doc.load_page(anchor.page)
+        page_width, page_height = page.mediabox[2:]
+        first_point = anchor.points[0]
+        x = first_point[0] * page_width
+        # Offset to avoid overlapping with bounding rectangle
+        y = first_point[1] * page_height - 2
+        page.insert_text(
+            (x, y), label, fontsize=5, fill=field_config.stroke, color=(1, 1, 1)
+        )
+
+    def _paint_rect(
+        self,
+        doc: pymupdf.Document,
+        field_config: OntoPainterFieldConfig,
+        chunk: SourceChunk,
+    ) -> None:
+        """Outline every region of a chunk with a rectangle.
+
+        The rectangle is stroked with `field_config.stroke` at
+        `field_config.stroke_width`.
+        """
+        for region in chunk.regions:
+            # The coordinates come normalized in (0, 1) space. Project into page coords.
+            page = doc.load_page(region.page)
+            page_width, page_height = page.mediabox[2:]
+            shape = page.new_shape()
+            scaled_points = [
+                (p[0] * page_width, p[1] * page_height) for p in region.points
+            ]
+            shape.draw_rect(pymupdf.Quad(*scaled_points).rect)
+            shape.finish(color=field_config.stroke, width=field_config.stroke_width)
+            shape.commit()
+
+    def _load_pdf(
+        self, doc: str | pymupdf.Document, pages: str | None = None
+    ) -> pymupdf.Document:
+        """Load a PDF document, optionally restricted to a set of pages.
+
+        Args:
+            doc: Path to a PDF file, or an already-open document.
+            pages: Optional 1-indexed page spec, parsed by `_parse_pages_range`.
+
+        Returns:
+            The opened (or passed-in) document. When a page spec is given the
+            document is narrowed with `select`, which also affects a document
+            passed in by the caller.
+        """
+        if isinstance(doc, str):
+            # NOTE(review): the handle is closed when the `with` block exits
+            # while the returned document stays in use — confirm pymupdf does
+            # not need the handle to remain open.
+            with open(doc, "rb") as f:
+                pdf_doc = pymupdf.open(f)
+        else:
+            pdf_doc = doc
+
+        filter_pages = _parse_pages_range(pages)
+        if filter_pages:
+            pdf_doc.select(filter_pages)
+
+        return pdf_doc
+
+
+def _parse_pages_range(pages: str | None = None) -> list[int] | None:
+    """Parse page range specification as a list of page numbers.
+
+    If no spec is given (`None` or a blank string), return None.
+
+    Spec looks like:
+      1      Single page
+      1-3    Range of pages
+      1,2,3  List of pages
+      1-3,5  Range and list of pages
+
+    Whitespace around numbers is ignored.
+
+    Args:
+        pages: The page range specification, 1-indexed.
+
+    Returns:
+        A sorted list of unique page numbers (0-indexed).
+
+    Raises:
+        ValueError: If a segment is not a number or `start-end` pair, if a
+            page number is below 1, or if a range ends before it starts.
+    """
+    if pages is None or not pages.strip():
+        return None
+
+    selected: set[int] = set()
+    for segment in pages.split(","):
+        first, dash, last = segment.partition("-")
+        try:
+            start = int(first)
+            end = int(last) if dash else start
+        except ValueError:
+            raise ValueError(
+                f"Invalid page specification: {segment.strip()!r}"
+            ) from None
+        # Pages are 1-indexed: page 0 would otherwise become index -1, and a
+        # reversed range would silently select nothing.
+        if start < 1:
+            raise ValueError(f"Page numbers start at 1: {segment.strip()!r}")
+        if end < start:
+            raise ValueError(f"Page range ends before it starts: {segment.strip()!r}")
+        selected.update(range(start, end + 1))
+
+    # The set removed duplicates; shift to 0-indexed and sort.
+    return sorted(page - 1 for page in selected)
diff --git a/bc2/core/common/palette.py b/bc2/core/common/palette.py
@@ -0,0 +1,37 @@
+class Palette:
+    """Named RGB colors as (red, green, blue) tuples with components in [0, 1].
+
+    Each hue comes in three shades, numbered 1 (lightest) to 3 (darkest).
+    """
+
+    Red1 = (0.9, 0.2, 0.1)
+    Red2 = (0.8, 0.1, 0.0)
+    Red3 = (0.7, 0.0, 0.0)
+    Orange1 = (0.9, 0.5, 0.0)
+    Orange2 = (0.8, 0.4, 0.0)
+    Orange3 = (0.7, 0.3, 0.0)
+    Yellow1 = (0.9, 0.9, 0.0)
+    Yellow2 = (0.8, 0.8, 0.0)
+    Yellow3 = (0.7, 0.7, 0.0)
+    Green1 = (0.2, 0.9, 0.1)
+    Green2 = (0.1, 0.8, 0.0)
+    Green3 = (0.0, 0.7, 0.0)
+    Blue1 = (0.0, 0.5, 1.0)
+    Blue2 = (0.0, 0.4, 0.8)
+    Blue3 = (0.0, 0.3, 0.7)
+    Purple1 = (0.9, 0.0, 0.9)
+    Purple2 = (0.8, 0.0, 0.8)
+    Purple3 = (0.7, 0.0, 0.7)
+    # Pink previously duplicated the Purple values exactly, which made the two
+    # hues indistinguishable when painted.
+    Pink1 = (1.0, 0.4, 0.7)
+    Pink2 = (0.9, 0.3, 0.6)
+    Pink3 = (0.8, 0.2, 0.5)
+    Brown1 = (0.5, 0.25, 0.0)
+    Brown2 = (0.4, 0.2, 0.0)
+    Brown3 = (0.3, 0.15, 0.0)
+    Cyan1 = (0.0, 0.9, 0.9)
+    Cyan2 = (0.0, 0.8, 0.8)
+    Cyan3 = (0.0, 0.7, 0.7)
+    # Lime previously duplicated the Yellow values exactly.
+    Lime1 = (0.6, 0.9, 0.0)
+    Lime2 = (0.5, 0.8, 0.0)
+    Lime3 = (0.4, 0.7, 0.0)
+    Maroon1 = (0.5, 0.0, 0.0)
+    Maroon2 = (0.4, 0.0, 0.0)
+    Maroon3 = (0.3, 0.0, 0.0)
+    Gray1 = (0.5, 0.5, 0.5)
+    Gray2 = (0.4, 0.4, 0.4)
+    Gray3 = (0.3, 0.3, 0.3)
diff --git a/bc2/core/input/azureblob.py b/bc2/core/input/azureblob.py
@@ -21,7 +21,7 @@ class AzureBlobInput(BaseInputDriver, AzureBlobDriver):
     def __init__(self, config: AzureBlobInputConfig):
         self.init_client(config)
 
-    def __call__(self, path: str = "", buffer: io.BytesIO | None = None) -> MemoryFile:
+    def load_file(self, path: str = "", buffer: io.BytesIO | None = None) -> MemoryFile:
         """Read from an Azure Blob."""
         f = MemoryFile()
         full_path = f"{self.config.prefix}{path}"

diff --git a/bc2/core/input/base.py b/bc2/core/input/base.py
@@ -2,6 +2,7 @@
 from abc import ABC, abstractmethod
 from typing import Literal
 
+from ..common.context import Context
 from ..common.file import MemoryFile
 
 
@@ -16,7 +17,15 @@ class BaseInputDriver(ABC):
 
     required: list[Literal["path"] | Literal["buffer"]] = []
 
-    @abstractmethod
     def __call__(
+        self, context: Context, path: str = "", buffer: io.BytesIO | None = None
+    ) -> MemoryFile:
+        """Load the input via `load_file` and record it on the context.
+
+        Args:
+            context: Receives the loaded file as its `input_file` attribute.
+            path: Passed through to `load_file`.
+            buffer: Passed through to `load_file`.
+
+        Returns:
+            The file produced by `load_file`.
+        """
+        loaded = self.load_file(path=path, buffer=buffer)
+        # Keep the loaded file reachable from the context for later use.
+        context.input_file = loaded
+        return loaded
+
+    # Driver-specific read, implemented by each input driver. `__call__` wraps
+    # this and stores the returned file on the context.
+    @abstractmethod
+    def load_file(
         self, path: str = "", buffer: io.BytesIO | None = None
     ) -> MemoryFile: ...
diff --git a/bc2/core/input/file.py b/bc2/core/input/file.py
@@ -23,7 +23,7 @@ def __init__(self, config: FileInputConfig):
 
     required = ["path"]
 
-    def __call__(self, path: str = "", buffer: io.BytesIO | None = None) -> MemoryFile:
+    def load_file(self, path: str = "", buffer: io.BytesIO | None = None) -> MemoryFile:
         """Read from a file."""
         if not path:
             raise ValueError("Path is required for file input.")

diff --git a/bc2/core/input/memory.py b/bc2/core/input/memory.py
@@ -23,7 +23,7 @@ def __init__(self, config: MemoryInputConfig):
 
     required = ["buffer"]
 
-    def __call__(self, path: str = "", buffer: io.BytesIO | None = None) -> MemoryFile:
+    def load_file(self, path: str = "", buffer: io.BytesIO | None = None) -> MemoryFile:
         """Read from a buffer in memory."""
         if not buffer:
             raise ValueError("Buffer is required for memory input.")

diff --git a/bc2/core/input/stdin.py b/bc2/core/input/stdin.py
@@ -22,7 +22,7 @@ class StdinInput(BaseInputDriver):
     def __init__(self, config: StdinInputConfig):
         self.config = config
 
-    def __call__(self, path: str = "", buffer: io.BytesIO | None = None) -> MemoryFile:
+    def load_file(self, path: str = "", buffer: io.BytesIO | None = None) -> MemoryFile:
         """Read from stdin."""
         f = MemoryFile()
         # Consume all the stdin pipe and write it to memory

diff --git a/bc2/core/paint/__init__.py b/bc2/core/paint/__init__.py
diff --git a/bc2/core/paint/base.py b/bc2/core/paint/base.py
@@ -0,0 +1,24 @@
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar
+
+from ..common.context import Context
+from ..common.file import MemoryFile
+from ..common.preprocess import PreprocessMixin
+
+T = TypeVar("T")
+
+
+class BasePainter(ABC, Generic[T], PreprocessMixin[T]):
+    """Abstract base for painters that annotate the original input file.
+
+    Subclasses implement `paint`; `T` is the type handed to it after
+    `preprocess` has run on the piped value.
+    """
+
+    def __call__(self, file: MemoryFile, context: Context) -> MemoryFile:
+        """Preprocess the piped value, then paint the original input with it.
+
+        Args:
+            file: The primary pipe value (e.g. a serialized ontology result).
+            context: Its `input_file` is used as the original file to paint on.
+
+        Returns:
+            The annotated file returned by `paint`.
+        """
+        analysis = self.preprocess(file)
+        source_file = context.input_file
+        return self.paint(source_file, analysis)
+
+    @abstractmethod
+    def paint(self, original: MemoryFile, data: T) -> MemoryFile:
+        """Return an annotated version of `original` based on `data`."""