Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions src/document_anonymizer/i18n/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Internationalization support for the web UI."""

from __future__ import annotations

import json
from functools import lru_cache
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal, TypeGuard, get_args

if TYPE_CHECKING:
from collections.abc import Mapping

import structlog
from jinja2 import pass_context

# Module-level structured logger, named after this module.
logger = structlog.get_logger(__name__)

# Language codes the web UI can be rendered in. ``Lang`` is the static type;
# ``SUPPORTED_LANGUAGES`` is the runtime set used for membership checks.
Lang = Literal["de", "en"]
SUPPORTED_LANGUAGES: set[str] = {"de", "en"}
DEFAULT_LANGUAGE: Lang = "de"

# Directory next to this module holding the ``<lang>.json`` translation files.
_TRANSLATIONS_DIR = Path(__file__).parent.joinpath("translations")

# Import-time guard: the Literal type and the runtime set are maintained by
# hand, so refuse to load the module if the two ever diverge.
if SUPPORTED_LANGUAGES != set(get_args(Lang)):  # pragma: no cover
    msg = "Lang type and SUPPORTED_LANGUAGES are out of sync"
    raise RuntimeError(msg)


def is_supported_lang(value: str) -> TypeGuard[Lang]:
    """Return True when *value* is one of the supported language codes.

    Declared as a ``TypeGuard`` so that type checkers narrow *value* to
    ``Lang`` in the branch where this function returned True.
    """
    supported = value in SUPPORTED_LANGUAGES
    return supported


def _load_translations(lang: str) -> dict[str, str]:
    """Return the cached translations for *lang*, falling back to the default.

    Any code outside ``SUPPORTED_LANGUAGES`` is replaced by
    ``DEFAULT_LANGUAGE`` before the cached loader is consulted.
    """
    effective = lang if lang in SUPPORTED_LANGUAGES else DEFAULT_LANGUAGE
    return _load_translations_cached(effective)


@lru_cache(maxsize=2)  # maxsize matches len(SUPPORTED_LANGUAGES)
def _load_translations_cached(lang: str) -> dict[str, str]:
    """Load, validate and cache the translation file for *lang*.

    Args:
        lang: Language code; ``<lang>.json`` is read from
            ``_TRANSLATIONS_DIR``.

    Returns:
        The parsed key-to-text mapping, or an empty dict when the file
        cannot be read, is not valid UTF-8/JSON, or does not hold a JSON
        object at the top level. Failures are logged, never raised.

    Note:
        ``lru_cache`` memoizes the return value, so the empty-dict fallback
        is reused for later calls with the same *lang* as well.
    """
    path = _TRANSLATIONS_DIR / f"{lang}.json"
    try:
        with path.open(encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError):
        logger.exception("translation_file_load_failed", lang=lang)
        return {}
    # Syntactically valid JSON may still be a list, string, number or null.
    # Callers use dict methods on the result, so reject anything else here
    # instead of letting them fail later with an AttributeError.
    if not isinstance(data, dict):
        logger.error(
            "translation_file_invalid", lang=lang, found=type(data).__name__
        )
        return {}
    return data


def get_translations(lang: str) -> dict[str, str]:
    """Get all translations for a language (public API).

    Args:
        lang: Language code; unsupported codes fall back to
            ``DEFAULT_LANGUAGE``.

    Returns:
        A new dict mapping translation keys to their text. It is a shallow
        copy, so callers may modify it freely.
    """
    # The loader hands out the very dict object stored in its cache; copying
    # keeps caller-side mutations from leaking into every later lookup.
    return dict(_load_translations(lang))


def translate(
    key: str, lang: Lang = DEFAULT_LANGUAGE, **kwargs: str | int | float
) -> str:
    """Look up a translation key and interpolate any kwargs.

    Args:
        key: Translation key, e.g. ``"results.entities_found"``.
        lang: Language to translate into.
        **kwargs: Values for the ``{name}`` placeholders of the template.

    Returns:
        The translated text. When *key* has no translation, *key* itself is
        returned. When no kwargs are given, or interpolation fails, the
        template is returned verbatim (placeholders left untouched). Both
        failure cases are logged as warnings.
    """
    template = _load_translations(lang).get(key)
    if template is None:
        logger.warning("translation_key_missing", key=key, lang=lang)
        return key
    if not kwargs:
        return template
    try:
        return template.format(**kwargs)
    except (KeyError, IndexError, ValueError, TypeError, AttributeError):
        # str.format raises TypeError / AttributeError for index or attribute
        # fields such as "{count[0]}" or "{count.x}" that the supplied value
        # does not support; a broken template must not break the page.
        logger.warning("translation_format_error", key=key, lang=lang)
        return template


@pass_context
def jinja_translate(
    context: Mapping[str, Any], key: str, **kwargs: str | int | float
) -> str:
    """Jinja2 global function: {{ _("key", arg=val) }}.

    The @pass_context decorator injects a jinja2.runtime.Context (Mapping-like).

    Args:
        context: Template context; its ``"lang"`` entry selects the language.
        key: Translation key to look up.
        **kwargs: Placeholder values forwarded to ``translate``.

    Returns:
        The translated text in the context's language, or in
        ``DEFAULT_LANGUAGE`` when ``"lang"`` is absent, not a string, or not
        a supported code.
    """
    raw_lang = context.get("lang", DEFAULT_LANGUAGE)
    lang: Lang = DEFAULT_LANGUAGE
    # Context values are untyped. Check for str first so an unhashable value
    # (list, dict) cannot raise TypeError in the set-membership test.
    if isinstance(raw_lang, str) and is_supported_lang(raw_lang):
        lang = raw_lang
    return translate(key, lang=lang, **kwargs)
54 changes: 54 additions & 0 deletions src/document_anonymizer/i18n/translations/de.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"brand.name": "Dokument-Anonymisierer",
"brand.subtitle": "Deutsche PII-Erkennung & Schwärzung",
"index.heading": "Dokument anonymisieren",
"index.subtitle": "Text eingeben oder Datei hochladen. Alle Daten werden nur im Arbeitsspeicher verarbeitet — keine Persistenz.",
"index.text_label": "Text",
"index.example_btn": "Beispieltext laden",
"index.file_label": "Oder Datei hochladen (TXT, PDF)",
"index.threshold_label": "Konfidenzschwelle",
"index.detect_btn": "PII erkennen",
"index.loading": "Analysiere...",
"index.placeholder": "Geben Sie hier den zu anonymisierenden Text ein...\n\nBeispiel: Herr Max Mustermann, geboren am 15.03.1985, wohnhaft in 10115 Berlin, Musterstraße 42. IBAN: DE89 3704 0044 0532 0130 00. Steuer-ID: 12345679811. Tel: +49 30 12345678.",
"results.heading": "Erkennungsergebnisse",
"results.entities_found": "{count} Entität(en) gefunden",
"results.preview_heading": "Erkannte PII im Text",
"results.review_heading": "Entitäten prüfen und auswählen",
"results.tier_high": "Hohe Konfidenz",
"results.tier_medium": "Mittlere Konfidenz",
"results.tier_low": "Niedrige Konfidenz",
"results.strategy_label": "Strategie:",
"results.anonymize_btn": "Anonymisieren",
"results.redact_btn": "PDF schwärzen",
"results.loading": "Anonymisiere...",
"results.no_entities": "Keine PII-Entitäten erkannt.",
"anonymized.heading": "Ergebnis",
"anonymized.entities_anonymized": "{count} Entität(en) anonymisiert",
"anonymized.strategy": "Strategie: {strategy}",
"anonymized.original_heading": "Original (mit Markierungen)",
"anonymized.result_heading": "Anonymisiert",
"anonymized.copy_btn": "Text kopieren",
"anonymized.download_pdf_btn": "PDF mit Schwärzungen herunterladen",
"noscript.text": "JavaScript ist für die interaktive Oberfläche erforderlich. Bitte aktivieren Sie JavaScript oder nutzen Sie die",
"noscript.link": "REST API",
"footer.text": "Zero-Persistence-Architektur · Keine externen API-Aufrufe · Physische PDF-Schwärzung",
"error.no_input": "Bitte Text eingeben oder Datei hochladen.",
"error.unknown_strategy": "Unbekannte Strategie: {strategy}",
"error.detection_failed": "Fehler bei der PII-Erkennung. (Referenz: {request_id})",
"error.anonymization_failed": "Fehler bei der Anonymisierung. (Referenz: {request_id})",
"error.entity_parse_failed": "Entitätsauswahl konnte nicht verarbeitet werden.",
"error.entity_invalid_format": "Entitätsauswahl hat ein ungültiges Format.",
"error.entity_skipped": "{skipped} von {total} ausgewählten Entitäten konnten nicht verarbeitet werden. Bitte erneut versuchen.",
"error.pdf_redaction_failed": "PDF-Schwärzung fehlgeschlagen. (Referenz: {request_id})",
"error.invalid_pdf": "Ungültige PDF-Daten. Bitte laden Sie die Datei erneut hoch.",
"error.incomplete_redaction": "Unvollständige Schwärzung: {unredacted} von {total} erkannten PII-Entitäten konnten im PDF nicht visuell lokalisiert werden. Manuelle Überprüfung empfohlen.",
"common.confidence": "Konfidenz: {score}",
"review.entities_selected": "{count} von {total} Entitäten ausgewählt",
"review.no_selection_warning": "Keine Entitäten ausgewählt. Trotzdem fortfahren?",
"review.entity_load_error": "Fehler beim Laden der Entitätsdaten. Bitte führen Sie die Erkennung erneut durch.",
"review.download_error": "Fehler beim Herunterladen der PDF.",
"app.example_loaded": "Geladen!",
"app.example_error": "Fehler!",
"app.copied": "Kopiert!",
"app.copy_failed": "Kopieren fehlgeschlagen"
}
54 changes: 54 additions & 0 deletions src/document_anonymizer/i18n/translations/en.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"brand.name": "Document Anonymizer",
"brand.subtitle": "German PII Detection & Redaction",
"index.heading": "Anonymize document",
"index.subtitle": "Enter text or upload a file. All data is processed in memory only — no persistence.",
"index.text_label": "Text",
"index.example_btn": "Load example text",
"index.file_label": "Or upload file (TXT, PDF)",
"index.threshold_label": "Confidence threshold",
"index.detect_btn": "Detect PII",
"index.loading": "Analyzing...",
"index.placeholder": "Enter text to anonymize here...\n\nExample: Herr Max Mustermann, born on 15.03.1985, residing at 10115 Berlin, Musterstraße 42. IBAN: DE89 3704 0044 0532 0130 00. Tax ID: 12345679811. Tel: +49 30 12345678.",
"results.heading": "Detection results",
"results.entities_found": "{count} entity(ies) found",
"results.preview_heading": "Detected PII in text",
"results.review_heading": "Review and select entities",
"results.tier_high": "High confidence",
"results.tier_medium": "Medium confidence",
"results.tier_low": "Low confidence",
"results.strategy_label": "Strategy:",
"results.anonymize_btn": "Anonymize",
"results.redact_btn": "Redact PDF",
"results.loading": "Anonymizing...",
"results.no_entities": "No PII entities detected.",
"anonymized.heading": "Result",
"anonymized.entities_anonymized": "{count} entity(ies) anonymized",
"anonymized.strategy": "Strategy: {strategy}",
"anonymized.original_heading": "Original (with highlights)",
"anonymized.result_heading": "Anonymized",
"anonymized.copy_btn": "Copy text",
"anonymized.download_pdf_btn": "Download redacted PDF",
"noscript.text": "JavaScript is required for the interactive interface. Please enable JavaScript or use the",
"noscript.link": "REST API",
"footer.text": "Zero-persistence architecture · No external API calls · Physical PDF redaction",
"error.no_input": "Please enter text or upload a file.",
"error.unknown_strategy": "Unknown strategy: {strategy}",
"error.detection_failed": "PII detection failed. (Reference: {request_id})",
"error.anonymization_failed": "Anonymization failed. (Reference: {request_id})",
"error.entity_parse_failed": "Entity selection could not be processed.",
"error.entity_invalid_format": "Entity selection has an invalid format.",
"error.entity_skipped": "{skipped} of {total} selected entities could not be processed. Please try again.",
"error.pdf_redaction_failed": "PDF redaction failed. (Reference: {request_id})",
"error.invalid_pdf": "Invalid PDF data. Please upload the file again.",
"error.incomplete_redaction": "Incomplete redaction: {unredacted} of {total} detected PII entities could not be visually located in the PDF. Manual review recommended.",
"common.confidence": "Confidence: {score}",
"review.entities_selected": "{count} of {total} entities selected",
"review.no_selection_warning": "No entities selected. Continue anyway?",
"review.entity_load_error": "Error loading entity data. Please run detection again.",
"review.download_error": "Error downloading PDF.",
"app.example_loaded": "Loaded!",
"app.example_error": "Error!",
"app.copied": "Copied!",
"app.copy_failed": "Copy failed"
}
Loading
Loading