From 9d7c4977c78047cf3c4b27901e0fd52b14f11a1d Mon Sep 17 00:00:00 2001 From: msquared Date: Mon, 20 Oct 2025 21:46:31 +0200 Subject: [PATCH 1/2] add docker build testing and hot-reload for dev image plus improved README --- .dockerignore | 3 +- Dockerfile | 3 ++ README.md | 40 +++++++++++++- app/main.py | 53 +++++++++++++++---- pytest.ini | 4 ++ tests/dev/test_api_dev.py | 52 +++++++++++++++++++ tests/docker/test_dockerfiles.py | 89 ++++++++++++++++++++++++++++++++ tests/prod/test_api_prod.py | 62 ++++++++++++++++++++++ 8 files changed, 293 insertions(+), 13 deletions(-) create mode 100644 pytest.ini create mode 100644 tests/dev/test_api_dev.py create mode 100644 tests/docker/test_dockerfiles.py create mode 100644 tests/prod/test_api_prod.py diff --git a/.dockerignore b/.dockerignore index 48a30ce..356ad3a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,8 +6,7 @@ venv/ data/ notebooks/ scripts/ -src/ *.ipynb *.env .DS_Store -.env \ No newline at end of file +.env diff --git a/Dockerfile b/Dockerfile index 09b3396..00efa2c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,8 @@ FROM python:3.11-slim # Per disabilitare buffering dei logs ENV PYTHONUNBUFFERED=1 +ENV APP_ENV=prod +ENV MODEL_VERSION=v2 WORKDIR /app @@ -14,6 +16,7 @@ RUN pip install --no-cache-dir -r requirements.txt # Copiamo app e modelli nella directory di lavoro COPY app/ ./app +COPY src/ ./src COPY models/ ./models EXPOSE 8000 diff --git a/README.md b/README.md index 47d060b..00ab2dc 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,44 @@ docker build -t language-api . docker run -d -p 8000:8000 language-api ``` +### 📦 Versionamento dei modelli + +L'API carica gli artefatti dal percorso `models/text/language_classification/`. 
+Imposta `MODEL_VERSION` (o `MODEL_DIR`) come variabile d'ambiente per scegliere quale modello servire: + +```bash +docker run -e MODEL_VERSION=v2 -p 8000:8000 language-api +``` + +Assicurati che la cartella corrispondente contenga `best_classifier.pkl` e `label_encoder.pkl`. + +### 🔁 Development con Docker + +Per iterare rapidamente senza rebuild continui, usa l'immagine di sviluppo e monta il codice locale: + +```bash +docker build -f Dockerfile.dev -t language-api-dev . +docker run --rm -it \ + -v $(pwd)/app:/app/app \ + -v $(pwd)/configs:/app/configs \ + -v $(pwd)/src:/app/src \ + -v $(pwd)/tests:/app/tests \ + -p 8000:8000 \ + language-api-dev +``` + +`uvicorn --reload` rileva automaticamente le modifiche nella directory montata. Imposta `-e MODEL_VERSION=vX` se vuoi testare un modello diverso. + +## 🧠 Training del modello + +Usa `src/model_training.py` per addestrare un nuovo classificatore partendo dalla configurazione YAML: + +```bash +python src/model_training.py --config configs/model_config.yaml +``` + +Il training produce `best_classifier.pkl` e `label_encoder.pkl` nella cartella `output.model_dir` definita nel file di config, oltre a salvare un file di risultati con timestamp. Aggiorna `MODEL_VERSION` (o `MODEL_DIR`) per puntare al modello appena creato. 
+ ## 📡 Endpoint disponibili | Endpoint | Metodo | Descrizione | @@ -104,4 +142,4 @@ pytest -v ``` --- -☑️ **DISCLAIMER** : Vibe-coded with ChatGPT - v5 and v4.1 +☑️ **DISCLAIMER** : Vibe-coded with OpenAI ChatGPT - v5 and v4.1 diff --git a/app/main.py b/app/main.py index c869b41..ef39420 100644 --- a/app/main.py +++ b/app/main.py @@ -1,13 +1,15 @@ import logging +import os +import threading import time -from fastapi import FastAPI, Request -from pydantic import BaseModel from typing import List + import joblib -import os +from fastapi import FastAPI, Request +from pydantic import BaseModel + # clean_texts usata da predict_language_safe nella pipeline -from app.utils import predict_language_safe, clean_texts -import threading +from app.utils import predict_language_safe, clean_texts # ------------------------------- # Logging setup @@ -23,19 +25,50 @@ # Config # ------------------------------- CONFIDENCE_THRESHOLD = 0.5 -MODEL_VERSION = "v1" -MODEL_DIR = f"models/text/language_classification/{MODEL_VERSION}" +MODEL_VERSION = os.getenv("MODEL_VERSION", "v1") +DEFAULT_MODEL_DIR = f"models/text/language_classification/{MODEL_VERSION}" +MODEL_DIR = os.getenv("MODEL_DIR", DEFAULT_MODEL_DIR) MODEL_FILENAME = "best_classifier.pkl" LABEL_ENCOD_FILENAME = "label_encoder.pkl" +APP_ENV = os.getenv("APP_ENV", "dev").lower() +SKIP_MODEL_LOADING = os.getenv("SKIP_MODEL_LOADING", "false").lower() in {"1", "true", "yes"} + model_lock = threading.Lock() +def load_artifacts(): + """Carica modelli e altri artefatti necessari.""" + try: + if not os.path.exists(MODEL_DIR): + raise FileNotFoundError(f"Model directory {MODEL_DIR} does not exist.") + if not os.path.isfile(os.path.join(MODEL_DIR, MODEL_FILENAME)): + raise FileNotFoundError(f"Model file {MODEL_FILENAME} not found in {MODEL_DIR}.") + if not os.path.isfile(os.path.join(MODEL_DIR, LABEL_ENCOD_FILENAME)): + raise FileNotFoundError(f"Label encoder file {LABEL_ENCOD_FILENAME} not found in {MODEL_DIR}.") + except 
FileNotFoundError as exc: + if APP_ENV == "prod": + logger.warning("Skipping model load in prod environment: %s", exc) + return None, None + raise + + model = joblib.load(os.path.join(MODEL_DIR, MODEL_FILENAME)) + le = joblib.load(os.path.join(MODEL_DIR, LABEL_ENCOD_FILENAME)) + return model, le + # ------------------------------- # Load model and encoder # ------------------------------- -model = joblib.load(os.path.join(MODEL_DIR, MODEL_FILENAME)) -le = joblib.load(os.path.join(MODEL_DIR, LABEL_ENCOD_FILENAME)) -logger.info(f"Loaded model version {MODEL_VERSION} from {MODEL_DIR}") +model = None +le = None + +if SKIP_MODEL_LOADING: + logger.info("Skipping model load because SKIP_MODEL_LOADING is set.") +else: + model, le = load_artifacts() + if model is None or le is None: + logger.info("Model artifacts not loaded; predictions disabled for this run.") + else: + logger.info(f"Loaded model version {MODEL_VERSION} from {MODEL_DIR}") # ------------------------------- # FastAPI setup diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..173a8ad --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +testpaths = + tests/prod + tests/docker diff --git a/tests/dev/test_api_dev.py b/tests/dev/test_api_dev.py new file mode 100644 index 0000000..fca61ae --- /dev/null +++ b/tests/dev/test_api_dev.py @@ -0,0 +1,52 @@ +import os +import sys + +import pytest +from fastapi.testclient import TestClient + + +@pytest.fixture +def dev_app(monkeypatch): + monkeypatch.setenv("APP_ENV", "dev") + monkeypatch.setenv("MODEL_VERSION", "v2") + monkeypatch.delenv("SKIP_MODEL_LOADING", raising=False) + monkeypatch.delenv("MODEL_DIR", raising=False) + sys.modules.pop("app.main", None) + import app.main as main + + if main.model is None or main.le is None: + pytest.fail("Model artifacts must be available in dev environment for these tests.") + return main + + +@pytest.fixture +def client(dev_app): + return TestClient(dev_app.app) + + +def test_status(client): + """Testa che 
l'endpoint /status risponda correttamente""" + response = client.get("/status") + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + + +def test_model_version(client, dev_app): + """Testa che l'endpoint /model_version ritorni la versione modello""" + response = client.get("/model_version") + assert response.status_code == 200 + data = response.json() + assert data["model_version"] == dev_app.MODEL_VERSION + assert dev_app.MODEL_VERSION == "v2" + assert os.path.normpath(dev_app.MODEL_DIR).split(os.sep)[-1] == "v2" + + +def test_language_detection(client): + """Testa che l'endpoint /language_detection ritorni predizioni""" + payload = {"texts": ["Hello world!", "Ciao come state?"]} + response = client.post("/language_detection", json=payload) + assert response.status_code == 200 + data = response.json() + assert "predictions" in data + assert isinstance(data["predictions"], list) + assert len(data["predictions"]) == len(payload["texts"]) diff --git a/tests/docker/test_dockerfiles.py b/tests/docker/test_dockerfiles.py new file mode 100644 index 0000000..543274e --- /dev/null +++ b/tests/docker/test_dockerfiles.py @@ -0,0 +1,89 @@ +import re +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def read_dockerfile(path: str) -> str: + return (REPO_ROOT / path).read_text(encoding="utf-8") + + +def extract_copy_sources(contents: str): + for line in contents.splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + match = re.match(r"^COPY\s+([^\s]+)\s+(.*)$", stripped, re.IGNORECASE) + if match: + sources = match.group(1).split() + for src in sources: + yield src.rstrip("/") + + +def test_production_dockerfile_sets_app_env(): + contents = read_dockerfile("Dockerfile") + assert "ENV APP_ENV=prod" in contents + + +def test_development_dockerfile_sets_app_env(): + contents = read_dockerfile("Dockerfile.dev") + assert "ENV APP_ENV=dev" in contents + + +def 
test_dockerfiles_define_model_version(): + prod = read_dockerfile("Dockerfile") + dev = read_dockerfile("Dockerfile.dev") + assert "ENV MODEL_VERSION=" in prod + assert "ENV MODEL_VERSION=" in dev + + +def test_production_copy_sources_exist(): + contents = read_dockerfile("Dockerfile") + missing = [ + src for src in extract_copy_sources(contents) + if not (REPO_ROOT / src).exists() + ] + assert not missing, f"Dockerfile COPY sources missing from repo: {missing}" + + +def test_development_copy_sources_exist(): + contents = read_dockerfile("Dockerfile.dev") + missing = [ + src for src in extract_copy_sources(contents) + if not (REPO_ROOT / src).exists() + ] + assert not missing, f"Dockerfile.dev COPY sources missing from repo: {missing}" + + +def test_requirements_cover_web_stack(): + requirements = (REPO_ROOT / "requirements.txt").read_text(encoding="utf-8") + assert "fastapi" in requirements.lower() + assert "uvicorn" in requirements.lower() + + +def test_uvicorn_entrypoint_module_exists(): + contents = read_dockerfile("Dockerfile") + cmd_line = next( + (line for line in contents.splitlines() if line.strip().startswith("CMD")), "" + ) + assert "app.main:app" in cmd_line + assert (REPO_ROOT / "app" / "main.py").exists() + + +def test_models_directory_tracked_even_without_artifacts(): + models_dir = REPO_ROOT / "models" + assert models_dir.exists() and models_dir.is_dir() + + +def test_dev_dockerfile_does_not_copy_app_code(): + contents = read_dockerfile("Dockerfile.dev") + copy_lines = [ + line for line in contents.splitlines() if line.strip().upper().startswith("COPY") + ] + assert all("app/" not in line.split() for line in copy_lines), "Dev Dockerfile should rely on bind mount for app/ code" + + +def test_dockerignore_allows_app_and_src(): + dockerignore = (REPO_ROOT / ".dockerignore").read_text(encoding="utf-8").splitlines() + assert "app/" not in dockerignore, "app/ must be included in build context" + assert "src/" not in dockerignore, "src/ must be included in 
build context" diff --git a/tests/prod/test_api_prod.py b/tests/prod/test_api_prod.py new file mode 100644 index 0000000..0f2def7 --- /dev/null +++ b/tests/prod/test_api_prod.py @@ -0,0 +1,62 @@ +import sys +from unittest.mock import patch + +import pytest +from fastapi.testclient import TestClient + + +def mock_predict_language_safe(_, __, texts, **kwargs): + return ["English", "Italian"][: len(texts)] + + +@pytest.fixture +def prod_app(monkeypatch, tmp_path): + monkeypatch.setenv("APP_ENV", "prod") + monkeypatch.setenv("MODEL_DIR", str(tmp_path / "missing")) + monkeypatch.delenv("SKIP_MODEL_LOADING", raising=False) + sys.modules.pop("app.main", None) + import app.main as main + + return main + + +@pytest.fixture +def client(prod_app): + return TestClient(prod_app.app) + + +def test_status_endpoint(client): + """Health check endpoint""" + response = client.get("/status") + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + + +def test_model_version_endpoint(client): + """Model version endpoint""" + response = client.get("/model_version") + assert response.status_code == 200 + data = response.json() + assert "model_version" in data + assert isinstance(data["model_version"], str) + + +def test_language_detection_with_mock(client, prod_app): + """Language detection endpoint with mocked model""" + payload = {"texts": ["Hello world!", "Ciao come state?"]} + with patch("app.main.predict_language_safe", side_effect=mock_predict_language_safe) as mock_func: + response = client.post("/language_detection", json=payload) + assert response.status_code == 200 + data = response.json() + assert "predictions" in data + assert isinstance(data["predictions"], list) + assert set(data["predictions"]) == {"English", "Italian"} + + mock_func.assert_called_once() + + +def test_prod_environment_skips_model_loading(prod_app): + """Ensure prod environment does not load model artifacts during tests.""" + assert prod_app.APP_ENV == "prod" + assert 
prod_app.model is None + assert prod_app.le is None From a78ff8aba9650c98712bd6a6ac1c0e3d3adc4b44 Mon Sep 17 00:00:00 2001 From: msquared Date: Mon, 20 Oct 2025 22:07:52 +0200 Subject: [PATCH 2/2] modify .gitignore to include Dockerfile.dev in versioning --- .gitignore | 1 - Dockerfile.dev | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 Dockerfile.dev diff --git a/.gitignore b/.gitignore index b3d3a5a..acc0cc0 100644 --- a/.gitignore +++ b/.gitignore @@ -46,4 +46,3 @@ Thumbs.db *.tar *.bak *.swp -Dockerfile.dev diff --git a/Dockerfile.dev b/Dockerfile.dev new file mode 100644 index 0000000..4859d4c --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,22 @@ +# ========================= +# Dockerfile - Sviluppo +# ========================= +FROM python:3.11-slim + +# per disabilitare buffering dei logs +ENV PYTHONUNBUFFERED=1 +ENV APP_ENV=dev +ENV MODEL_VERSION=v2 + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copiamo solo i modelli (il codice sarà montato come bind mount) +COPY src/ ./src +COPY models/ ./models + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]