diff --git a/README.md b/README.md index 89f8dd2..ce88e9b 100644 --- a/README.md +++ b/README.md @@ -20,13 +20,13 @@ EU Fact Force is a collaborative platform developed by [EUPHA](https://www.eupha ### Use Case > **Marie**, a health communicator at a national public health association, sees a viral post claiming "vaccines cause autism." She needs to respond quickly with solid evidence. -> +> > She searches **"vaccines autism"** on EU Fact Force and immediately sees: > - An interactive graph showing 15+ peer-reviewed articles that refute this claim > - The scientific consensus: **"Refuted with high confidence"** > - Current disinformation trends: 1,200 mentions this week, peak in France/Belgium > - Key evidence to cite in her response -> +> > **Time to find relevant evidence: <30 seconds** ## Key Features @@ -112,7 +112,7 @@ uv run pytest ### Déploiement de l'application -L'application se compose d'un serveur Django, d'une base PostgreSQL (avec pgvector) et de LocalStack pour le stockage S3. +L'application se compose d'un serveur Django, d'une base PostgreSQL (avec pgvector) et de LocalStack pour le stockage S3. Pour déployer et utiliser l'application en local : **1. Prérequis** @@ -176,3 +176,44 @@ AWS_S3_REGION_NAME=eu-west-1 ``` Sans ces variables, l'application utilise le stockage fichier local par défaut. + + +**7. Démarrer la web-app d'ingestion vers le S3 local** + +Pour rapatrier l'upload d'un couple PDF/métadatas : + +***Lancer le containeur Docker*** +```bash +docker compose up -d +``` + +Cela démarre PostgreSQL (port 5432) et LocalStack S3 (port 4566) et écoute sur +ce port. +Le bucket configuré est créé automatiquement au démarrage de LocalStack. 
+ +***Installer les dépendances et appliquer les migrations*** + +```bash +uv sync +uv run python manage.py migrate +``` + +***Lancer le pipeline Dash-app > API > Localstack*** + +***Démarrer le serveur Django :*** + +```bash +uv run python manage.py runserver +``` + +***Démarrer le script FastAPI*** + +```bash +uv run python ingestion/front_upload/api_front_upload.py +``` + +***Démarrer la webapp Dash*** + +```bash +uv run python eu_fact_force/dash-app/app.py +``` diff --git a/docker-compose.yml b/docker-compose.yml index 1365879..b246bba 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -48,20 +48,22 @@ services: retries: 5 localstack: - image: localstack/localstack:latest - ports: - - 4566 - environment: - SERVICES: s3 - PERSISTENCE: 1 - AWS_DEFAULT_REGION: ${AWS_S3_REGION_NAME:-eu-west-1} - AWS_STORAGE_BUCKET_NAME: ${AWS_STORAGE_BUCKET_NAME:-eu-fact-force-files} - AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-test} - AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-test} - DEBUG: ${DEBUG:-0} - volumes: - - ./s3:/var/lib/localstack - - ./docker/localstack/init-ready.d:/etc/localstack/init/ready.d:ro + image: localstack/localstack:3.3 #latest conflicted w/ FastAPI + ports: + - "4566:4566" #required listening port for FastAPI + environment: + SERVICES: s3 + PERSISTENCE: 1 + AWS_DEFAULT_REGION: ${AWS_S3_REGION_NAME:-eu-west-1} + AWS_STORAGE_BUCKET_NAME: ${AWS_STORAGE_BUCKET_NAME:-eu-fact-force-files} + AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-test} + AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-test} + DEBUG: ${DEBUG:-0} + LOCALSTACK_ACKNOWLEDGE_ACCOUNT_REQUIREMENT: 1 #required licensing + volumes: + - ./s3:/var/lib/localstack + - ./docker/localstack/init-ready.d:/etc/localstack/init/ready.d:ro + - /var/run/docker.sock:/var/run/docker.sock volumes: postgres_data: diff --git a/eu_fact_force/dash-app/api/upload_to_s3.py b/eu_fact_force/dash-app/api/upload_to_s3.py new file mode 100644 index 0000000..c3308a0 --- /dev/null +++ 
b/eu_fact_force/dash-app/api/upload_to_s3.py
@@ -0,0 +1,116 @@
+import os
+import json
+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from botocore.client import Config
+from fastapi.middleware.cors import CORSMiddleware
+import boto3
+import uvicorn
+from dotenv import load_dotenv
+
+# 1. Load environment variables (from a .env file when present)
+load_dotenv()
+
+app = FastAPI(title="EUFactForce API")
+
+# 2. Origins allowed by CORS: the Dash app URL
+origins = [
+    "http://localhost:8050",
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# 3. S3 client
+s3_client = boto3.client(
+    "s3",
+    endpoint_url=os.getenv("AWS_S3_ENDPOINT_URL"),
+    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
+    # The project's compose file and .env use AWS_S3_REGION_NAME;
+    # AWS_REGION is still honoured as a fallback for existing setups.
+    region_name=os.getenv("AWS_S3_REGION_NAME") or os.getenv("AWS_REGION"),
+    # Path-style addressing: bucket name in the URL path, not the hostname.
+    config=Config(s3={'addressing_style': 'path'}),
+)
+
+BUCKET_NAME = os.getenv("AWS_STORAGE_BUCKET_NAME")
+
+# Best-effort bucket creation at import time; failures are only printed.
+try:
+    s3_client.create_bucket(Bucket=BUCKET_NAME)
+    print(f"Bucket '{BUCKET_NAME}' créé ou déjà existant.")
+except Exception as e:
+    print(f"Note: Le bucket existe peut-être déjà : {e}")
+
+
+@app.get("/")
+async def root():
+    """Return a static message confirming that the API is running."""
+    return {"message": "API EUFactForce opérationnelle"}
+
+
+# 4. Upload routine
+@app.post("/upload/")
+async def upload_file(
+    file: UploadFile = File(...),
+    metadata: str = Form(...)
+):
+    """Store an uploaded PDF and its JSON metadata as two S3 objects.
+
+    The PDF is stored under its file name and the metadata under the same
+    name with a ``.json`` extension.
+
+    Raises:
+        HTTPException: 400 when the file name is missing or the metadata
+            is not valid JSON; 500 when reading the file or writing to S3
+            fails.
+    """
+    # Validate everything before touching S3 so that a bad request never
+    # leaves a PDF behind without its metadata.
+    # Only the final path component of the client-supplied name is used.
+    filename = os.path.basename(file.filename or "")
+    if not filename:
+        raise HTTPException(status_code=400, detail="Nom de fichier manquant")
+    json_filename = f"{os.path.splitext(filename)[0]}.json"
+
+    # Parsed outside the try block below, otherwise its blanket handler
+    # would turn this 400 into a 500.
+    try:
+        json_data = json.loads(metadata)
+    except json.JSONDecodeError:
+        raise HTTPException(status_code=400, detail="Métadonnées JSON invalides")
+
+    try:
+        # A. PDF upload to S3
+        file_content = await file.read()
+        s3_client.put_object(
+            Bucket=BUCKET_NAME,
+            Key=filename,
+            Body=file_content,
+            ContentType="application/pdf"
+        )
+
+        # B. JSON metadata upload to S3
+        s3_client.put_object(
+            Bucket=BUCKET_NAME,
+            Key=json_filename,
+            Body=json.dumps(json_data),
+            ContentType="application/json"
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+    return {
+        "status": "success",
+        "message": f"Fichiers {filename} et {json_filename} téléchargés avec succès."
+    }
+
+
+if __name__ == "__main__":
+    # Serve the API with Uvicorn on port 8001, listening on all interfaces
+    uvicorn.run(app, host="0.0.0.0", port=8001)
diff --git a/eu_fact_force/dash-app/app.py b/eu_fact_force/dash-app/app.py index 345bf65..9553e62 100644 --- a/eu_fact_force/dash-app/app.py +++ b/eu_fact_force/dash-app/app.py @@ -5,9 +5,12 @@ import plotly.io as pio import plotly.graph_objects as go +# PDF ingestion import base64 import io import json +from pathlib import Path +import requests import uuid from utils.colors import EUPHAColors @@ -16,7 +19,7 @@ from pages import readme, ingest, graph # Plotly template -with open("assets/template.json", "r") as f: +with open(Path(__file__).parent / "assets/template.json", "r") as f: debate_template = json.load(f) pio.templates["app_template"] = go.layout.Template(debate_template) pio.templates.default = "app_template" @@ -368,6 +371,9 @@ def lock_authors(is_correct, ids): @app.callback( Output('final-output', 'children'), Input('btn-final-upload', 'n_clicks'), + # AJOUT DES DEUX STATES MANQUANTS ICI + State('upload-pdf', 'contents'), + State('upload-pdf', 'filename'), State('input-doi', 'value'), State('input-abstract', 'value'), State('input-journal', 'value'), @@ -381,14 +387,20 @@ def lock_authors(is_correct, ids): State({'type': 'auth-email', 'index': ALL}, 'value'), prevent_initial_call=True ) -def finalize_and_display_json(n_clicks, doi, abstract, journal, date, link, category, study_type, title, names, surnames, emails): +def 
finalize_and_send(n_clicks, pdf_base64, filename, doi, abstract, journal, date, link, category, study_type, title, names, surnames, emails): + # Dash vérifie maintenant que pdf_base64 reçoit bien 'contents' et filename reçoit 'filename' + + if not n_clicks or pdf_base64 is None: + return no_update + + print(f"Tentative d'envoi pour : {filename}") # DEBUG CLI authors_list = [ {"name": n, "surname": s, "email": e} for n, s, e in zip(names, surnames, emails) if n or s ] - metadata_json = { + metadata_payload = { "title": title, "category": category, "study_type": study_type, @@ -400,11 +412,31 @@ def finalize_and_display_json(n_clicks, doi, abstract, journal, date, link, cate "authors": authors_list } - return html.Div([ - dbc.Alert("Successfully contributed, thank you!", color="success"), - html.H4("Metadata JSON"), - html.Pre(json.dumps(metadata_json, indent=4), style={'backgroundColor': '#f8f9fa', 'padding': '15px', 'borderRadius': '8px', 'border': '1px solid #dee2e6'}) - ]) + try: + # Décodage propre + content_type, content_string = pdf_base64.split(',') + pdf_bytes = base64.b64decode(content_string) + + url = "http://localhost:8001/upload/" + + files = { + 'file': (filename, pdf_bytes, 'application/pdf') + } + data = { + 'metadata': json.dumps(metadata_payload) + } + + # Timeout ajouté pour éviter que Dash ne freeze si FastAPI est éteint + response = requests.post(url, files=files, data=data, timeout=10) + + if response.status_code == 200: + return dbc.Alert(f"Succès ! 
{filename} est sur S3.", color="success") + else: + return dbc.Alert(f"Erreur API : {response.text}", color="danger") + + except Exception as e: + print(f"Erreur détaillée : {str(e)}") # Visible dans ton terminal Dash + return dbc.Alert(f"Erreur lors de l'envoi : {str(e)}", color="danger") if __name__ == "__main__": app.run(debug=True) diff --git a/pyproject.toml b/pyproject.toml index e3a170c..96128e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,9 @@ dependencies = [ "dash-bootstrap-components>=2.0.4", "dash-cytoscape>=1.0.2", "pymupdf>=1.27.1", + "fastapi>=0.135.2", + "uvicorn>=0.42.0", + "python-multipart>=0.0.22", ] [tool.pytest.ini_options] diff --git a/uv.lock b/uv.lock index dcdeaf3..a9e8583 100644 --- a/uv.lock +++ b/uv.lock @@ -17,12 +17,15 @@ dependencies = [ { name = "dash-cytoscape" }, { name = "django" }, { name = "django-storages", extra = ["s3"] }, + { name = "fastapi" }, { name = "gunicorn" }, { name = "pgvector" }, { name = "psycopg", extra = ["binary"] }, { name = "pymupdf" }, { name = "python-dotenv" }, + { name = "python-multipart" }, { name = "sentence-transformers" }, + { name = "uvicorn" }, ] [package.dev-dependencies] @@ -60,12 +63,15 @@ requires-dist = [ { name = "dash-cytoscape", specifier = ">=1.0.2" }, { name = "django", specifier = ">=6.0.2" }, { name = "django-storages", extras = ["s3"], specifier = ">=1.14" }, + { name = "fastapi", specifier = ">=0.135.2" }, { name = "gunicorn", specifier = ">=25.1.0" }, { name = "pgvector", specifier = ">=0.2.4" }, { name = "psycopg", extras = ["binary"], specifier = ">=3.2" }, { name = "pymupdf", specifier = ">=1.27.1" }, { name = "python-dotenv", specifier = ">=1.0" }, + { name = "python-multipart", specifier = ">=0.0.22" }, { name = "sentence-transformers", specifier = ">=5.2.3" }, + { name = "uvicorn", specifier = ">=0.42.0" }, ] [package.metadata.requires-dev] @@ -1076,6 +1082,22 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/4d/a9/1eed4db92d0aec2f9bfdf1faae0ab0418b5e121dda5701f118a7a4f0cd6a/faker-40.5.1-py3-none-any.whl", hash = "sha256:c69640c1e13bad49b4bcebcbf1b52f9f1a872b6ea186c248ada34d798f1661bf", size = 1987053, upload-time = "2026-02-23T21:34:36.418Z" }, ] +[[package]] +name = "fastapi" +version = "0.135.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/73/5903c4b13beae98618d64eb9870c3fac4f605523dd0312ca5c80dadbd5b9/fastapi-0.135.2.tar.gz", hash = "sha256:88a832095359755527b7f63bb4c6bc9edb8329a026189eed83d6c1afcf419d56", size = 395833, upload-time = "2026-03-23T14:12:41.697Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/ea/18f6d0457f9efb2fc6fa594857f92810cadb03024975726db6546b3d6fcf/fastapi-0.135.2-py3-none-any.whl", hash = "sha256:0af0447d541867e8db2a6a25c23a8c4bd80e2394ac5529bd87501bbb9e240ca5", size = 117407, upload-time = "2026-03-23T14:12:43.284Z" }, +] + [[package]] name = "fastjsonschema" version = "2.21.2" @@ -3754,6 +3776,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, ] +[[package]] +name = "python-multipart" +version = "0.0.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, +] + [[package]] name = "python-pptx" version = "1.0.2" @@ -4578,6 +4609,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, ] +[[package]] +name = "starlette" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, +] + [[package]] name = "sympy" version = "1.14.0" @@ -5057,6 +5101,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uvicorn" +version = "0.42.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = "sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" }, +] + [[package]] name = "virtualenv" version = "21.1.0"