From 8716cb2ffb79ba3f581ca367104a5f1fc4c9afc7 Mon Sep 17 00:00:00 2001 From: Baptiste Schelle Date: Tue, 24 Mar 2026 11:13:21 +0100 Subject: [PATCH 1/8] parsing updating DOI regex --- eu_fact_force/dash-app/utils/parsing.py | 222 ++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 eu_fact_force/dash-app/utils/parsing.py diff --git a/eu_fact_force/dash-app/utils/parsing.py b/eu_fact_force/dash-app/utils/parsing.py new file mode 100644 index 0000000..d3f158e --- /dev/null +++ b/eu_fact_force/dash-app/utils/parsing.py @@ -0,0 +1,222 @@ +import base64 +from typing import Optional +import re +import fitz # PyMuPDF + +def load_svg_as_data_uri(svg_path: str) -> Optional[str]: + """Return a data URI for an SVG file, or None if not found.""" + try: + with open(svg_path, "rb") as f: + b64 = base64.b64encode(f.read()).decode("utf-8") + return f"data:image/svg+xml;base64,{b64}" + except FileNotFoundError: + return None + +def extract_doi_from_pdf(text: str) -> Optional[str]: + """Extract DOI from PDF text using regex pattern.""" + # Pattern for DOI: 10.xxxx/xxxxx + match = re.search(r'(?:doi[:\s]+)?(?:https?://)?(?:dx\.)?doi\.org/(10\.\S+)', text, re.IGNORECASE) + if match: + return match.group(1) if match.group(1).startswith('10.') else match.group(0) + + # Alternative pattern + match = re.search(r'10\.\d{4,}/\S+', text) + if match: + return match.group(0) + return None + + +def extract_abstract_from_pdf(text: str) -> Optional[str]: + """Extract abstract from PDF text.""" + # Look for "Abstract" section + abstract_pattern = r'(?:abstract|summary)\s*[:]*\s*(.+?)(?=(?:introduction|keywords|1\.\s|methods|methodology|introduction|related work|background)|\Z)' + match = re.search(abstract_pattern, text, re.IGNORECASE | re.DOTALL) + if match: + abstract_text = match.group(1).strip() + # Clean up and limit to reasonable length + abstract_text = re.sub(r'\s+', ' ', abstract_text)[:500] + return abstract_text if len(abstract_text) > 20 else None + return None 
+ + +def extract_authors_from_pdf(text: str) -> list[dict]: + """Extract authors by finding the typical author line in scientific papers.""" + authors = [] + + def clean_name(name: str) -> str: + # Supprime chiffres, *, †, § collés au nom + return re.sub(r'[\d\*†‡§]+', '', name).strip() + + lines = text.split('\n')[:50] + + for line in lines: + line = line.strip() + + # Une ligne d'auteurs contient typiquement "and" ou une virgule + # et ressemble à des noms propres (Majuscule, pas trop longue) + if len(line) > 150 or len(line) < 5: + continue + if not re.search(r'\band\b|,', line): + continue + # Doit commencer par une majuscule + if not re.match(r'^[A-Z]', line): + continue + # Ne doit pas contenir de mots typiques de non-auteurs + skip_words = ['abstract', 'keywords', 'introduction', 'figure', + 'table', 'doi', 'http', 'university', 'institute', + 'open access', 'copyright', 'license', 'received'] + if any(w in line.lower() for w in skip_words): + continue + # Tous les "mots" (après nettoyage) doivent ressembler à des noms propres + # càd commencer par une majuscule ou être un chiffre/symbole + test_line = clean_name(line) + words = [w for w in re.split(r'[\s,]+', test_line) if w] + if not words: + continue + # Au moins 80% des mots doivent commencer par une majuscule + capitalized = sum(1 for w in words if re.match(r'^[A-Z]', w) or w.lower() == 'and') + if capitalized / len(words) < 0.8: + continue + + # C'est probablement une ligne d'auteurs — on parse + raw_names = re.split(r',\s*|\s+and\s+', line) + for raw in raw_names: + name = clean_name(raw).strip() + if not name or len(name) < 3: + continue + parts = name.split() + if len(parts) >= 2: + authors.append({ + "name": " ".join(parts[:-1]), + "surname": parts[-1], + "email": "" + }) + + # Rattache l'email du corresponding author + corr_match = re.search( + r'\*Correspondence[:\s]+([A-Z][a-z]+(?:[\s\-][A-Za-z\-]+)+)\s+([\w.\-]+@[\w.\-]+\.\w+)', + text + ) + if corr_match and authors: + corr_name = 
re.sub(r'[\d\*†‡§]+', '', corr_match.group(1)).strip() + corr_email = corr_match.group(2) + for author in authors: + full = f"{author['name']} {author['surname']}" + if corr_name in full or full in corr_name: + author['email'] = corr_email + + return authors[:10] + +def extract_date_from_pdf(text: str) -> Optional[str]: + """Extract publication date (year only) from PDF text.""" + # YYYY-MM-DD or YYYY/MM/DD + match = re.search(r'\b((?:19|20)\d{2})[-/.](?:0[1-9]|1[012])[-/.](?:0[1-9]|[12][0-9]|3[01])\b', text) + if match: + return match.group(1) + + # DD-MM-YYYY or DD/MM/YYYY + if match := re.search(r'\b(?:0[1-9]|[12][0-9]|3[01])[-/.](?:0[1-9]|1[012])[-/.]((?:19|20)\d{2})\b', text): return match.group(1) + + # Pattern: Month Year (e.g., "January 2023") + match = re.search(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+((?:19|20)\d{2})\b', text, re.IGNORECASE) + if match: + return match.group(1) + + # We look for years typically appearing in headers or near "Copyright" or "Received" + # Just a year (between 1900 and 2099) + match = re.search(r'\b(19\d{2}|20\d{2})\b', text) + if match: + return match.group(1) + + return None + +def extract_journal_from_pdf(text: str) -> Optional[str]: + """Extract journal name from PDF text.""" + journal_patterns = [ + r'Published in\s*[:]?\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)', + r'Journal of\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)', + r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\s+Journal)', + r'Source\s*[:]?\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)' + ] + for pattern in journal_patterns: + match = re.search(pattern, text) + if match: + return match.group(1).strip() + + # Try to find it in the first few lines if not found by pattern + lines = text.split('\n')[:15] + for line in lines: + line = line.strip() + if any(kw in line for kw in ["Journal", "Review", "Nature", "Science", "Lancet", "Medicine"]): + if len(line.split()) < 10: # Avoid long sentences + return line + return None + +def extract_link_from_pdf(text: str, doi: 
Optional[str] = None) -> Optional[str]: + """Extract article link from PDF text or DOI.""" + if doi: + return f"https://doi.org/{doi}" + + # Look for https links that might be the editor's link + links = re.findall(r'https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?&//=]*)', text) + for link in links: + if any(domain in link for domain in ['sciencedirect', 'springer', 'wiley', 'nature.com', 'thelancet', 'bmj', 'frontiersin', 'plos', 'pubmed.ncbi.nlm.nih.gov'\ + , 'who.int', 'cdc.gov', 'acpjournals', 'nejm.org', 'jama.jamanetwork.com']): + return link + return links[0] if links else None + +def extract_text_by_blocks(uploaded_file_bytes) -> str: + doc = fitz.open(stream=uploaded_file_bytes, filetype="pdf") + full_text = "" + for page in doc[:3]: + # Trie les blocs par position verticale puis horizontale + blocks = page.get_text("blocks") + blocks.sort(key=lambda b: (round(b[1] / 20), b[0])) + for block in blocks: + full_text += block[4] + "\n" + return full_text + +def extract_title_from_pdf(text: str) -> Optional[str]: + """Try to extract the title from the first few lines of the PDF.""" + lines = [line.strip() for line in text.split('\n') if line.strip()] + if not lines: + return None + + # Typically the title is in the first few lines, is not too long, + # and doesn't contain certain keywords. + for line in lines[:10]: + # Skip lines that are likely not titles (e.g., journal names, DOI, authors) + if any(kw in line.lower() for kw in ["journal", "doi:", "http", "vol.", "issn", "received:", "accepted:", "copyright"]): + continue + # Titles are usually at least 3 words and not excessively long (e.g. 
< 250 chars) + if 3 <= len(line.split()) <= 40 and len(line) < 450: + return line + return None + +def extract_pdf_metadata(uploaded_file) -> dict: + """Extract metadata from PDF file.""" + metadata = { + "title": None, + "doi": None, + "abstract": None, + "publication_date": None, + "journal": None, + "article_link": None, + "authors": [] + } + try: + # Extract text from PDF + pdf_text = extract_text_by_blocks(uploaded_file.read()) + + # Extract metadata + metadata["title"] = extract_title_from_pdf(pdf_text) + metadata["doi"] = extract_doi_from_pdf(pdf_text) + metadata["abstract"] = extract_abstract_from_pdf(pdf_text) + metadata["authors"] = extract_authors_from_pdf(pdf_text) + metadata["publication_date"] = extract_date_from_pdf(pdf_text) + metadata["journal"] = extract_journal_from_pdf(pdf_text) + metadata["article_link"] = extract_link_from_pdf(pdf_text, metadata["doi"]) + + except Exception as e: + print(f"Error processing PDF: {e}") + return metadata From 2f52dcc7d233938df8422bd27e2f8baa65a6137d Mon Sep 17 00:00:00 2001 From: Baptiste Schelle Date: Fri, 27 Mar 2026 16:22:46 +0100 Subject: [PATCH 2/8] added parsing regex logic for metadata fields and metadata JSON generation --- eu_fact_force/dash-app/pages/ingest.py | 315 ++++++++++++++++++++++++- 1 file changed, 308 insertions(+), 7 deletions(-) diff --git a/eu_fact_force/dash-app/pages/ingest.py b/eu_fact_force/dash-app/pages/ingest.py index 40ba966..3c0f0bc 100644 --- a/eu_fact_force/dash-app/pages/ingest.py +++ b/eu_fact_force/dash-app/pages/ingest.py @@ -1,18 +1,319 @@ from dash import dcc, html +import dash_bootstrap_components as dbc from utils.colors import EUPHAColors +from utils.parsing import * + +logo_uri = load_png_as_data_uri("eupha-logo.svg") def make_layout(): - return html.Div( + #Sidebar + sidebar = html.Div( [ - html.H2("Ingestion"), - dcc.Markdown("Ingestion layout to be completed here..."), + html.Div( + [ + html.Img( + src=logo_uri, + style={ + "width": "100%", + "maxWidth": "220px", + 
"height": "auto", + "display": "block", + "margin": "0 auto 20px auto", + "marginTop": "40px" + } + ) if logo_uri else html.Div(), + + html.H3( + "EU Fact Force", + className="text-center", + style={ + "fontWeight": "700", + "fontSize": "1.9rem", + "marginBottom": "20px", + "color": "#212529" + } + ), + + html.Hr(style={"margin": "1.2rem 0"}), + + html.H5( + "How it works", + style={ + "fontWeight": "500", + "marginBottom": "12px", + "marginTop": "45px" + } + ), + + html.Ol( + [ + html.Li("Upload a PDF"), + html.Li("Validate DOI + abstract"), + html.Li("Validate authors"), + html.Li("Click Upload file") + ], + style={ + "paddingLeft": "1.2rem", + "marginLeft": "0", + "lineHeight": "1.8" + } + ), + ], + style={ + "maxWidth": "240px", + "margin": "0 auto" + } + ) ], style={ - "border-radius": "15px", - "padding": "20px", - "background-color": EUPHAColors.white, - }, + "padding": "2rem 1rem", + "backgroundColor": "#f5f7fa", + "height": "100vh", + "position": "fixed", + "top": 0, + "left": 0, + "width": "16%", + "borderRight": "1px solid #dee2e6" + } ) + + # Main page + main_content = html.Div( + [ + html.Div( + [ + html.H1( + "EU Fact Force - Article uploading page", + className="mb-3 text-center", + style={ + "fontWeight": "700", + "fontSize": "2.5rem", + "lineHeight": "1.15" + } + ), + html.H3( + "Welcome to EU Fact Force articles uploading pages", + className="text-center mb-4", + style={ + "color": "#6c757d", + "fontWeight": "500", + "fontSize": "1.5rem", + "lineHeight": "1.3" + } + ), + html.P( + "Thank you for collaborating with us, you will find here a page where you can upload and declare authors of your papers in attempt to build a safer and healthier community! 
Thank you for your contribution!", + className="text-center mb-5", + style={ + "maxWidth": "900px", + "margin": "0 auto", + "fontSize": "1.1rem", + "lineHeight": "1.7", + "color": "#212529" + } + ), + ], + style={ + "maxWidth": "1100px", + "margin": "0 auto 2rem auto" + } + ), + + dbc.Card([ + dbc.CardBody([ + html.H4( + "Upload & Metadatas", + className="card-title font-weight-bold mb-4" + ), + dcc.Upload( + id='upload-pdf', + children=html.Div(['Drop your article here or ', html.A('Select a PDF', className="font-weight-bold")]), + style={ + 'width': '100%', + 'height': '80px', + 'lineHeight': '80px', + 'borderWidth': '2px', + 'borderStyle': 'dashed', + 'borderColor': '#adb5bd', + 'textAlign': 'center', + 'borderRadius': '10px', + 'marginBottom': '20px', + 'backgroundColor': '#f8f9fa', + 'cursor': 'pointer' + } + ), + html.H5("General informations", className="mt-4 font-weight-bold"), + dbc.Row([ + dbc.Col([ + dbc.Label("Article Title"), + dbc.Input(id='input-title', type='text', placeholder="Title of the article", className="mb-3"), + + dbc.Row([ + dbc.Col([ + dbc.Label("Category"), + dcc.Dropdown( + id='input-category', + options=[ + {'label': 'Scientific Article', 'value': 'scientific_article'}, + {'label': 'Report', 'value': 'report'}, + {'label': 'Thesis', 'value': 'thesis'}, + {'label': 'Working Paper', 'value': 'working_paper'}, + {'label': 'Book Chapter', 'value': 'book_chapter'}, + {'label': 'Other', 'value': 'other'} + ], + value='scientific_article', + className="mb-3" + ), + ], width=6), + dbc.Col([ + dbc.Label("Study Type"), + dcc.Dropdown( + id='input-type', + options=[ + {'label': 'Meta-analysis', 'value': 'meta_analysis'}, + {'label': 'Systematic review', 'value': 'systematic_review'}, + {'label': 'Evidence review', 'value': 'evidence_review'}, + {'label': 'Cohort study', 'value': 'cohort_study'}, + {'label': 'Case-control study', 'value': 'case_control_study'}, + {'label': 'Cross-sectional study', 'value': 'cross_sectional_study'}, + {'label': 
'Randomized controlled trial', 'value': 'rct'}, + {'label': 'Other', 'value': 'other'} + ], + className="mb-3" + ), + ], width=6), + ]), + dbc.Label("Journal / Source"), + dbc.Input(id='input-journal', type='text', placeholder="ex: The Lancet Public Health", className="mb-3"), + + dbc.Row([ + dbc.Col([ + dbc.Label("Publication Year"), + dbc.Input(id='input-date', type='text', placeholder="ex: 2023"), + ], width=6), + dbc.Col([ + dbc.Label("DOI"), + dbc.Input(id='input-doi', type='text', placeholder="ex: 10.1038/s41586-021-00000-x"), + ], width=6), + ], className="mb-3"), + + dbc.Label("Publication URL"), + dbc.Input(id='input-link', type='text', placeholder="https://pubmed.ncbi.nlm.nih.gov/...", className="mb-3"), + + dbc.Label("Abstract"), + dbc.Textarea(id='input-abstract', style={'height': 150}, placeholder="Lorem ipsum dolor sit amet"), + + dbc.Checkbox(id='chk-meta-correct', label="This information is correct", className="mt-3 font-weight-bold text-success"), + ], width=12) + ]), + ]) + ], className="mb-4 shadow-sm", style={"borderRadius": "16px"}), + + dbc.Card([ + dbc.CardBody([ + html.H4( + "Authors", + className="card-title font-weight-bold mb-4" + ), + html.Div(id='authors-container'), + dbc.Button( + "➕ Add an author", + id='btn-add-author', + n_clicks=0, + outline=True, + className="mt-3", + style={ + "color": "#3B6096", + "borderColor": "#3B6096", + "borderRadius": "10px", + "fontWeight": "500" + } + ), + html.Br(), + dbc.Checkbox(id='chk-authors-correct', label="Authors information is correct", className="mt-3 font-weight-bold text-success"), + ]) + ], className="mb-4 shadow-sm", style={"borderRadius": "16px"}), + + dbc.Button( + "Upload file", + id='btn-final-upload', + size="lg", + className="w-100 mb-4", + style={ + "backgroundColor": "#3B6096", + "borderColor": "#3B6096", + "color": "white", + "fontWeight": "600", + "borderRadius": "10px" + } + ), + + html.Div(id='final-output', className="mt-4 pb-5") + ], + style={ + "marginLeft": "16%", + 
"padding": "5rem 1.5rem 2rem 1.5rem", + "width": "84%", + "backgroundColor": "#ffffff" + } + ) + + return html.Div([ + dcc.Store(id='session-store', data={}), + sidebar, main_content], + style={"fontFamily": "system-ui, -apple-system, sans-serif", + "backgroundColor": "#f5f7fa"}) + + + + +def add_author_line(index, name="", surname="", email=""): + """One-click addition/suppression of a new author line""" + + return dbc.Card([ + dbc.CardBody([ + dbc.Row([ + dbc.Col( + dbc.Input( + id={'type': 'auth-name', 'index': index}, + value=name, + placeholder="Name" + ), + width=3 + ), + dbc.Col( + dbc.Input( + id={'type': 'auth-surname', 'index': index}, + value=surname, + placeholder="Surname" + ), + width=3 + ), + dbc.Col( + dbc.Input( + id={'type': 'auth-email', 'index': index}, + value=email, + placeholder="Email (Corresponding)" + ), + width=4 + ), + dbc.Col( + dbc.Button( + "Remove", + id={'type': 'remove-author', 'index': index}, + color="danger", + outline=True, + className="w-100", + style={ + "whiteSpace": "nowrap", + "minWidth": "100px" + } + ), + width=2 + ) + ], className="align-items-center g-2") + ], className="p-2") + ], className="mb-3 border-light shadow-sm") From aa0bb56f38b1f95d1b21d6f78469ff1163535302 Mon Sep 17 00:00:00 2001 From: Baptiste Schelle Date: Fri, 27 Mar 2026 16:24:18 +0100 Subject: [PATCH 3/8] added parsing regex logic for metadata fields and metadata JSON generation --- eu_fact_force/dash-app/utils/parsing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/eu_fact_force/dash-app/utils/parsing.py b/eu_fact_force/dash-app/utils/parsing.py index d3f158e..3300782 100644 --- a/eu_fact_force/dash-app/utils/parsing.py +++ b/eu_fact_force/dash-app/utils/parsing.py @@ -3,12 +3,12 @@ import re import fitz # PyMuPDF -def load_svg_as_data_uri(svg_path: str) -> Optional[str]: - """Return a data URI for an SVG file, or None if not found.""" +def load_png_as_data_uri(png_path: str) -> Optional[str]: + """Return a data URI for an 
PNG file, or None if not found.""" try: - with open(svg_path, "rb") as f: + with open(png_path, "rb") as f: b64 = base64.b64encode(f.read()).decode("utf-8") - return f"data:image/svg+xml;base64,{b64}" + return f"data:image/png;base64,{b64}" except FileNotFoundError: return None From 17d7e4218ebca1a7dfc87bcb2b7777e5cbc5af58 Mon Sep 17 00:00:00 2001 From: Baptiste Schelle Date: Fri, 27 Mar 2026 16:26:31 +0100 Subject: [PATCH 4/8] added ingest page to Dash app and associated callbacks --- eu_fact_force/dash-app/app.py | 158 +++++++++++++++++++++++++++++++++- 1 file changed, 156 insertions(+), 2 deletions(-) diff --git a/eu_fact_force/dash-app/app.py b/eu_fact_force/dash-app/app.py index a2d3cc5..e76f232 100644 --- a/eu_fact_force/dash-app/app.py +++ b/eu_fact_force/dash-app/app.py @@ -1,11 +1,14 @@ -from dash import Dash, dcc, html -from dash.dependencies import Input, Output, State +from dash import Dash, dcc, html, Input, Output, State, ALL, ctx from dash.exceptions import PreventUpdate import dash_bootstrap_components as dbc + import plotly.io as pio import plotly.graph_objects as go +import base64 +import io import json +import uuid from utils.colors import EUPHAColors from utils.graph import RandomGraphGenerator @@ -251,6 +254,157 @@ def toggle_offcanvas(node_data, is_open): ### Create here callbacks for ingestions +@app.callback( + Output('input-doi', 'value'), + Output('input-abstract', 'value'), + Output('input-journal', 'value'), + Output('input-date', 'value'), + Output('input-link', 'value'), + Output('input-title', 'value'), + Output('session-store', 'data'), + Input('upload-pdf', 'contents') +) +def handle_pdf_upload(contents): + + # TOI BE ADDRESSED + # if contents is None: + # return dash.no_update, dash.no_update, dash.no_update, dash.no_update, dash.no_update, dash.no_update, {} + + # decoding of passed PDFs + content_type, content_string = contents.split(',') + decoded = base64.b64decode(content_string) + + # extract_pdf_metadata call + metadata 
= ingest.extract_pdf_metadata(io.BytesIO(decoded)) + + return ( + metadata.get('doi', ''), + metadata.get('abstract', ''), + metadata.get('journal', ''), + metadata.get('publication_date', ''), + metadata.get('article_link', ''), + metadata.get('title', ''), + metadata + ) +@app.callback( + Output('authors-container', 'children'), + Input('btn-add-author', 'n_clicks'), + Input({'type': 'remove-author', 'index': ALL}, 'n_clicks'), + Input('session-store', 'data'), + State({'type': 'auth-name', 'index': ALL}, 'value'), + State({'type': 'auth-surname', 'index': ALL}, 'value'), + State({'type': 'auth-email', 'index': ALL}, 'value'), + State({'type': 'auth-name', 'index': ALL}, 'id'), +) +def update_authors_list(add_clicks, remove_clicks, metadata, names, surnames, emails, ids): + triggered = ctx.triggered_id + + # on a new pdf uplaod + if triggered == 'session-store' and metadata: + authors = metadata.get('authors', []) + return [ingest.add_author_line(str(uuid.uuid4()), a.get('name', ''), a.get('surname', ''), a.get('email', '')) for a in authors] + + # reconstructing authors list + current_authors = [] + if ids: + for idx_id, name, surname, email in zip(ids, names, surnames, emails): + current_authors.append({ + 'index': idx_id['index'], + 'name': name or "", + 'surname': surname or "", + 'email': email or "" + }) + + # if missing author + if triggered == 'btn-add-author': + current_authors.append({ + 'index': str(uuid.uuid4()), + 'name': "", + 'surname': "", + 'email': "" + }) + + # remove blank/irrelevant author field + if isinstance(triggered, dict) and triggered.get('type') == 'remove-author': + remove_index = triggered.get('index') + current_authors = [a for a in current_authors if a['index'] != remove_index] + + return [ingest.add_author_line(a['index'], a['name'], a['surname'], a['email']) for a in current_authors] + + +@app.callback( + Output('input-doi', 'disabled'), + Output('input-abstract', 'disabled'), + Output('input-journal', 'disabled'), + 
Output('input-date', 'disabled'), + Output('input-link', 'disabled'), + Output('input-category', 'disabled'), + Output('input-type', 'disabled'), + Output('input-title', 'disabled'), + Input('chk-meta-correct', 'value') +) +def lock_metadata(is_correct): + val = bool(is_correct) + return val, val, val, val, val, val, val, val + + +@app.callback( + Output({'type': 'auth-name', 'index': ALL}, 'disabled'), + Output({'type': 'auth-surname', 'index': ALL}, 'disabled'), + Output({'type': 'auth-email', 'index': ALL}, 'disabled'), + Output({'type': 'remove-author', 'index': ALL}, 'disabled'), + Output('btn-add-author', 'disabled'), + Input('chk-authors-correct', 'value'), + State({'type': 'auth-name', 'index': ALL}, 'id') +) +def lock_authors(is_correct, ids): + is_corr = bool(is_correct) + if not ids: + return [], [], [], [], is_corr + length = len(ids) + return [is_corr]*length, [is_corr]*length, [is_corr]*length, [is_corr]*length, is_corr + + +@app.callback( + Output('final-output', 'children'), + Input('btn-final-upload', 'n_clicks'), + State('input-doi', 'value'), + State('input-abstract', 'value'), + State('input-journal', 'value'), + State('input-date', 'value'), + State('input-link', 'value'), + State('input-category', 'value'), + State('input-type', 'value'), + State('input-title', 'value'), + State({'type': 'auth-name', 'index': ALL}, 'value'), + State({'type': 'auth-surname', 'index': ALL}, 'value'), + State({'type': 'auth-email', 'index': ALL}, 'value'), + prevent_initial_call=True +) +def finalize_and_display_json(n_clicks, doi, abstract, journal, date, link, category, study_type, title, names, surnames, emails): + + authors_list = [ + {"name": n, "surname": s, "email": e} + for n, s, e in zip(names, surnames, emails) if n or s + ] + + metadata_json = { + "title": title, + "category": category, + "study_type": study_type, + "journal": journal, + "publication_year": date, + "doi": doi, + "article_link": link, + "abstract": abstract, + "authors": authors_list + 
} + + return html.Div([ + dbc.Alert("Successfully contributed, thank you!", color="success"), + html.H4("Metadata JSON"), + html.Pre(json.dumps(metadata_json, indent=4), style={'backgroundColor': '#f8f9fa', 'padding': '15px', 'borderRadius': '8px', 'border': '1px solid #dee2e6'}) + ]) if __name__ == "__main__": app.run(debug=True) From 231330745ce75fe423191339d1d65db16e365a66 Mon Sep 17 00:00:00 2001 From: Baptiste Schelle Date: Mon, 30 Mar 2026 09:49:33 +0200 Subject: [PATCH 5/8] corrected parsing import function --- eu_fact_force/dash-app/app.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/eu_fact_force/dash-app/app.py b/eu_fact_force/dash-app/app.py index e76f232..345bf65 100644 --- a/eu_fact_force/dash-app/app.py +++ b/eu_fact_force/dash-app/app.py @@ -1,4 +1,4 @@ -from dash import Dash, dcc, html, Input, Output, State, ALL, ctx +from dash import Dash, dcc, html, Input, Output, State, ALL, ctx, no_update from dash.exceptions import PreventUpdate import dash_bootstrap_components as dbc @@ -12,6 +12,7 @@ from utils.colors import EUPHAColors from utils.graph import RandomGraphGenerator +from utils.parsing import extract_pdf_metadata from pages import readme, ingest, graph # Plotly template @@ -266,16 +267,15 @@ def toggle_offcanvas(node_data, is_open): ) def handle_pdf_upload(contents): - # TOI BE ADDRESSED - # if contents is None: - # return dash.no_update, dash.no_update, dash.no_update, dash.no_update, dash.no_update, dash.no_update, {} + if contents is None: + return no_update, no_update, no_update, no_update, no_update, no_update, {} # decoding of passed PDFs content_type, content_string = contents.split(',') decoded = base64.b64decode(content_string) # extract_pdf_metadata call - metadata = ingest.extract_pdf_metadata(io.BytesIO(decoded)) + metadata = extract_pdf_metadata(io.BytesIO(decoded)) return ( metadata.get('doi', ''), From b73de4f5dc8021794e02f0a0c5674ed7c8afbc3a Mon Sep 17 00:00:00 2001 From: Baptiste Schelle Date: 
Mon, 30 Mar 2026 09:50:37 +0200 Subject: [PATCH 6/8] homogeonized colors with EUPHA style sheet --- eu_fact_force/dash-app/pages/ingest.py | 31 +++++++------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/eu_fact_force/dash-app/pages/ingest.py b/eu_fact_force/dash-app/pages/ingest.py index 3c0f0bc..4bc34a0 100644 --- a/eu_fact_force/dash-app/pages/ingest.py +++ b/eu_fact_force/dash-app/pages/ingest.py @@ -2,10 +2,6 @@ import dash_bootstrap_components as dbc from utils.colors import EUPHAColors -from utils.parsing import * - -logo_uri = load_png_as_data_uri("eupha-logo.svg") - def make_layout(): @@ -14,17 +10,6 @@ def make_layout(): [ html.Div( [ - html.Img( - src=logo_uri, - style={ - "width": "100%", - "maxWidth": "220px", - "height": "auto", - "display": "block", - "margin": "0 auto 20px auto", - "marginTop": "40px" - } - ) if logo_uri else html.Div(), html.H3( "EU Fact Force", @@ -33,7 +18,7 @@ def make_layout(): "fontWeight": "700", "fontSize": "1.9rem", "marginBottom": "20px", - "color": "#212529" + "color": EUPHAColors.dark_blue } ), @@ -70,7 +55,7 @@ def make_layout(): ], style={ "padding": "2rem 1rem", - "backgroundColor": "#f5f7fa", + "backgroundColor": EUPHAColors.white, "height": "100vh", "position": "fixed", "top": 0, @@ -98,7 +83,7 @@ def make_layout(): "Welcome to EU Fact Force articles uploading pages", className="text-center mb-4", style={ - "color": "#6c757d", + "color": EUPHAColors.black, "fontWeight": "500", "fontSize": "1.5rem", "lineHeight": "1.3" @@ -112,7 +97,7 @@ def make_layout(): "margin": "0 auto", "fontSize": "1.1rem", "lineHeight": "1.7", - "color": "#212529" + "color": EUPHAColors.black } ), ], @@ -137,11 +122,11 @@ def make_layout(): 'lineHeight': '80px', 'borderWidth': '2px', 'borderStyle': 'dashed', - 'borderColor': '#adb5bd', + 'borderColor': EUPHAColors.dark_blue, 'textAlign': 'center', 'borderRadius': '10px', 'marginBottom': '20px', - 'backgroundColor': '#f8f9fa', + 'backgroundColor': 
EUPHAColors.white, 'cursor': 'pointer' } ), @@ -243,8 +228,8 @@ def make_layout(): size="lg", className="w-100 mb-4", style={ - "backgroundColor": "#3B6096", - "borderColor": "#3B6096", + "backgroundColor": EUPHAColors.dark_blue, + "borderColor": EUPHAColors.dark_blue, "color": "white", "fontWeight": "600", "borderRadius": "10px" From f98d96976ba6346348530a074eb8a782233c13cd Mon Sep 17 00:00:00 2001 From: Baptiste Schelle Date: Mon, 30 Mar 2026 10:15:15 +0200 Subject: [PATCH 7/8] added necessary dependencies for parsing and grouped 'em un toml file --- pyproject.toml | 10 +++ uv.lock | 168 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 178 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index dc3cd89..f9e31cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,10 @@ dependencies = [ "django-storages[s3]>=1.14", "boto3>=1.34", "gunicorn>=25.1.0", + "dash>=4.1.0", + "dash-bootstrap-components>=2.0.4", + "dash-cytoscape>=1.0.2", + "pymupdf>=1.27.1", ] [tool.pytest.ini_options] @@ -30,6 +34,12 @@ dev = [ "ruff>=0.15.0", "seaborn>=0.13.2", ] +graph = [ + "dash>=4.0.0", + "dash-bootstrap-components>=2.0.4", + "dash-cytoscape>=1.0.2", + "pymupdf>=1.27.1", +] parsing = [ "docling>=2.73.1", "docling-hierarchical-pdf>=0.1.3", diff --git a/uv.lock b/uv.lock index ee70200..dcdeaf3 100644 --- a/uv.lock +++ b/uv.lock @@ -12,11 +12,15 @@ version = "0.1.0" source = { virtual = "." 
} dependencies = [ { name = "boto3" }, + { name = "dash" }, + { name = "dash-bootstrap-components" }, + { name = "dash-cytoscape" }, { name = "django" }, { name = "django-storages", extra = ["s3"] }, { name = "gunicorn" }, { name = "pgvector" }, { name = "psycopg", extra = ["binary"] }, + { name = "pymupdf" }, { name = "python-dotenv" }, { name = "sentence-transformers" }, ] @@ -32,6 +36,12 @@ dev = [ { name = "ruff" }, { name = "seaborn" }, ] +graph = [ + { name = "dash" }, + { name = "dash-bootstrap-components" }, + { name = "dash-cytoscape" }, + { name = "pymupdf" }, +] parsing = [ { name = "docling" }, { name = "docling-hierarchical-pdf" }, @@ -45,11 +55,15 @@ parsing = [ [package.metadata] requires-dist = [ { name = "boto3", specifier = ">=1.34" }, + { name = "dash", specifier = ">=4.1.0" }, + { name = "dash-bootstrap-components", specifier = ">=2.0.4" }, + { name = "dash-cytoscape", specifier = ">=1.0.2" }, { name = "django", specifier = ">=6.0.2" }, { name = "django-storages", extras = ["s3"], specifier = ">=1.14" }, { name = "gunicorn", specifier = ">=25.1.0" }, { name = "pgvector", specifier = ">=0.2.4" }, { name = "psycopg", extras = ["binary"], specifier = ">=3.2" }, + { name = "pymupdf", specifier = ">=1.27.1" }, { name = "python-dotenv", specifier = ">=1.0" }, { name = "sentence-transformers", specifier = ">=5.2.3" }, ] @@ -65,6 +79,12 @@ dev = [ { name = "ruff", specifier = ">=0.15.0" }, { name = "seaborn", specifier = ">=0.13.2" }, ] +graph = [ + { name = "dash", specifier = ">=4.0.0" }, + { name = "dash-bootstrap-components", specifier = ">=2.0.4" }, + { name = "dash-cytoscape", specifier = ">=1.0.2" }, + { name = "pymupdf", specifier = ">=1.27.1" }, +] parsing = [ { name = "docling", specifier = ">=2.73.1" }, { name = "docling-hierarchical-pdf", specifier = ">=0.1.3" }, @@ -403,6 +423,15 @@ css = [ { name = "tinycss2" }, ] +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, +] + [[package]] name = "boto3" version = "1.42.59" @@ -703,6 +732,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "dash" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flask" }, + { name = "importlib-metadata" }, + { name = "nest-asyncio" }, + { name = "plotly" }, + { name = "requests" }, + { name = "retrying" }, + { name = "setuptools" }, + { name = "typing-extensions" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/da/a13ae3a6528bd51a6901461dbff4549c6009de203d6249a89b9a09ac5cfb/dash-4.1.0.tar.gz", hash = "sha256:17a92a87b0c1eacc025079a705e44e72cd4c5794629c0a2909942b611faeb595", size = 6927689, upload-time = "2026-03-23T20:39:47.578Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/00/10b1f8b3885fc4add1853e9603af15c593fa0be20d37c158c4d811e868dc/dash-4.1.0-py3-none-any.whl", hash = "sha256:1af9f302bc14061061012cdb129b7e370d3604b12a7f730b252ad8e4966f01f7", size = 7232489, upload-time = "2026-03-23T20:39:40.658Z" }, +] + +[[package]] +name = "dash-bootstrap-components" +version = "2.0.4" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "dash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/d4/5b7da808ff5acb3a6ca702f504d8ef05bc7d4c475b18dadefd783b1120c3/dash_bootstrap_components-2.0.4.tar.gz", hash = "sha256:c3206c0923774bbc6a6ddaa7822b8d9aa5326b0d3c1e7cd795cc975025fe2484", size = 115599, upload-time = "2025-08-20T19:42:09.449Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/38/1efeec8b4d741c09ccd169baf8a00c07a0176b58e418d4cd0c30dffedd22/dash_bootstrap_components-2.0.4-py3-none-any.whl", hash = "sha256:767cf0084586c1b2b614ccf50f79fe4525fdbbf8e3a161ed60016e584a14f5d1", size = 204044, upload-time = "2025-08-20T19:42:07.928Z" }, +] + +[[package]] +name = "dash-cytoscape" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/b7/0d511af853024241dc3192bea77e4753ea606187bd2dd777a8209a5b01bb/dash_cytoscape-1.0.2.tar.gz", hash = "sha256:a61019d2184d63a2b3b5c06d056d3b867a04223a674cc3c7cf900a561a9a59aa", size = 3992593, upload-time = "2024-07-15T11:39:06.185Z" } + [[package]] name = "dataclasses-json" version = "0.6.7" @@ -1033,6 +1103,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970, upload-time = "2022-11-02T17:34:01.425Z" }, ] +[[package]] +name = "flask" +version = "3.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "blinker" }, + { name = "click" }, + { name = "itsdangerous" }, + { name = "jinja2" }, + { name = "markupsafe" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/00/35d85dcce6c57fdc871f3867d465d780f302a175ea360f62533f12b27e2b/flask-3.1.3.tar.gz", hash = 
"sha256:0ef0e52b8a9cd932855379197dd8f94047b359ca0a78695144304cb45f87c9eb", size = 759004, upload-time = "2026-02-19T05:00:57.678Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/9c/34f6962f9b9e9c71f6e5ed806e0d0ff03c9d1b0b2340088a0cf4bce09b18/flask-3.1.3-py3-none-any.whl", hash = "sha256:f4bcbefc124291925f1a26446da31a5178f9483862233b23c0c96a20701f670c", size = 103424, upload-time = "2026-02-19T05:00:56.027Z" }, +] + [[package]] name = "fonttools" version = "4.61.1" @@ -1232,6 +1319,7 @@ dependencies = [ { name = "griffecli" }, { name = "griffelib" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/04/56/28a0accac339c164b52a92c6cfc45a903acc0c174caa5c1713803467b533/griffe-2.0.0.tar.gz", hash = "sha256:c68979cd8395422083a51ea7cf02f9c119d889646d99b7b656ee43725de1b80f", size = 293906, upload-time = "2026-03-23T21:06:53.402Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/8b/94/ee21d41e7eb4f823b94603b9d40f86d3c7fde80eacc2c3c71845476dddaa/griffe-2.0.0-py3-none-any.whl", hash = "sha256:5418081135a391c3e6e757a7f3f156f1a1a746cc7b4023868ff7d5e2f9a980aa", size = 5214, upload-time = "2026-02-09T19:09:44.105Z" }, ] @@ -1244,6 +1332,7 @@ dependencies = [ { name = "colorama" }, { name = "griffelib" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/a4/f8/2e129fd4a86e52e58eefe664de05e7d502decf766e7316cc9e70fdec3e18/griffecli-2.0.0.tar.gz", hash = "sha256:312fa5ebb4ce6afc786356e2d0ce85b06c1c20d45abc42d74f0cda65e159f6ef", size = 56213, upload-time = "2026-03-23T21:06:54.8Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/e6/ed/d93f7a447bbf7a935d8868e9617cbe1cadf9ee9ee6bd275d3040fbf93d60/griffecli-2.0.0-py3-none-any.whl", hash = "sha256:9f7cd9ee9b21d55e91689358978d2385ae65c22f307a63fb3269acf3f21e643d", size = 9345, upload-time = "2026-02-09T19:09:42.554Z" }, ] @@ -1252,6 +1341,7 @@ wheels = [ name = "griffelib" version = "2.0.0" source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ad/06/eccbd311c9e2b3ca45dbc063b93134c57a1ccc7607c5e545264ad092c4a9/griffelib-2.0.0.tar.gz", hash = "sha256:e504d637a089f5cab9b5daf18f7645970509bf4f53eda8d79ed71cce8bd97934", size = 166312, upload-time = "2026-03-23T21:06:55.954Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/4d/51/c936033e16d12b627ea334aaaaf42229c37620d0f15593456ab69ab48161/griffelib-2.0.0-py3-none-any.whl", hash = "sha256:01284878c966508b6d6f1dbff9b6fa607bc062d8261c5c7253cb285b06422a7f", size = 142004, upload-time = "2026-02-09T19:09:40.561Z" }, ] @@ -1374,6 +1464,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "importlib-metadata" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/01/15bb152d77b21318514a96f43af312635eb2500c96b55398d020c93d86ea/importlib_metadata-9.0.0.tar.gz", hash = "sha256:a4f57ab599e6a2e3016d7595cfd72eb4661a5106e787a95bcc90c7105b831efc", size = 56405, upload-time = "2026-03-20T06:42:56.999Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/3d/2d244233ac4f76e38533cfcb2991c9eb4c7bf688ae0a036d30725b8faafe/importlib_metadata-9.0.0-py3-none-any.whl", hash = "sha256:2d21d1cc5a017bd0559e36150c21c830ab1dc304dedd1b7ea85d20f45ef3edd7", size = 27789, upload-time = "2026-03-20T06:42:55.665Z" }, +] + [[package]] name = "inflection" version = "0.5.1" @@ -1477,6 +1579,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042", size = 
11321, upload-time = "2020-11-01T10:59:58.02Z" }, ] +[[package]] +name = "itsdangerous" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" }, +] + [[package]] name = "jedi" version = "0.19.2" @@ -2423,6 +2534,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "narwhals" +version = "2.18.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/59/96/45218c2fdec4c9f22178f905086e85ef1a6d63862dcc3cd68eb60f1867f5/narwhals-2.18.1.tar.gz", hash = "sha256:652a1fcc9d432bbf114846688884c215f17eb118aa640b7419295d2f910d2a8b", size = 620578, upload-time = "2026-03-24T15:11:25.456Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/c3/06490e98393dcb4d6ce2bf331a39335375c300afaef526897881fbeae6ab/narwhals-2.18.1-py3-none-any.whl", hash = "sha256:a0a8bb80205323851338888ba3a12b4f65d352362c8a94be591244faf36504ad", size = 444952, upload-time = "2026-03-24T15:11:23.801Z" }, +] + [[package]] name = "nbclient" version = "0.10.4" @@ -2976,6 +3096,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/48/31/05e764397056194206169869b50cf2fee4dbbbc71b344705b9c0d878d4d8/platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", size = 21168, upload-time = "2026-02-16T03:56:08.891Z" }, ] +[[package]] +name = "plotly" +version = "6.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "narwhals" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/fb/41efe84970cfddefd4ccf025e2cbfafe780004555f583e93dba3dac2cdef/plotly-6.6.0.tar.gz", hash = "sha256:b897f15f3b02028d69f755f236be890ba950d0a42d7dfc619b44e2d8cea8748c", size = 7027956, upload-time = "2026-03-02T21:10:25.321Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/d2/c6e44dba74f17c6216ce1b56044a9b93a929f1c2d5bdaff892512b260f5e/plotly-6.6.0-py3-none-any.whl", hash = "sha256:8d6daf0f87412e0c0bfe72e809d615217ab57cc715899a1e5145135a7800d1d0", size = 9910315, upload-time = "2026-03-02T21:10:18.131Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -3906,6 +4039,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "retrying" +version = "1.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c8/5a/b17e1e257d3e6f2e7758930e1256832c9ddd576f8631781e6a072914befa/retrying-1.4.2.tar.gz", hash = "sha256:d102e75d53d8d30b88562d45361d6c6c934da06fab31bd81c0420acb97a8ba39", size = 11411, upload-time = "2025-08-03T03:35:25.189Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/f3/6cd296376653270ac1b423bb30bd70942d9916b6978c6f40472d6ac038e7/retrying-1.4.2-py3-none-any.whl", hash = 
"sha256:bbc004aeb542a74f3569aeddf42a2516efefcdaff90df0eb38fbfbf19f179f59", size = 10859, upload-time = "2025-08-03T03:35:23.829Z" }, +] + [[package]] name = "rfc3339-validator" version = "0.1.4" @@ -4616,6 +4758,11 @@ dependencies = [ wheels = [ { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" }, { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7a/abada41517ce0011775f0f4eacc79659bc9bc6c361e6bfe6f7052a6b9363/torch-2.10.0-3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:98c01b8bb5e3240426dcde1446eed6f40c778091c8544767ef1168fc663a05a6", size = 915622781, upload-time = "2026-03-11T14:17:11.354Z" }, + { url = "https://files.pythonhosted.org/packages/ab/c6/4dfe238342ffdcec5aef1c96c457548762d33c40b45a1ab7033bb26d2ff2/torch-2.10.0-3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80b1b5bfe38eb0e9f5ff09f206dcac0a87aadd084230d4a36eea5ec5232c115b", size = 915627275, upload-time = "2026-03-11T14:16:11.325Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f0/72bf18847f58f877a6a8acf60614b14935e2f156d942483af1ffc081aea0/torch-2.10.0-3-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:46b3574d93a2a8134b3f5475cfb98e2eb46771794c57015f6ad1fb795ec25e49", size = 915523474, upload-time = "2026-03-11T14:17:44.422Z" }, + { url = "https://files.pythonhosted.org/packages/f4/39/590742415c3030551944edc2ddc273ea1fdfe8ffb2780992e824f1ebee98/torch-2.10.0-3-cp314-cp314-manylinux_2_28_x86_64.whl", hash = 
"sha256:b1d5e2aba4eb7f8e87fbe04f86442887f9167a35f092afe4c237dfcaaef6e328", size = 915632474, upload-time = "2026-03-11T14:15:13.666Z" }, + { url = "https://files.pythonhosted.org/packages/b6/8e/34949484f764dde5b222b7fe3fede43e4a6f0da9d7f8c370bb617d629ee2/torch-2.10.0-3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0228d20b06701c05a8f978357f657817a4a63984b0c90745def81c18aedfa591", size = 915523882, upload-time = "2026-03-11T14:14:46.311Z" }, { url = "https://files.pythonhosted.org/packages/cc/af/758e242e9102e9988969b5e621d41f36b8f258bb4a099109b7a4b4b50ea4/torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5fd4117d89ffd47e3dcc71e71a22efac24828ad781c7e46aaaf56bf7f2796acf", size = 145996088, upload-time = "2026-01-21T16:24:44.171Z" }, { url = "https://files.pythonhosted.org/packages/23/8e/3c74db5e53bff7ed9e34c8123e6a8bfef718b2450c35eefab85bb4a7e270/torch-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:787124e7db3b379d4f1ed54dd12ae7c741c16a4d29b49c0226a89bea50923ffb", size = 915711952, upload-time = "2026-01-21T16:23:53.503Z" }, { url = "https://files.pythonhosted.org/packages/6e/01/624c4324ca01f66ae4c7cd1b74eb16fb52596dce66dbe51eff95ef9e7a4c/torch-2.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c66c61f44c5f903046cc696d088e21062644cbe541c7f1c4eaae88b2ad23547", size = 113757972, upload-time = "2026-01-21T16:24:39.516Z" }, @@ -4961,6 +5108,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616, upload-time = "2025-10-07T21:16:34.951Z" }, ] +[[package]] +name = "werkzeug" +version = "3.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b5/43/76ded108b296a49f52de6bac5192ca1c4be84e886f9b5c9ba8427d9694fd/werkzeug-3.1.7.tar.gz", hash = "sha256:fb8c01fe6ab13b9b7cdb46892b99b1d66754e1d7ab8e542e865ec13f526b5351", size = 875700, upload-time = "2026-03-24T01:08:07.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/b2/0bba9bbb4596d2d2f285a16c2ab04118f6b957d8441566e1abb892e6a6b2/werkzeug-3.1.7-py3-none-any.whl", hash = "sha256:4b314d81163a3e1a169b6a0be2a000a0e204e8873c5de6586f453c55688d422f", size = 226295, upload-time = "2026-03-24T01:08:06.133Z" }, +] + [[package]] name = "widgetsnbextension" version = "4.0.15" @@ -5136,3 +5295,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/47/3fa2286c3cb162c71cdb34c4224d5745a1ceceb391b2bd9b19b668a8d724/yarl-1.23.0-cp314-cp314t-win_arm64.whl", hash = "sha256:44bb7bef4ea409384e3f8bc36c063d77ea1b8d4a5b2706956c0d6695f07dcc25", size = 86041, upload-time = "2026-03-01T22:07:49.026Z" }, { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" }, ] + +[[package]] +name = "zipp" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, +] From 20e24e6198607f4b20e34d2bebb7f32162138b1e Mon 
Sep 17 00:00:00 2001 From: Baptiste Schelle Date: Mon, 30 Mar 2026 10:18:29 +0200 Subject: [PATCH 8/8] collapsing both graph entries in TOML --- pyproject.toml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 09c6420..e3a170c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,12 +34,6 @@ dev = [ "ruff>=0.15.0", "seaborn>=0.13.2", ] -graph = [ - "dash>=4.0.0", - "dash-bootstrap-components>=2.0.4", - "dash-cytoscape>=1.0.2", - "pymupdf>=1.27.1", -] parsing = [ "docling>=2.73.1", "docling-hierarchical-pdf>=0.1.3", @@ -53,4 +47,5 @@ graph = [ "dash>=4.0.0", "dash-bootstrap-components>=2.0.4", "dash-cytoscape>=1.0.2", -] \ No newline at end of file + "pymupdf>=1.27.1" +]