Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
7305e60
Travail sur geoloc - proposition solution calcul distance Point-Littoral
Feb 6, 2026
4ddaf4c
Force existence of TAXREF_v18_2025 directory
Feb 9, 2026
0c7bdf9
Add taxref url in init file
Feb 9, 2026
a50218e
Check taxref.txt existence, download the archive and extract the taxo…
Feb 9, 2026
6d1a47e
Warning about temp url for TaxRef file
Feb 19, 2026
b2eae98
Store taxref zip archive in a temp directory
Feb 19, 2026
611ea66
Remove specific directory for taxref
Feb 19, 2026
7850388
File download and extraction pushed to next level of abstraction
Feb 26, 2026
05b1bcb
Useless whitespace
Feb 26, 2026
44d1dc6
Enrichissement des infos des communes + check distance à la côte
Feb 27, 2026
b6a7522
feat: ajout des fonctions pour l'ingestion API
alexpetit Mar 1, 2026
fc34dcc
Merge pull request #9 from cyrilbecot/dataing/taxref/check_existence
cgoudet Mar 4, 2026
ab79393
Merge branch 'main' into fix_geoloc
cgoudet Mar 4, 2026
12b36d1
add basic test
cgoudet Mar 4, 2026
0f3f545
fix precommit
cgoudet Mar 4, 2026
d1a0c88
Modifs geoloc.py + ajout tests unitaires
Mar 5, 2026
20be62e
dernieres modifs - typo
Mar 5, 2026
9eac06d
Merge pull request #8 from dataforgoodfr/fix_geoloc
TimCo31 Mar 5, 2026
f2be0e8
nouvelle API URL, modification des champs avec tous les champs de l'A…
alexpetit Mar 15, 2026
a04b0ed
Fix pre-commit issues and update API processing
alexpetit Mar 17, 2026
c464a05
Features : Lien Doris scrapping
TimCo31 Mar 19, 2026
b9414c9
Changement de l'API + ajout de postgre
alexpetit Mar 22, 2026
d38fac8
Add README
alexpetit Mar 22, 2026
ed5bb5c
Fix uv.lock after merge conflicts
alexpetit Mar 22, 2026
4d48e8b
Merge pull request #19 from dataforgoodfr/lien_doris
Hpoinseaux Mar 23, 2026
d8edfb0
Merge branch 'main' into feature/biolit-api-fix
Hpoinseaux Mar 23, 2026
b2aede9
Merge pull request #18 from alexpetit/feature/biolit-api-fix
Hpoinseaux Mar 23, 2026
8116a5f
Bioclip + MLP entrainé sur les niveaux hierarchiques superieurs + Pro…
Mar 31, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ cover/
local_settings.py
db.sqlite3
db.sqlite3-journal

# autres
test.py
# Flask stuff:
instance/
.webassets-cache
Expand Down Expand Up @@ -170,4 +171,8 @@ images/
labels/
failed_downloads.csv
metadata.csv
data.yaml
data.yaml

# orchestrateur - tests Timothée
orchestrateur/
flows/
12 changes: 12 additions & 0 deletions biolit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@
RAWDIR = DATADIR / "raw"
EXPORTDIR = DATADIR / "exports"

DATA_GOUV_CONTOUR_COMMUNES_URL = (
"https://www.data.gouv.fr/api/1/datasets/r/00c0c560-3ad1-4a62-9a29-c34c98c3701e"
)
DATA_GOUV_INFO_COMMUNES_URL = (
"https://www.data.gouv.fr/api/1/datasets/r/f5df602b-3800-44d7-b2df-fa40a0350325"
)
WORLD_COAST_LINES_URL = (
"https://osmdata.openstreetmap.de/download/coastlines-split-4326.zip"
)
# /!\ URL temporaire de l'archive le temps que le museum corrige sa cybersec...
TAXREFURL = "https://assets.patrinat.fr/files/referentiel/TAXREF_v18_2025.zip"


mpl.rcParams["axes.spines.right"] = False
mpl.rcParams["axes.spines.top"] = False
89 changes: 89 additions & 0 deletions biolit/export_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import requests
import polars as pl
import structlog
import re
import os

LOGGER = structlog.get_logger()

# ------------------------------
# FETCH API
# ------------------------------
def fetch_biolit_from_api():
    """Fetch all BioLit observations from the API.

    The endpoint is read from the ``BIOLIT_API_URL`` environment variable.

    Returns:
        list: Decoded JSON payload, one dict per observation.

    Raises:
        ValueError: If ``BIOLIT_API_URL`` is not set.
        requests.HTTPError: If the API answers with an error status.
    """
    url = os.getenv("BIOLIT_API_URL")
    if not url:
        # Fail fast with an explicit message instead of an obscure
        # requests error on a None URL (typical CI/dev misconfiguration).
        raise ValueError("BIOLIT_API_URL environment variable is not set")

    # Timeout so a hung API cannot block the pipeline forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    data = response.json()

    # Use the module logger (already configured above) rather than print.
    LOGGER.info("observations fetched from BioLit API", count=len(data))
    return data

# ------------------------------
# RENAME OF COLUMNS
# ------------------------------


# Single-pass transliteration table for accented characters; covers more of
# the French alphabet (â, ä, ç, ê, ë, î, ï, ö, û, ü, ...) than the original
# é/è/à/ù/ô set — previously unmapped accents were silently dropped by the
# trailing regex (e.g. "façade" -> "faade").
_ACCENT_MAP = str.maketrans("àâäéèêëîïôöùûüç", "aaaeeeeiioouuuc")


def normalize_column_name(col: str) -> str:
    """Convert an API field name to clean snake_case (FR-aware).

    Lowercases, maps hyphens and spaces to underscores, transliterates
    accented characters, then strips anything left outside ``[a-z0-9_]``.
    """
    col = col.lower().replace("-", "_").replace(" ", "_")
    col = col.translate(_ACCENT_MAP)
    return re.sub(r"[^a-z0-9_]", "", col)


# Explicit API-field -> internal-column renames.
# Fields absent from this mapping fall back to normalize_column_name()
# in adapt_api_to_dataframe().
COLUMN_MAPPING = {
    "id": "id_observation",
    "date": "date_observation",
    "link": "lien_observation",
    "author": "observateur",
    "_url_sortie": "url_sortie",
    "espece-identifiee": "espece_identifiee",
    "heure-debut": "heure_debut",
    "heure-fin": "heure_fin",
    # Kept identical but listed so the full API schema is visible here.
    "latitude": "latitude",
    "longitude": "longitude",
    "photos": "photos",
    "relais": "relais",
    "espece_id": "id_espece",
    "espece": "nom_scientifique",
    "common": "nom_commun",
    "categorie-programme": "categorie_programme",
    "programme": "programme",
}


# ------------------------------
# ADAPT API -> PARQUET
# ------------------------------
def adapt_api_to_dataframe(data: list) -> pl.DataFrame:
    """Turn raw API records into a Polars DataFrame with renamed columns.

    Keys present in COLUMN_MAPPING use their explicit rename; any other
    key is normalized automatically via normalize_column_name().
    """
    renamed_records = [
        {
            COLUMN_MAPPING.get(field, normalize_column_name(field)): value
            for field, value in record.items()
        }
        for record in data
    ]
    return pl.DataFrame(renamed_records)


# ------------------------------
# LOAD (Fetch + Adapt)
# ------------------------------
def load_biolit_from_api() -> pl.DataFrame:
    """Fetch BioLit observations from the API and return them as a DataFrame."""
    return adapt_api_to_dataframe(fetch_biolit_from_api())

Loading
Loading