diff --git a/.gitignore b/.gitignore index de57f6b..0f9b454 100644 --- a/.gitignore +++ b/.gitignore @@ -127,4 +127,5 @@ example_bboxs.default.json cache-directory runs .DS_Store -**annotation_api/outputs \ No newline at end of file +**annotation_api/outputs +.claude/ diff --git a/annotation_api/scripts/data_transfer/ingestion/platform/batch_import_local_yolo.py b/annotation_api/scripts/data_transfer/ingestion/platform/batch_import_local_yolo.py index a2113b8..f616d88 100644 --- a/annotation_api/scripts/data_transfer/ingestion/platform/batch_import_local_yolo.py +++ b/annotation_api/scripts/data_transfer/ingestion/platform/batch_import_local_yolo.py @@ -170,7 +170,12 @@ def build_command( "--loglevel", args.loglevel, ] - azimuth_raw = row.get("sequence_azimuth") or row.get("detection_azimuth") or "" + azimuth_raw = ( + row.get("camera_azimuth") + or row.get("sequence_azimuth") # legacy CSVs + or row.get("detection_azimuth") + or "" + ) try: cmd.extend(["--azimuth", str(int(float(azimuth_raw)))]) except ValueError: diff --git a/annotation_api/scripts/data_transfer/ingestion/platform/client.py b/annotation_api/scripts/data_transfer/ingestion/platform/client.py index 15fc930..8a06947 100644 --- a/annotation_api/scripts/data_transfer/ingestion/platform/client.py +++ b/annotation_api/scripts/data_transfer/ingestion/platform/client.py @@ -84,18 +84,27 @@ def make_request_headers(access_token: str) -> dict[str, str]: } -def list_cameras(api_endpoint: str, access_token: str) -> list[dict]: +def list_cameras( + api_endpoint: str, + access_token: str, + include_non_trustable: bool = True, +) -> list[dict]: """ List all cameras using the platform API. Args: api_endpoint (str): The base URL for the API endpoint. access_token (str): The access token for API authentication. + include_non_trustable (bool): If True, include cameras flagged as + non-trustable (default). The platform's `/cameras/` endpoint + otherwise hides them, which would leave detections from those + cameras without metadata when sequences are imported. Returns: list[dict]: A list of dictionaries containing camera information. """ - url = f"{api_endpoint}/api/v1/cameras/" + flag = "true" if include_non_trustable else "false" + url = f"{api_endpoint}/api/v1/cameras/?include_non_trustable={flag}" return api_get(route=url, access_token=access_token) diff --git a/annotation_api/scripts/data_transfer/ingestion/platform/import.py b/annotation_api/scripts/data_transfer/ingestion/platform/import.py index ffc5b88..26397d0 100644 --- a/annotation_api/scripts/data_transfer/ingestion/platform/import.py +++ b/annotation_api/scripts/data_transfer/ingestion/platform/import.py @@ -551,7 +551,7 @@ def fetch_records_from_annotation_api( "sequence_started_at": seq.get("recorded_at"), "sequence_last_seen_at": seq.get("last_seen_at") or seq.get("recorded_at"), - "sequence_azimuth": seq.get("azimuth"), + "camera_azimuth": seq.get("azimuth"), "detection_id": det.get("alert_api_id") or det["id"], "detection_created_at": det.get("created_at") or det.get("recorded_at"), diff --git a/annotation_api/scripts/data_transfer/ingestion/platform/sequence_fetching.py b/annotation_api/scripts/data_transfer/ingestion/platform/sequence_fetching.py index 1fdec21..c551866 100644 --- a/annotation_api/scripts/data_transfer/ingestion/platform/sequence_fetching.py +++ b/annotation_api/scripts/data_transfer/ingestion/platform/sequence_fetching.py @@ -156,37 +156,58 @@ def process_single_sequence_detections( ... detections_order_by="asc" ... ) """ - try: - camera_id = sequence.get("camera_id") - camera = indexed_cameras.get(camera_id, {}) - org_id = camera.get("organization_id") - organization = indexed_organizations.get(org_id, {}) + camera_id = sequence.get("camera_id") + camera = indexed_cameras.get(camera_id, {}) + org_id = camera.get("organization_id") + organization = indexed_organizations.get(org_id, {}) + + # The platform stores one Detection row per bbox even when several boxes + # share the same image (each row carries `bbox` + the siblings in + # `others_bboxes`). We dedupe by `bucket_key` below to import one record + # per image, and `to_record` then re-assembles all boxes for that image + # from the retained row's bbox + others_bboxes. + # + # The platform's /sequences/{id}/detections endpoint doesn't support + # offset pagination and caps `limit` at 100. When `detections_limit > 0` + # we fetch a small buffer above the requested count so the unique-image + # count stays close to what the caller asked for even when a few images + # carry multiple bboxes; `<= 0` means "no limit, fetch all the API will + # return" (matches the `--max-sequences 0` convention used elsewhere). + if detections_limit and detections_limit > 0: + fetch_limit = min(detections_limit + 10, 100) + unique_cap: Optional[int] = detections_limit + else: + fetch_limit = 100 + unique_cap = None + + detections = platform_client.list_sequence_detections( + api_endpoint=api_endpoint, + sequence_id=sequence["id"], + access_token=access_token, + limit=fetch_limit, + desc=(detections_order_by == "desc"), + ) - # Fetch detections for this sequence - detections = platform_client.list_sequence_detections( - api_endpoint=api_endpoint, - sequence_id=sequence["id"], - access_token=access_token, - limit=detections_limit, - desc=(detections_order_by == "desc"), + unique_detections: list[dict] = [] + seen_bucket_keys: set[str] = set() + for detection in detections: + if unique_cap is not None and len(unique_detections) >= unique_cap: + break + bucket_key = detection.get("bucket_key") + if bucket_key is None or bucket_key in seen_bucket_keys: + continue + seen_bucket_keys.add(bucket_key) + unique_detections.append(detection) + + return [ + platform_utils.to_record( + detection=detection, + camera=camera, + organization=organization, + sequence=sequence, ) - - # Build flattened records (one per detection) using the proven platform_utils.to_record function - records = [] - for detection in detections: - record = platform_utils.to_record( - detection=detection, - camera=camera, - organization=organization, - sequence=sequence, - ) - records.append(record) - - return records - - except Exception as e: - logging.error(f"Error processing sequence {sequence.get('id', 'unknown')}: {e}") - return [] + for detection in unique_detections + ] def fetch_all_sequences_within( diff --git a/annotation_api/scripts/data_transfer/ingestion/platform/shared.py b/annotation_api/scripts/data_transfer/ingestion/platform/shared.py index 72aa54b..e402410 100644 --- a/annotation_api/scripts/data_transfer/ingestion/platform/shared.py +++ b/annotation_api/scripts/data_transfer/ingestion/platform/shared.py @@ -163,6 +163,12 @@ def transform_sequence_data(record: dict, source_api: str = "pyronear_french") - Returns: Dictionary formatted for annotation API sequence creation """ + raw_azimuth = record.get("camera_azimuth") + # Platform stores azimuth as a float in [0, 360); rounding can land on 360 + # (e.g. 359.6 → 360), which is out of range. Wrap with modulo to keep + # the canonical 0–359 convention. + azimuth = int(round(float(raw_azimuth))) % 360 if raw_azimuth is not None else None + return { "source_api": source_api, "alert_api_id": record["sequence_id"], # Platform sequence ID @@ -175,7 +181,7 @@ def transform_sequence_data(record: dict, source_api: str = "pyronear_french") - ], # Platform enum: 'wildfire_smoke', 'other_smoke', 'other' "lat": record["camera_lat"], "lon": record["camera_lon"], - "azimuth": record["sequence_azimuth"], + "azimuth": azimuth, "recorded_at": record["sequence_started_at"], "last_seen_at": record["sequence_last_seen_at"], } diff --git a/annotation_api/scripts/data_transfer/ingestion/platform/utils.py b/annotation_api/scripts/data_transfer/ingestion/platform/utils.py index 661f8c6..38b2675 100644 --- a/annotation_api/scripts/data_transfer/ingestion/platform/utils.py +++ b/annotation_api/scripts/data_transfer/ingestion/platform/utils.py @@ -1,6 +1,8 @@ """ """ +import ast from pathlib import Path +from typing import Optional import yaml @@ -37,6 +39,31 @@ def index_by(xs: list[dict], key: str) -> dict[str, dict]: return {x[key]: x for x in xs} +def _parse_bbox_string(s: Optional[str]) -> list: + """Parse a platform bbox string into a list of [x1,y1,x2,y2,conf] lists. + + Accepts the canonical wrapped form `"[(x1,y1,x2,y2,conf), ...]"` (what the + platform validates against), and is defensive about edge variants such as + a flat singular tuple `"(x1,y1,x2,y2,conf)"` that might leak through. + """ + if not s: + return [] + try: + parsed = ast.literal_eval(s) + except (ValueError, SyntaxError): + return [] + if not isinstance(parsed, (list, tuple)): + return [] + # Flat 5-element box: wrap as a single-box list. + if len(parsed) == 5 and all(isinstance(x, (int, float)) for x in parsed): + return [list(parsed)] + out = [] + for box in parsed: + if isinstance(box, (list, tuple)) and len(box) >= 4: + out.append(list(box)) + return out + + def to_record( detection: dict, camera: dict, @@ -56,28 +83,36 @@ def to_record( dict: A structured record containing relevant metadata for the detection. """ + bboxes = _parse_bbox_string(detection.get("bbox")) + _parse_bbox_string( + detection.get("others_bboxes") + ) + return { # Organization metadata - "organization_id": camera["organization_id"], - "organization_name": organization["name"], + "organization_id": camera.get("organization_id"), + "organization_name": organization.get("name"), # Camera metadata "camera_id": sequence["camera_id"], - "camera_name": camera["name"], - "camera_lat": camera["lat"], - "camera_lon": camera["lon"], - "camera_is_trustable": camera["is_trustable"], - "camera_angle_of_view": camera["angle_of_view"], + "camera_name": camera.get("name"), + "camera_lat": camera.get("lat"), + "camera_lon": camera.get("lon"), + "camera_is_trustable": camera.get("is_trustable"), + "camera_angle_of_view": camera.get("angle_of_view"), # Sequence metadata "sequence_id": sequence["id"], - "sequence_is_wildfire": sequence["is_wildfire"], + "sequence_is_wildfire": sequence.get("is_wildfire"), "sequence_started_at": sequence["started_at"], "sequence_last_seen_at": sequence["last_seen_at"], - "sequence_azimuth": sequence["azimuth"], + # Camera/pose pointing direction. The platform also exposes + # `sequence_azimuth` (the inferred smoke cone direction); we + # deliberately ignore that one — annotators care about where the + # camera was looking, not where the smoke is. + "camera_azimuth": sequence.get("camera_azimuth"), # Detection metadata "detection_id": detection["id"], "detection_created_at": detection["created_at"], - "detection_azimuth": detection["azimuth"], - "detection_url": detection["url"], - "detection_bboxes": detection["bboxes"], + "detection_azimuth": None, + "detection_url": detection.get("url"), + "detection_bboxes": bboxes, "detection_bucket_key": detection["bucket_key"], }