diff --git a/isic/core/tests/test_metadata_download.py b/isic/core/tests/test_metadata_download.py index baf6db92..0dfef394 100644 --- a/isic/core/tests/test_metadata_download.py +++ b/isic/core/tests/test_metadata_download.py @@ -15,6 +15,7 @@ def image_with_metadata(image): { "age": 32, "diagnosis": "Nevus", + "anatom_site": "Scalp", "patient_id": "supersecretpatientid", "lesion_id": "supersecretlesionid", "rcm_case_id": "supersecretrcmcaseid", @@ -34,6 +35,9 @@ def test_image_metadata_csv_rows_correct(image_with_metadata): row = next(rows) assert row == { "age_approx": image_with_metadata.accession.age_approx, + "anatom_site_1": "Head and neck", + "anatom_site_2": "Head", + "anatom_site_3": "Scalp", "attribution": image_with_metadata.accession.attribution, "copyright_license": image_with_metadata.accession.copyright_license, "diagnosis_1": "Benign", @@ -55,6 +59,9 @@ def test_staff_image_metadata_csv_rows_correct(image_with_metadata): assert row == { "age_approx": image_with_metadata.accession.age_approx, "age": image_with_metadata.accession.age, + "anatom_site_1": "Head and neck", + "anatom_site_2": "Head", + "anatom_site_3": "Scalp", "attribution": image_with_metadata.accession.attribution, "cohort_id": image_with_metadata.accession.cohort_id, "cohort": image_with_metadata.accession.cohort.name, diff --git a/isic/core/tests/test_search.py b/isic/core/tests/test_search.py index b6329c25..a1faffe8 100644 --- a/isic/core/tests/test_search.py +++ b/isic/core/tests/test_search.py @@ -30,10 +30,12 @@ def searchable_images(image_factory, _search_index): image_factory( public=True, accession__short_diagnosis="melanoma", + accession__short_anatom_site="scalp", ), image_factory( public=False, accession__short_diagnosis="nevus", + accession__short_anatom_site="forearm", ), ] for image in images: @@ -193,6 +195,14 @@ def test_core_api_image_search(searchable_images, staff_client): assert r.status_code == 200, r.json() assert r.json()["count"] == 1, r.json() + r = 
staff_client.get("/api/v2/images/search/", {"query": "anatom_site_3:Scalp"}) + assert r.status_code == 200, r.json() + assert r.json()["count"] == 1, r.json() + + r = staff_client.get("/api/v2/images/search/", {"query": 'anatom_site_1:"Upper extremity"'}) + assert r.status_code == 200, r.json() + assert r.json()["count"] == 1, r.json() + @pytest.mark.django_db def test_core_api_image_search_private_image(private_searchable_image, authenticated_client): @@ -346,6 +356,16 @@ def test_core_api_image_faceting_structure(searchable_images, client): "present_count": 1, }, r.json() + assert len(r.json()["anatom_site_3"]["buckets"]) == 1, r.json() + assert r.json()["anatom_site_3"]["meta"] == { + "missing_count": 0, + "present_count": 1, + }, r.json() + assert r.json()["anatom_site_1"]["meta"] == { + "missing_count": 0, + "present_count": 1, + }, r.json() + @pytest.mark.parametrize( "client_", diff --git a/isic/core/tests/test_view_image_list.py b/isic/core/tests/test_view_image_list.py index f19f8c86..2d04211f 100644 --- a/isic/core/tests/test_view_image_list.py +++ b/isic/core/tests/test_view_image_list.py @@ -16,6 +16,7 @@ def test_image_list_metadata_download_view(mocker, staff_client, mailoutbox, use "patient_id": "bar", "rcm_case_id": "baz", "diagnosis": "Melanoma Invasive", + "anatom_site": "Scalp", "image_type": "RCM: macroscopic", }, ignore_image_check=True, @@ -41,6 +42,9 @@ def test_image_list_metadata_download_view(mocker, staff_client, mailoutbox, use "public", "age", "age_approx", + "anatom_site_1", + "anatom_site_2", + "anatom_site_3", "diagnosis_1", "diagnosis_2", "diagnosis_3", @@ -67,6 +71,9 @@ def test_image_list_metadata_download_view(mocker, staff_client, mailoutbox, use image.public, "57", "55", + "Head and neck", + "Head", + "Scalp", "Malignant", "Malignant melanocytic proliferations (Melanoma)", "Melanoma Invasive", diff --git a/isic/ingest/migrations/0036_add_anatom_site_hierarchical_fields.py 
# Generated by Django 5.2.3 on 2026-02-19 16:44

from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``anatom_site_1`` .. ``anatom_site_5`` columns to ``accession``."""

    dependencies = [
        ("ingest", "0035_alter_distinctnessmeasure_checksum"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    # One AddField per hierarchy level; every level shares the same column definition,
    # and each operation receives its own CharField instance.
    operations = [
        migrations.AddField(
            model_name="accession",
            name=f"anatom_site_{level}",
            field=models.CharField(blank=True, max_length=255, null=True),
        )
        for level in range(1, 6)
    ]
models.CharField(max_length=255, null=True, blank=True) + anatom_site_5 = models.CharField(max_length=255, null=True, blank=True) diagnosis_1 = models.CharField(max_length=255, null=True, blank=True) diagnosis_2 = models.CharField(max_length=255, null=True, blank=True) diagnosis_3 = models.CharField(max_length=255, null=True, blank=True) diff --git a/isic/ingest/tests/factories.py b/isic/ingest/tests/factories.py index 50a532d1..7c367786 100644 --- a/isic/ingest/tests/factories.py +++ b/isic/ingest/tests/factories.py @@ -4,7 +4,7 @@ import factory import factory.django -from isic_metadata.fields import DiagnosisEnum +from isic_metadata.fields import AnatomSiteEnum, DiagnosisEnum from isic.core.models import CopyrightLicense from isic.factories import UserFactory @@ -182,6 +182,24 @@ def short_diagnosis(self, create: bool, extracted: Any, **kwargs: Any) -> None: if create: self.save() + @factory.post_generation + def short_anatom_site(self, create: bool, extracted: Any, **kwargs: Any) -> None: # noqa: FBT001 + if extracted is None: + return + + if extracted == "scalp": + anatom_site = AnatomSiteEnum.head_and_neck_head_scalp + elif extracted == "forearm": + anatom_site = AnatomSiteEnum.upper_extremity_forearm + else: + raise ValueError(f"Unknown short_anatom_site: {extracted}") + + for key, value in AnatomSiteEnum.as_dict(anatom_site).items(): + setattr(self, key, value) + + if create: + self.save() + class AccessionReviewFactory(factory.django.DjangoModelFactory): class Meta: diff --git a/isic/ingest/tests/test_metadata.py b/isic/ingest/tests/test_metadata.py index b8b5c29d..2a146627 100644 --- a/isic/ingest/tests/test_metadata.py +++ b/isic/ingest/tests/test_metadata.py @@ -397,6 +397,28 @@ def test_accession_update_metadata_iddx(user, imageless_accession) -> None: assert imageless_accession.metadata_versions.count() == 1 +@pytest.mark.django_db +def test_accession_update_metadata_anatom_site(user, imageless_accession) -> None: + 
def _reassemble_hierarchical_fields(
    values: dict[str, Any],
    *,
    field_names: tuple[str, ...] = ("diagnosis", "anatom_site"),
    max_levels: int = 5,
) -> None:
    """
    Reassemble level fields (e.g. diagnosis_1..5) into colon-separated parent fields.

    For every name in ``field_names`` the keys ``<name>_1`` .. ``<name>_<max_levels>`` are
    looked up in ``values``. Missing keys and falsy values (``None``, ``""``) are skipped;
    the remaining values are joined with ``":"`` in level order and stored under
    ``<name>``. If no level holds a truthy value, ``<name>`` is neither added nor modified.

    ``values`` is mutated in place and nothing is returned. The level keys themselves are
    left untouched.

    :param values: Mapping holding the per-level keys; updated in place.
    :param field_names: Parent field names whose levels should be reassembled.
    :param max_levels: Highest level suffix to consider (inclusive).
    """
    for field_name in field_names:
        # ``.get`` treats an absent level the same as an empty one, so a single pass
        # both discovers which levels exist and drops the empty ones.
        populated_levels = [
            level_value
            for level in range(1, max_levels + 1)
            if (level_value := values.get(f"{field_name}_{level}"))
        ]
        if populated_levels:
            values[field_name] = ":".join(populated_levels)
"https://files.pythonhosted.org/packages/40/38/cb100876a50eb4292222a4206fc3aab435d1ff4a0553405fd1c0cd1d0f55/isic_metadata-4.11.0.tar.gz", hash = "sha256:457e3f3ac43fddade45f6c03bbcd536903162b1112dcaf639db8bbec19ca61ec", size = 28750, upload-time = "2025-11-05T19:31:22.085Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/52/aa3b6dd8e313b734b788838c5ebdd7242b59f75b1896820776f9a69523de/isic_metadata-4.12.0.tar.gz", hash = "sha256:ef1e5ac481e82bdb3a16a8f6873e378da998c0d5b01e587089c5503a71b13706", size = 33158, upload-time = "2026-02-23T06:57:31.703Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/f5/e7a92af039dcc27b650cf73bbb24a9ba125c10afbe87b8ecf477e5d0ddf4/isic_metadata-4.11.0-py3-none-any.whl", hash = "sha256:45eaaf9d2eb2a453428d07c10a0e84578fead06beb7e6e43e6d3114fd39e8950", size = 27634, upload-time = "2025-11-05T19:31:20.877Z" }, + { url = "https://files.pythonhosted.org/packages/32/c5/0b8e41cfde6dd9b73d931219c3d0acc6e62d68637ca361c38496e156f577/isic_metadata-4.12.0-py3-none-any.whl", hash = "sha256:4c1a4cff4eb3bff838ee75323da590e14431a04cb3f885763e2e068043fd0658", size = 32866, upload-time = "2026-02-23T06:57:30.848Z" }, ] [[package]]