From 13aa22d51cc669dde704670f857504e3482f8f17 Mon Sep 17 00:00:00 2001 From: Dan LaManna Date: Mon, 23 Feb 2026 14:48:06 -0500 Subject: [PATCH] Store default_attribution on publish requests This was due to an inexplicable bug where an attribution wasn't copied to one of the images. This solves the potential race condition where the default attribution is changed while publishing. --- isic/core/health.py | 20 +++++++++++++++++++ ...0037_publishrequest_default_attribution.py | 17 ++++++++++++++++ isic/ingest/models/publish_request.py | 1 + isic/ingest/services/publish/__init__.py | 9 ++++++++- isic/ingest/tasks.py | 2 ++ isic/ingest/tests/test_accession.py | 2 ++ isic/ingest/tests/test_publish.py | 8 +++++++- 7 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 isic/ingest/migrations/0037_publishrequest_default_attribution.py diff --git a/isic/core/health.py b/isic/core/health.py index 3c599c83..0a57e522 100644 --- a/isic/core/health.py +++ b/isic/core/health.py @@ -196,6 +196,25 @@ def check_iptc_metadata_consistency() -> HealthCheckResult: ) +def check_published_images_have_attribution() -> HealthCheckResult: + published_without_attribution = Image.objects.filter( + accession__attribution="", + ).count() + + passed = published_without_attribution == 0 + message = ( + "All published images have attribution" + if passed + else f"{published_without_attribution} published images missing attribution" + ) + + return HealthCheckResult( + name="published_images_have_attribution", + passed=passed, + message=message, + ) + + def check_embeddings_only_for_public_images() -> HealthCheckResult: embeddings_for_private_images = ImageEmbedding.objects.filter(image__public=False).count() @@ -224,6 +243,7 @@ def check_embeddings_only_for_public_images() -> HealthCheckResult: ("every_user_has_profile", check_every_user_has_profile), ("collection_image_consistency", check_collection_image_consistency), ("iptc_metadata_consistency", check_iptc_metadata_consistency), + ("published_images_have_attribution", check_published_images_have_attribution), ("embeddings_only_for_public_images", check_embeddings_only_for_public_images), ] diff --git a/isic/ingest/migrations/0037_publishrequest_default_attribution.py b/isic/ingest/migrations/0037_publishrequest_default_attribution.py new file mode 100644 index 00000000..28be9bf7 --- /dev/null +++ b/isic/ingest/migrations/0037_publishrequest_default_attribution.py @@ -0,0 +1,17 @@ +# Generated by Django 5.2.3 on 2026-02-23 17:38 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("ingest", "0036_add_anatom_site_hierarchical_fields"), + ] + + operations = [ + migrations.AddField( + model_name="publishrequest", + name="default_attribution", + field=models.CharField(default="", max_length=200), + ), + ] diff --git a/isic/ingest/models/publish_request.py b/isic/ingest/models/publish_request.py index ac611524..387d4dac 100644 --- a/isic/ingest/models/publish_request.py +++ b/isic/ingest/models/publish_request.py @@ -25,6 +25,7 @@ class PublishRequest(models.Model): # the additional collections to which the images will be added, including the magic collection collections = models.ManyToManyField("core.Collection") public = models.BooleanField(default=False) + default_attribution = models.CharField(max_length=200, default="") def __str__(self) -> str: return f"PublishRequest {self.pk}" diff --git a/isic/ingest/services/publish/__init__.py b/isic/ingest/services/publish/__init__.py index 0865b08a..1d15b19c 100644 --- a/isic/ingest/services/publish/__init__.py +++ b/isic/ingest/services/publish/__init__.py @@ -59,6 +59,7 @@ def cohort_publish_initialize( publish_request = PublishRequest.objects.create( creator=publisher, public=public, + default_attribution=cohort.default_attribution, ) publish_request.accessions.set(cohort.accessions.publishable()) publish_request.collections.set(collections) @@ -83,6 +84,7 @@ def cohort_publish(*, publish_request: PublishRequest) -> None: public=publish_request.public, publisher_pk=publish_request.creator.pk, additional_collection_ids=additional_collection_ids, + default_attribution=publish_request.default_attribution, ) @@ -92,13 +94,18 @@ def accession_publish( public: bool, publisher: User, additional_collection_ids: Iterable[int] | None = None, + default_attribution: str = "", ): additional_collection_ids = additional_collection_ids or [] # wrapping this inside of a transaction ensures that this function can be retried easily with lock_table_for_writes(IsicId): if accession.attribution == "": - accession.attribution = accession.cohort.default_attribution + if not default_attribution: + raise ValueError( + "default_attribution must be provided when accession has no attribution" + ) + accession.attribution = default_attribution accession.save(update_fields=["attribution"]) image_create( diff --git a/isic/ingest/tasks.py b/isic/ingest/tasks.py index c5083289..a96139f2 100644 --- a/isic/ingest/tasks.py +++ b/isic/ingest/tasks.py @@ -186,6 +186,7 @@ def publish_accession_task( public: bool, publisher_pk: int, additional_collection_ids: list[int] | None = None, + default_attribution: str = "", ): accession = Accession.objects.select_related("cohort").get(pk=accession_pk) publisher = User.objects.get(pk=publisher_pk) @@ -194,4 +195,5 @@ def publish_accession_task( public=public, publisher=publisher, additional_collection_ids=additional_collection_ids, + default_attribution=default_attribution, ) diff --git a/isic/ingest/tests/test_accession.py b/isic/ingest/tests/test_accession.py index 87a4fd90..73ed68c8 100644 --- a/isic/ingest/tests/test_accession.py +++ b/isic/ingest/tests/test_accession.py @@ -63,6 +63,8 @@ def test_accession_reprocess(user, cohort_factory, django_capture_on_commit_call # refresh the accession to get the blob that was generated by the celery task accession.refresh_from_db() + accession.attribution = "test attribution" + accession.save(update_fields=["attribution"]) with django_capture_on_commit_callbacks(execute=True): accession_publish(accession=accession, public=True, publisher=user) diff --git a/isic/ingest/tests/test_publish.py b/isic/ingest/tests/test_publish.py index 1ab2583a..9c1caf46 100644 --- a/isic/ingest/tests/test_publish.py +++ b/isic/ingest/tests/test_publish.py @@ -86,12 +86,14 @@ def test_publish_copies_default_attribution( publishable_cohort_for_attributions.save(update_fields=["default_attribution"]) with django_capture_on_commit_callbacks(execute=True): - cohort_publish_initialize( + publish_request = cohort_publish_initialize( cohort=publishable_cohort_for_attributions, publisher=user, public=True, ) + assert publish_request.default_attribution == "default attribution" + published_images = Image.objects.filter(accession__cohort=publishable_cohort_for_attributions) assert published_images.count() == 2 @@ -161,6 +163,8 @@ def test_unembargo_images(user, cohort_factory, django_capture_on_commit_callbac original_blob_size=blob_path.stat().st_size, ) accession.refresh_from_db() + accession.attribution = "test attribution" + accession.save(update_fields=["attribution"]) with django_capture_on_commit_callbacks(execute=True): accession_publish(accession=accession, public=True, publisher=user) @@ -198,6 +202,8 @@ def test_iptc_metadata_embedding( original_blob_size=blob_path.stat().st_size, ) accession.refresh_from_db() + accession.attribution = "test attribution" + accession.save(update_fields=["attribution"]) with django_capture_on_commit_callbacks(execute=True): accession_publish(accession=accession, public=False, publisher=user)