diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e47dedbc96..7a66647a62 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -35,8 +35,6 @@ maven-package-all-apps: - agents/uploadworkers/target/agents-uploadworkers-*.jar - pipelines/reindex/target/pipelines-reindex-*.jar - pipelines/curation/target/pipelines-curation-*.jar - - pipelines/curami/target/pipelines-curami-*.jar - - pipelines/copydown/target/pipelines-copydown-*.jar build_and_push_docker_images: stage: package @@ -57,10 +55,6 @@ build_and_push_docker_images: DOCKERFILE_TARGET: "pipelines-reindex" - APP_NAME: "pipelines-curation" DOCKERFILE_TARGET: "pipelines-curation" - - APP_NAME: "pipelines-curami" - DOCKERFILE_TARGET: "pipelines-curami" - - APP_NAME: "pipelines-copydown" - DOCKERFILE_TARGET: "pipelines-copydown" script: - | DOCKER_IMAGE_NAME="$CI_REGISTRY_IMAGE/$APP_NAME:$DOCKER_TAG" @@ -87,6 +81,7 @@ clone-config: - config only: - chore/working-es-search-k8-deployment-with-cleanup + - chore/BSD-2547-sample-status-management - dev - main - biosamples-search @@ -100,6 +95,7 @@ deploy_k8s_primary_dev: url: https://wwwdev.ebi.ac.uk/biosamples only: - chore/working-es-search-k8-deployment-with-cleanup + - chore/BSD-2547-sample-status-management - dev - main - biosamples-search @@ -115,6 +111,7 @@ deploy_k8s_primary_prod: url: https://www.ebi.ac.uk/biosamples only: - chore/working-es-search-k8-deployment-with-cleanup + - chore/BSD-2547-sample-status-management - dev - main - biosamples-search @@ -130,6 +127,7 @@ deploy_k8s_fallback_prod: url: https://www.ebi.ac.uk/biosamples only: - chore/working-es-search-k8-deployment-with-cleanup + - chore/BSD-2547-sample-status-management - dev - main - biosamples-search @@ -145,6 +143,7 @@ deploy_pipeline_k8s_primary_prod: url: https://www.ebi.ac.uk/biosamples only: - chore/working-es-search-k8-deployment-with-cleanup + - chore/BSD-2547-sample-status-management - dev - main - biosamples-search @@ -160,6 +159,7 @@ deploy_pipeline_k8s_fallback_prod: url: 
https://www.ebi.ac.uk/biosamples only: - chore/working-es-search-k8-deployment-with-cleanup + - chore/BSD-2547-sample-status-management - dev - main - biosamples-search @@ -175,6 +175,7 @@ deploy_pipeline_k8s_primary_dev: url: https://wwwdev.ebi.ac.uk/biosamples only: - chore/working-es-search-k8-deployment-with-cleanup + - chore/BSD-2547-sample-status-management - dev - main - biosamples-search diff --git a/client/client/src/main/java/uk/ac/ebi/biosamples/client/BioSamplesClient.java b/client/client/src/main/java/uk/ac/ebi/biosamples/client/BioSamplesClient.java index 881b5cef25..1158dfb995 100644 --- a/client/client/src/main/java/uk/ac/ebi/biosamples/client/BioSamplesClient.java +++ b/client/client/src/main/java/uk/ac/ebi/biosamples/client/BioSamplesClient.java @@ -20,7 +20,6 @@ import org.springframework.boot.web.client.RestTemplateBuilder; import org.springframework.hateoas.EntityModel; import org.springframework.hateoas.MediaTypes; -import org.springframework.hateoas.PagedModel; import org.springframework.hateoas.client.Traverson; import org.springframework.http.HttpHeaders; import org.springframework.http.HttpRequest; @@ -46,7 +45,6 @@ public class BioSamplesClient implements AutoCloseable { private final Logger log = LoggerFactory.getLogger(getClass()); private final SampleRetrievalService sampleRetrievalService; private final SampleRetrievalServiceV2 sampleRetrievalServiceV2; - private final SamplePageRetrievalService samplePageRetrievalService; private final SampleCursorRetrievalService sampleCursorRetrievalService; private final SampleSubmissionService sampleSubmissionService; private final SampleSubmissionServiceV2 sampleSubmissionServiceV2; @@ -98,7 +96,6 @@ public BioSamplesClient( traverson.setRestOperations(restOperations); sampleRetrievalService = new SampleRetrievalService(restOperations, traverson); - samplePageRetrievalService = new SamplePageRetrievalService(restOperations, traverson); sampleCursorRetrievalService = new 
SampleCursorRetrievalService(restOperations, traverson); sampleSubmissionService = new SampleSubmissionService(restOperations, traverson); sampleSubmissionServiceV2 = new SampleSubmissionServiceV2(restOperations, uriV2); @@ -335,36 +332,6 @@ public Iterable>> fetchSampleResourceAll( return sampleRetrievalService.fetchAll(accessions); } - /** - * Searches for samples using pagination. This method should be used for specific pagination - * needs. When in need for all results from a search, prefer the iterator implementation. - * - * @param text the text query - * @param page the page number - * @param size the page size - * @return a paged model of sample resources - */ - public PagedModel> fetchPagedSampleResource( - final String text, final int page, final int size) { - return samplePageRetrievalService.search(text, Collections.emptyList(), page, size); - } - - /** - * Searches for samples using pagination with specified filters. This method should be used for - * specific pagination needs. When in need for all results from a search, prefer the iterator - * implementation. - * - * @param text the text query - * @param filters the collection of filters - * @param page the page number - * @param size the page size - * @return a paged model of sample resources - */ - public PagedModel> fetchPagedSampleResource( - final String text, final Collection filters, final int page, final int size) { - return samplePageRetrievalService.search(text, filters, page, size); - } - /** * Deprecated method: Persists a sample using BioSamples. 
* @@ -510,15 +477,6 @@ public Iterable> fetchSampleResourceAll( return sampleCursorRetrievalService.fetchAll(text, filters, jwt); } - public PagedModel> fetchPagedSampleResource( - final String text, - final Collection filters, - final int page, - final int size, - final String jwt) { - return samplePageRetrievalService.search(text, filters, page, size, jwt); - } - public EntityModel persistSampleResource(final Sample sample, final String jwt) { final Collection errors = sampleValidator.validate(sample); diff --git a/client/client/src/main/java/uk/ac/ebi/biosamples/client/service/SampleCursorRetrievalService.java b/client/client/src/main/java/uk/ac/ebi/biosamples/client/service/SampleCursorRetrievalService.java index ef524e7670..d14050f9d2 100644 --- a/client/client/src/main/java/uk/ac/ebi/biosamples/client/service/SampleCursorRetrievalService.java +++ b/client/client/src/main/java/uk/ac/ebi/biosamples/client/service/SampleCursorRetrievalService.java @@ -70,7 +70,6 @@ public Iterable> fetchAll( } params.add("applyCurations", String.valueOf(addCurations)); - params = encodePlusInQueryParameters(params); return new IterableResourceFetchAll<>( diff --git a/client/client/src/main/java/uk/ac/ebi/biosamples/client/service/SamplePageRetrievalService.java b/client/client/src/main/java/uk/ac/ebi/biosamples/client/service/SamplePageRetrievalService.java deleted file mode 100644 index 5d9297eb79..0000000000 --- a/client/client/src/main/java/uk/ac/ebi/biosamples/client/service/SamplePageRetrievalService.java +++ /dev/null @@ -1,121 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. 
You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.client.service; - -import java.net.URI; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.core.ParameterizedTypeReference; -import org.springframework.hateoas.EntityModel; -import org.springframework.hateoas.MediaTypes; -import org.springframework.hateoas.PagedModel; -import org.springframework.hateoas.client.Traverson; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpMethod; -import org.springframework.http.RequestEntity; -import org.springframework.http.ResponseEntity; -import org.springframework.util.LinkedMultiValueMap; -import org.springframework.util.MultiValueMap; -import org.springframework.web.client.RestOperations; -import org.springframework.web.util.UriComponentsBuilder; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.core.model.filter.Filter; - -public class SamplePageRetrievalService { - private final Logger log = LoggerFactory.getLogger(getClass()); - private final Traverson traverson; - private final RestOperations restOperations; - - public SamplePageRetrievalService( - final RestOperations restOperations, final Traverson traverson) { - this.restOperations = restOperations; - this.traverson = traverson; - } - - public PagedModel> search( - final String text, final Collection filters, final int page, final int size) { - return search(text, filters, page, size, null); - } - - public PagedModel> search( - final String text, - final Collection 
filters, - final int page, - final int size, - final String jwt) { - MultiValueMap params = new LinkedMultiValueMap<>(); - // TODO use shared constants here - params.add("page", Integer.toString(page)); - params.add("size", Integer.toString(size)); - params.add("text", !text.isEmpty() ? text : "*:*"); - - for (final Filter filter : filters) { - params.add("filter", filter.getSerialization()); - } - - params = encodePlusInQueryParameters(params); - - final URI uri = - UriComponentsBuilder.fromUriString(traverson.follow("samples").asLink().getHref()) - .queryParams(params) - .build() - .toUri(); - - log.trace("GETing " + uri); - - final MultiValueMap headers = new LinkedMultiValueMap<>(); - - headers.add(HttpHeaders.CONTENT_TYPE, MediaTypes.HAL_JSON.toString()); - - if (jwt != null) { - headers.set(HttpHeaders.AUTHORIZATION, "Bearer " + jwt); - } - - final RequestEntity requestEntity = new RequestEntity<>(headers, HttpMethod.GET, uri); - final ResponseEntity>> responseEntity = - restOperations.exchange( - requestEntity, new ParameterizedTypeReference>>() {}); - - if (!responseEntity.getStatusCode().is2xxSuccessful()) { - throw new RuntimeException("Problem GETing samples"); - } - - log.trace("GETted " + uri); - - return responseEntity.getBody(); - } - - // TODO to keep the + in a (not encoded) query parameter is to force encoding - private MultiValueMap encodePlusInQueryParameters( - final MultiValueMap queryParameters) { - final MultiValueMap encodedQueryParameters = new LinkedMultiValueMap<>(); - - for (final Map.Entry> param : queryParameters.entrySet()) { - final String key = param.getKey(); - - param - .getValue() - .forEach( - v -> { - if (v != null) { - encodedQueryParameters.add(key, v.replaceAll("\\+", "%2B")); - } else { - encodedQueryParameters.add(key, ""); - } - }); - } - - return encodedQueryParameters; - } -} diff --git a/client/client/src/main/java/uk/ac/ebi/biosamples/client/utils/IterableResourceFetchAll.java 
b/client/client/src/main/java/uk/ac/ebi/biosamples/client/utils/IterableResourceFetchAll.java index ec99ea5b23..d66a9399b6 100644 --- a/client/client/src/main/java/uk/ac/ebi/biosamples/client/utils/IterableResourceFetchAll.java +++ b/client/client/src/main/java/uk/ac/ebi/biosamples/client/utils/IterableResourceFetchAll.java @@ -38,6 +38,28 @@ import org.springframework.web.util.UriComponentsBuilder; import org.springframework.web.util.UriTemplate; +/** + * Provides an {@link Iterable} view over a paginated HAL resource and transparently fetches + * additional pages as iteration progresses. + * + *

<p>This utility is intended for clients that need to consume a collection of {@link EntityModel} + * instances without manually handling pagination. It starts from the initial page discovered + * through {@link Traverson}, then iterates through all available pages by requesting later page + * links on demand. The implementation is designed to hide the paging mechanics from callers so that + * the result can be processed with a standard Java enhanced for-loop or any other {@link + * Iterable}-based API. + * + *

<p>When iterating, the current page is consumed first. If another page is available, the next + * page request is prepared and executed asynchronously using the supplied {@link ExecutorService}. + * This allows the next page to be fetched while the current page is still being processed, which + * can reduce waiting time for large result sets. Authentication information may be propagated via + * the optional JWT value when performing page requests. + * + *

<p>The class also supports applying query parameters to the initial traversal, making it useful + * for fetching filtered result sets from REST endpoints that expose paginated representations. + * + * @param <T> the resource type contained in each {@link EntityModel} + */ @Slf4j public class IterableResourceFetchAll implements Iterable> { private final Traverson traverson; @@ -117,6 +139,9 @@ public Iterator> iterator() { // Build the URI with encoding disabled to preserve existing encoding final URI finalUri = uriBuilder.build(false).toUri(); + + log.info("First Uri {}", finalUri); + final RequestEntity requestEntity = IteratorResourceFetchAll.NextPageCallable.buildRequestEntity(jwt, finalUri); final ResponseEntity>> responseEntity = @@ -187,7 +212,7 @@ public synchronized boolean hasNext() { .build(true) .toUri(); - log.trace("Getting next page uri " + uri); + log.info("Next page uri " + uri); nextPageFuture = executor.submit( diff --git a/core/src/main/java/uk/ac/ebi/biosamples/core/model/Sample.java b/core/src/main/java/uk/ac/ebi/biosamples/core/model/Sample.java index b87c23c80e..cf2c1c3138 100644 --- a/core/src/main/java/uk/ac/ebi/biosamples/core/model/Sample.java +++ b/core/src/main/java/uk/ac/ebi/biosamples/core/model/Sample.java @@ -544,7 +544,7 @@ public static Sample build( sample.reviewed = reviewed; - // Validation moved to a later stage, to capture the error (SampleService.store()) + // Validation moved to a later stage to capture the error (SampleService.store()) sample.release = release; if (status != null) { diff --git a/core/src/main/java/uk/ac/ebi/biosamples/core/model/SampleStatus.java b/core/src/main/java/uk/ac/ebi/biosamples/core/model/SampleStatus.java index 8461eb29bf..9235b2d049 100644 --- a/core/src/main/java/uk/ac/ebi/biosamples/core/model/SampleStatus.java +++ b/core/src/main/java/uk/ac/ebi/biosamples/core/model/SampleStatus.java @@ -19,7 +19,9 @@ public enum SampleStatus { PUBLIC, CANCELLED, SUPPRESSED, - KILLED; + KILLED, +
TEMPORARY_SUPPRESSED, + TEMPORARY_KILLED; public static List getSearchHiddenStatuses() { return Arrays.asList(SUPPRESSED.name(), KILLED.name()); diff --git a/core/src/main/java/uk/ac/ebi/biosamples/core/validation/SamplePersistencePolicy.java b/core/src/main/java/uk/ac/ebi/biosamples/core/validation/SamplePersistencePolicy.java new file mode 100644 index 0000000000..ef9730d9c5 --- /dev/null +++ b/core/src/main/java/uk/ac/ebi/biosamples/core/validation/SamplePersistencePolicy.java @@ -0,0 +1,51 @@ +/* +* Copyright 2021 EMBL - European Bioinformatics Institute +* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this +* file except in compliance with the License. You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software distributed under the +* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. 
+*/ +package uk.ac.ebi.biosamples.core.validation; + +import uk.ac.ebi.biosamples.core.model.Sample; +import uk.ac.ebi.biosamples.core.model.SubmittedViaType; + +public final class SamplePersistencePolicy { + private SamplePersistencePolicy() {} + + public static boolean isStoredSampleEmpty( + final Sample newSample, final boolean isWebinSuperUser, final Sample oldSample) { + if (isWebinSuperUser) { + if (newSample.getSubmittedVia() == SubmittedViaType.FILE_UPLOADER) { + if (newSample.hasAccession()) { + return isStoredSampleEmpty(oldSample); + } + + return true; + } + + return false; + } + + if (newSample.hasAccession()) { + return isStoredSampleEmpty(oldSample); + } + + return true; + } + + public static boolean isStoredSampleEmpty(final Sample oldSample) { + return (oldSample.getTaxId() == null || oldSample.getTaxId() <= 0) + && oldSample.getAttributes().isEmpty() + && oldSample.getRelationships().isEmpty() + && oldSample.getPublications().isEmpty() + && oldSample.getContacts().isEmpty() + && oldSample.getOrganizations().isEmpty() + && oldSample.getData().isEmpty() + && oldSample.getExternalReferences().isEmpty() + && oldSample.getStructuredData().isEmpty(); + } +} diff --git a/core/src/main/java/uk/ac/ebi/biosamples/core/validation/SampleStatusTransitionPolicy.java b/core/src/main/java/uk/ac/ebi/biosamples/core/validation/SampleStatusTransitionPolicy.java new file mode 100644 index 0000000000..f658d30025 --- /dev/null +++ b/core/src/main/java/uk/ac/ebi/biosamples/core/validation/SampleStatusTransitionPolicy.java @@ -0,0 +1,34 @@ +/* +* Copyright 2021 EMBL - European Bioinformatics Institute +* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this +* file except in compliance with the License. 
You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software distributed under the +* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ +package uk.ac.ebi.biosamples.core.validation; + +import java.util.EnumSet; +import java.util.Set; +import uk.ac.ebi.biosamples.core.model.SampleStatus; +import uk.ac.ebi.biosamples.exception.GlobalExceptions; + +public final class SampleStatusTransitionPolicy { + private static final Set ALLOWED_PUBLIC_SOURCE_STATUSES = + EnumSet.of(SampleStatus.PRIVATE); + + private SampleStatusTransitionPolicy() {} + + public static void validatePublicTransition( + final SampleStatus oldStatus, final SampleStatus newStatus) { + if (newStatus != SampleStatus.PUBLIC || oldStatus == null || oldStatus == SampleStatus.PUBLIC) { + return; + } + + if (!ALLOWED_PUBLIC_SOURCE_STATUSES.contains(oldStatus)) { + throw new GlobalExceptions.InvalidSampleException(); + } + } +} diff --git a/core/src/test/java/uk/ac/ebi/biosamples/core/validation/SamplePersistencePolicyTest.java b/core/src/test/java/uk/ac/ebi/biosamples/core/validation/SamplePersistencePolicyTest.java new file mode 100644 index 0000000000..5d6c14bb8c --- /dev/null +++ b/core/src/test/java/uk/ac/ebi/biosamples/core/validation/SamplePersistencePolicyTest.java @@ -0,0 +1,47 @@ +/* +* Copyright 2021 EMBL - European Bioinformatics Institute +* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this +* file except in compliance with the License. 
You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software distributed under the +* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ +package uk.ac.ebi.biosamples.core.validation; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.Test; +import uk.ac.ebi.biosamples.core.model.Sample; +import uk.ac.ebi.biosamples.core.model.SubmittedViaType; + +public class SamplePersistencePolicyTest { + + @Test + public void returns_true_for_empty_stored_sample() { + final Sample storedSample = new Sample.Builder("sample-empty").build(); + + assertThat(SamplePersistencePolicy.isStoredSampleEmpty(storedSample)).isTrue(); + } + + @Test + public void returns_false_for_stored_sample_with_metadata() { + final Sample storedSample = new Sample.Builder("sample-with-taxid").withTaxId(9606L).build(); + + assertThat(SamplePersistencePolicy.isStoredSampleEmpty(storedSample)).isFalse(); + } + + @Test + public void file_uploader_superuser_with_accession_delegates_to_stored_sample_state() { + final Sample newSample = + new Sample.Builder("new-sample", "SAMEA123") + .withSubmittedVia(SubmittedViaType.FILE_UPLOADER) + .build(); + final Sample oldSample = new Sample.Builder("stored-sample").withTaxId(9606L).build(); + + final boolean result = SamplePersistencePolicy.isStoredSampleEmpty(newSample, true, oldSample); + + assertThat(result).isFalse(); + } +} diff --git a/core/src/test/java/uk/ac/ebi/biosamples/core/validation/SampleStatusTransitionPolicyTest.java b/core/src/test/java/uk/ac/ebi/biosamples/core/validation/SampleStatusTransitionPolicyTest.java new file mode 100644 index 0000000000..2098b5ad8f --- /dev/null +++ 
b/core/src/test/java/uk/ac/ebi/biosamples/core/validation/SampleStatusTransitionPolicyTest.java @@ -0,0 +1,36 @@ +/* +* Copyright 2021 EMBL - European Bioinformatics Institute +* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this +* file except in compliance with the License. You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software distributed under the +* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ +package uk.ac.ebi.biosamples.core.validation; + +import org.junit.Test; +import uk.ac.ebi.biosamples.core.model.SampleStatus; +import uk.ac.ebi.biosamples.exception.GlobalExceptions; + +public class SampleStatusTransitionPolicyTest { + + @Test + public void allows_private_to_public_transition() { + SampleStatusTransitionPolicy.validatePublicTransition( + SampleStatus.PRIVATE, SampleStatus.PUBLIC); + } + + @Test + public void allows_non_public_target_without_validation() { + SampleStatusTransitionPolicy.validatePublicTransition( + SampleStatus.SUPPRESSED, SampleStatus.SUPPRESSED); + } + + @Test(expected = GlobalExceptions.InvalidSampleException.class) + public void rejects_suppressed_to_public_transition() { + SampleStatusTransitionPolicy.validatePublicTransition( + SampleStatus.SUPPRESSED, SampleStatus.PUBLIC); + } +} diff --git a/docker-compose.yml b/docker-compose.yml index b4a20ccca8..d833eb0988 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,20 @@ services: build: . 
image: biosamples:latest mem_limit: 1g + restart: on-failure:5 + depends_on: + mongo: + condition: service_healthy + schema-store: + condition: service_started + json-schema-validator: + condition: service_started + rabbitmq: + condition: service_started + solr: + condition: service_started + neo4j: + condition: service_started links: - solr - mongo @@ -94,6 +108,16 @@ services: build: . image: biosamples:latest mem_limit: 1g + restart: on-failure:5 + depends_on: + mongo: + condition: service_healthy + schema-store: + condition: service_started + json-schema-validator: + condition: service_started + rabbitmq: + condition: service_started links: - mongo - json-schema-validator @@ -355,6 +379,13 @@ services: - logs:/logs image: biosamples:latest mem_limit: 500m + depends_on: + mongo: + condition: service_healthy + biosamples-webapps-core: + condition: service_started + biosamples-webapps-core-v2: + condition: service_started links: - biosamples-webapps-core - neo4j @@ -378,6 +409,11 @@ services: schema-store: image: biosamples/json-schema-store:1.1.0 + depends_on: + mongo: + condition: service_healthy + json-schema-validator: + condition: service_started ports: - 8085:8085 links: @@ -389,7 +425,14 @@ services: mongo: image: mongo:4.4.22 + command: ["mongod", "--bind_ip_all"] mem_limit: 2g + healthcheck: + test: ["CMD", "mongo", "--quiet", "--eval", "db.runCommand({ ping: 1 }).ok"] + interval: 10s + timeout: 5s + retries: 10 + start_period: 20s volumes: - mongo_data:/data/db - ./docker/mongoarchive:/mongoarchive diff --git a/docs/sample-status-post-release-reliability-plan.md b/docs/sample-status-post-release-reliability-plan.md new file mode 100644 index 0000000000..a626be9a0b --- /dev/null +++ b/docs/sample-status-post-release-reliability-plan.md @@ -0,0 +1,69 @@ +# SampleStatus Tracking + Post-Release Reliability Plan + +## Guardrail Decision +- Keep `release` date as the source of truth for public/private behavior in access/search/cache paths. 
+- Use `SampleStatus` as explicit lifecycle tracking and workflow state (`DRAFT`, `PRIVATE`, `PUBLIC`, `CANCELLED`, `SUPPRESSED`, `KILLED`, `TEMPORARY_SUPPRESSED`, `TEMPORARY_KILLED`). +- Do not switch existing visibility logic in services such as `WebinAuthenticationService` and `SolrFilterService` to status-only behavior. + +## Scope 1: Validate Status Model Usage (Tracking, Not Visibility) +- Review status creation/update touchpoints and ensure they are consistent with release-based visibility: + - `core/src/main/java/uk/ac/ebi/biosamples/core/model/Sample.java` + - `webapps/core/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java` + - `webapps/core-v2/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java` + - `pipelines/sample-release/src/main/java/uk/ac/ebi/biosamples/samplerelease/SampleReleaseCallable.java` + - `pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/postrelease/SamplePostReleaseActionCallable.java` + - `pipelines/chain/src/main/java/uk/ac/ebi/biosamples/helpdesk/services/SampleStatusUpdater.java` +- Define allowed status transitions and ownership (submission vs pipeline vs helpdesk/manual). +- Identify mismatches where status is stale/incorrect but release-driven behavior is still correct. + +## Scope 2: Root-Cause Pipeline Missed Samples +- Investigate release-window generation behavior in: + - `pipelines/common/src/main/java/uk/ac/ebi/biosamples/utils/PipelineUtils.java` +- Confirm that `release` filtering supports intended `--from` and `--until` semantics. +- Verify runtime arguments and operational trigger cadence for `sample-post-release-action`. +- Audit completion behavior in runner: + - future handling/draining + - shutdown wait behavior + - failed queue capture and reporting + +## Scope 3: Reliability Design for `sample-post-release-action` +- Keep pipeline function: update status from `PRIVATE -> PUBLIC` for samples released in target window.
+- Improve capture reliability without changing release-based visibility semantics: + - Define explicit window contract (`from..until` range, inclusive day handling). + - Add overlap strategy for reruns to avoid boundary misses. + - Add optional watermark/checkpoint mode for continuous operation. + - Add observability: + - input/effective window + - candidate count + - success/failure count + - failed accession output +- Keep transition idempotent and skip non-target statuses safely. + +## Scope 4: Non-Visibility Status Handling +- Specify handling rules for non-visibility statuses: + - `DRAFT` + - `CANCELLED` + - `SUPPRESSED` + - `KILLED` +- Ensure status transitions do not accidentally alter release-driven visibility unless explicitly required. +- Document interoperability with existing fields/attributes (for example `INSDC status`). + +## Scope 5: Test Strategy (No Behavior Flip for Visibility) +- Add/expand tests around: + - release window boundaries (`from`, `until`, UTC day edges) + - pipeline rerun overlap and missed-sample prevention + - idempotent transitions (`PRIVATE -> PUBLIC`) and no-op on other statuses + - status tracking consistency while preserving release-based access behavior +- Add focused tests for runner completion to ensure all scheduled tasks are awaited. 
+ +## Deliverables +- Findings report containing: + - confirmed root cause(s) for missed samples + - recommended minimal code changes + - rollout order and rollback-safe steps +- Follow-up implementation breakdown into small commits: + - window fix + - observability + - reliability hardening + - tests + - optional watermark mode diff --git a/es_index.json b/es_index.json index 0bb7591cad..1123ee6777 100644 --- a/es_index.json +++ b/es_index.json @@ -1,194 +1,194 @@ { -"settings": { - "number_of_shards": 1, - "number_of_replicas": 0 -}, -"mappings": { - "properties": { - "sample_full_text": { - "type": "text" + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 }, - "_class": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "accession": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "characteristics": { - "type": "nested", + "mappings": { "properties": { - "key": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "value": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 + "sample_full_text": { + "type": "text" + }, + "_class": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "accession": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "characteristics": { + "type": "nested", + "properties": { + "key": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "value": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + 
"create": { + "type": "date" + }, + "domain": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "externalReferences": { + "type": "nested", + "properties": { + "url": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "name": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "publications": { + "type": "nested", + "properties": { + "pubmed_id": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "relationships": { + "type": "nested", + "properties": { + "source": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "target": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "type": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "release": { + "type": "date" + }, + "sraAccession": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "status": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "submitted": { + "type": "date" + }, + "submittedVia": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "taxId": { + "type": "long", + "copy_to": "sample_full_text" + }, + "update": { + "type": "date" + }, + "webinSubmissionAccountId": { + "type": "text", + "copy_to": "sample_full_text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } } - } - } - } - }, - "create": { - 
"type": "date" - }, - "domain": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "externalReferences": { - "type": "nested", - "properties": { - "url": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - } - } - }, - "name": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "publications": { - "type": "nested", - "properties": { - "pubmed_id": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - } } - }, - "relationships": { - "type": "nested", - "properties": { - "source": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "target": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "type": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - } - } - }, - "release": { - "type": "date" - }, - "sraAccession": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "status": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "submitted": { - "type": "date" - }, - "submittedVia": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "taxId": { - "type": "long", - "copy_to": "sample_full_text" - }, - "update": { - "type": "date" - }, - "webinSubmissionAccountId": { - "type": "text", - "copy_to": "sample_full_text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - } } } -} diff --git 
a/integration/src/main/java/uk/ac/ebi/biosamples/RestFilterIntegration.java b/integration/src/main/java/uk/ac/ebi/biosamples/RestFilterIntegration.java index 8ca4de78f7..d0c3b15ec1 100644 --- a/integration/src/main/java/uk/ac/ebi/biosamples/RestFilterIntegration.java +++ b/integration/src/main/java/uk/ac/ebi/biosamples/RestFilterIntegration.java @@ -11,7 +11,9 @@ package uk.ac.ebi.biosamples; import java.time.Instant; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.Optional; import java.util.SortedSet; import java.util.TreeSet; @@ -19,7 +21,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.hateoas.EntityModel; -import org.springframework.hateoas.PagedModel; import org.springframework.stereotype.Component; import uk.ac.ebi.biosamples.client.BioSamplesClient; import uk.ac.ebi.biosamples.client.utils.ClientProperties; @@ -187,22 +188,19 @@ protected void phaseThree() { Filter attributeFilter = FilterBuilder.create().onAttribute("TestAttribute").withValue("FilterMe").build(); - PagedModel> samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(attributeFilter), 0, 10); + List> matchingSamples = fetchAllSamplesByFilter(attributeFilter); - if (samplePage.getMetadata().getTotalElements() != 1) { + if (matchingSamples.size() != 1) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } - EntityModel restSample = samplePage.getContent().iterator().next(); + EntityModel restSample = matchingSamples.get(0); if (!restSample.getContent().equals(testSample1)) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter 
query: " + matchingSamples.size(), Phase.THREE); } @@ -224,22 +222,19 @@ protected void phaseThree() { .withValue(targetAttribute.get().getValue()) .build(); - samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(attributeFilter), 0, 10); + matchingSamples = fetchAllSamplesByFilter(attributeFilter); - if (samplePage.getMetadata().getTotalElements() != 1) { + if (matchingSamples.size() != 1) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } - restSample = samplePage.getContent().iterator().next(); + restSample = matchingSamples.get(0); if (!restSample.getContent().getAccession().equals(testSample1.getAccession())) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } @@ -257,22 +252,19 @@ protected void phaseThree() { attributeFilter = FilterBuilder.create().onAttribute(targetAttribute.get().getType()).withValue(null).build(); - samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(attributeFilter), 0, 10); + matchingSamples = fetchAllSamplesByFilter(attributeFilter); - if (samplePage.getMetadata().getTotalElements() != 1) { + if (matchingSamples.size() != 1) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } - restSample = samplePage.getContent().iterator().next(); + restSample = matchingSamples.get(0); if (!restSample.getContent().getAccession().equals(testSample1.getAccession())) { throw new 
IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } @@ -280,22 +272,19 @@ protected void phaseThree() { attributeFilter = FilterBuilder.create().onAttribute("testAttribute").withValue("filterMe_1").build(); - samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(attributeFilter), 0, 10); + matchingSamples = fetchAllSamplesByFilter(attributeFilter); - if (samplePage.getMetadata().getTotalElements() != 1) { + if (matchingSamples.size() != 1) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } - restSample = samplePage.getContent().iterator().next(); + restSample = matchingSamples.get(0); if (!restSample.getContent().getAccession().equals(testSample2.getAccession())) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } @@ -303,22 +292,19 @@ protected void phaseThree() { final Filter nameFilter = FilterBuilder.create().onName(testSample2.getName()).build(); - samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(nameFilter), 0, 10); + matchingSamples = fetchAllSamplesByFilter(nameFilter); - if (samplePage.getMetadata().getTotalElements() != 1) { + if (matchingSamples.size() != 1) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), 
Phase.THREE); } - restSample = samplePage.getContent().iterator().next(); + restSample = matchingSamples.get(0); if (!restSample.getContent().getAccession().equals(testSample2.getAccession())) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } @@ -327,27 +313,24 @@ protected void phaseThree() { final Filter accessionFilter = FilterBuilder.create().onAccession(testSample2.getAccession()).build(); - samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(accessionFilter), 0, 10); + matchingSamples = fetchAllSamplesByFilter(accessionFilter); - if (samplePage.getMetadata().getTotalElements() != 1) { + if (matchingSamples.size() != 1) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } final String accession1 = testSample1.getAccession(); final String accession2 = testSample2.getAccession(); - if (!samplePage.getContent().stream() + if (!matchingSamples.stream() .allMatch( r -> r.getContent().getAccession().equals(accession1) || r.getContent().getAccession().equals(accession2))) { throw new IntegrationTestFailException( - "Unexpected number of results for attribute filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for attribute filter query: " + matchingSamples.size(), Phase.THREE); } } @@ -397,17 +380,15 @@ protected void phaseFour() { .from(testSample1.getRelease().minusSeconds(2)) .until(testSample1.getRelease().plusSeconds(2)) .build(); - PagedModel> samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(dateFilter), 0, 10); + List> matchingSamples = 
fetchAllSamplesByFilter(dateFilter); - if (samplePage.getMetadata().getTotalElements() < 1) { + if (matchingSamples.size() < 1) { throw new IntegrationTestFailException( - "Unexpected number of results for date range filter query: " - + samplePage.getMetadata().getTotalElements()); + "Unexpected number of results for date range filter query: " + matchingSamples.size()); } boolean match = - samplePage.getContent().stream() + matchingSamples.stream() .anyMatch(resource -> resource.getContent().getAccession().equals(accession1)); if (!match) { @@ -423,18 +404,16 @@ protected void phaseFour() { .onRelation("parent of") .withValue(testSample2.getAccession()) .build(); - samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(relFilter), 0, 10); + matchingSamples = fetchAllSamplesByFilter(relFilter); - if (samplePage.getMetadata().getTotalElements() < 1) { + if (matchingSamples.size() < 1) { throw new IntegrationTestFailException( - "Unexpected number of results for relation filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for relation filter query: " + matchingSamples.size(), Phase.FOUR); } match = - samplePage.getContent().stream() + matchingSamples.stream() .anyMatch(resource -> resource.getContent().getAccession().equals(accession3)); if (!match) { @@ -450,17 +429,16 @@ protected void phaseFour() { .onInverseRelation("parent of") .withValue(testSample3.getAccession()) .build(); - samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(invRelFilter), 0, 10); + matchingSamples = fetchAllSamplesByFilter(invRelFilter); - if (samplePage.getMetadata().getTotalElements() < 1) { + if (matchingSamples.size() < 1) { throw new IntegrationTestFailException( "Unexpected number of results for relation filter query. 
Expected more than zero but got " - + samplePage.getMetadata().getTotalElements()); + + matchingSamples.size()); } match = - samplePage.getContent().stream() + matchingSamples.stream() .anyMatch(resource -> resource.getContent().getAccession().equals(accession2)); if (!match) { @@ -476,13 +454,11 @@ protected void phaseFive() { final Filter authFilter = FilterBuilder.create().onAuthInfo(defaultWebinIdForIntegrationTests).build(); - final PagedModel> samplePage = - webinClient.fetchPagedSampleResource("", Collections.singletonList(authFilter), 0, 10); + final List> matchingSamples = fetchAllSamplesByFilter(authFilter); - if (samplePage.getMetadata().getTotalElements() < 1) { + if (matchingSamples.size() < 1) { throw new IntegrationTestFailException( - "Unexpected number of results for domain filter query: " - + samplePage.getMetadata().getTotalElements(), + "Unexpected number of results for domain filter query: " + matchingSamples.size(), Phase.FIVE); } } @@ -490,6 +466,17 @@ protected void phaseFive() { @Override protected void phaseSix() {} + private List> fetchAllSamplesByFilter(final Filter filter) { + final List> samples = new ArrayList<>(); + + for (final EntityModel sample : + webinClient.fetchSampleResourceAll("", Collections.singletonList(filter))) { + samples.add(sample); + } + + return samples; + } + private Sample getTestSample1() { final String name = "RestFilterIntegration_sample_1"; final Instant update = Instant.parse("1999-12-25T11:36:57.00Z"); diff --git a/integration/src/main/java/uk/ac/ebi/biosamples/RestSampleStatusTransitionIntegration.java b/integration/src/main/java/uk/ac/ebi/biosamples/RestSampleStatusTransitionIntegration.java new file mode 100644 index 0000000000..52b80f11df --- /dev/null +++ b/integration/src/main/java/uk/ac/ebi/biosamples/RestSampleStatusTransitionIntegration.java @@ -0,0 +1,202 @@ +/* +* Copyright 2021 EMBL - European Bioinformatics Institute +* Licensed under the Apache License, Version 2.0 (the "License"); you may not 
use this +* file except in compliance with the License. You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software distributed under the +* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ +package uk.ac.ebi.biosamples; + +import java.time.Instant; +import java.util.Collections; +import java.util.Iterator; +import java.util.Optional; +import java.util.SortedSet; +import java.util.TreeSet; +import org.springframework.core.annotation.Order; +import org.springframework.hateoas.EntityModel; +import org.springframework.stereotype.Component; +import uk.ac.ebi.biosamples.client.BioSamplesClient; +import uk.ac.ebi.biosamples.core.model.Attribute; +import uk.ac.ebi.biosamples.core.model.Sample; +import uk.ac.ebi.biosamples.core.model.SampleStatus; +import uk.ac.ebi.biosamples.core.model.filter.Filter; +import uk.ac.ebi.biosamples.core.service.FilterBuilder; +import uk.ac.ebi.biosamples.utils.IntegrationTestFailException; + +@Component +@Order(2) +public class RestSampleStatusTransitionIntegration extends AbstractIntegration { + private static final String SAMPLE_NAME = + "RestSampleStatusTransitionIntegration_sample_suppressed"; + private static final String PRIVATE_SAMPLE_NAME = + "RestSampleStatusTransitionIntegration_sample_private"; + + public RestSampleStatusTransitionIntegration(final BioSamplesClient client) { + super(client); + } + + @Override + protected void phaseOne() { + if (fetchUniqueSampleByNameForWebin(SAMPLE_NAME).isPresent()) { + throw new IntegrationTestFailException( + "Transition test sample should not be present during phase 1", Phase.ONE); + } + if (fetchUniqueSampleByNameForWebin(PRIVATE_SAMPLE_NAME).isPresent()) { + throw new IntegrationTestFailException( + "Private transition 
test sample should not be present during phase 1", Phase.ONE); + } + + final EntityModel persisted = webinClient.persistSampleResource(getSuppressedSample()); + if (persisted.getContent() == null) { + throw new IntegrationTestFailException( + "Failed to persist transition test sample in phase 1", Phase.ONE); + } + final EntityModel privatePersisted = + webinClient.persistSampleResource(getPrivateSampleWithPastReleaseDate()); + if (privatePersisted.getContent() == null) { + throw new IntegrationTestFailException( + "Failed to persist private transition test sample in phase 1", Phase.ONE); + } + } + + @Override + protected void phaseTwo() { + final Sample existing = + fetchUniqueSampleByNameForWebin(SAMPLE_NAME) + .orElseThrow( + () -> + new IntegrationTestFailException( + "Failed to fetch transition test sample in phase 2", Phase.TWO)); + + final Sample updateToPublic = + Sample.Builder.fromSample(existing) + .withStatus(SampleStatus.PUBLIC) + .withRelease(Instant.parse("2016-04-01T11:36:57.00Z")) + .build(); + + boolean transitionBlocked = false; + try { + webinClient.persistSampleResource(updateToPublic); + } catch (final Exception ignored) { + transitionBlocked = true; + } + + if (!transitionBlocked) { + throw new IntegrationTestFailException( + "SUPPRESSED -> PUBLIC transition should be blocked", Phase.TWO); + } + + final Sample fetchedAfterAttempt = + webinClient + .fetchSampleResource(existing.getAccession()) + .orElseThrow( + () -> + new IntegrationTestFailException( + "Sample should still exist after blocked transition", Phase.TWO)) + .getContent(); + + if (fetchedAfterAttempt == null || fetchedAfterAttempt.getStatus() != SampleStatus.SUPPRESSED) { + throw new IntegrationTestFailException( + "Blocked transition should keep sample in SUPPRESSED state", Phase.TWO); + } + + final Sample privateSample = + fetchUniqueSampleByNameForWebin(PRIVATE_SAMPLE_NAME) + .orElseThrow( + () -> + new IntegrationTestFailException( + "Failed to fetch private transition test 
sample in phase 2", Phase.TWO)); + + final Sample updatePrivateToPublic = + Sample.Builder.fromSample(privateSample) + .withStatus(SampleStatus.PUBLIC) + .withRelease(Instant.now()) + .build(); + + webinClient.persistSampleResource(updatePrivateToPublic); + + final Sample privateSampleAfterTransition = + webinClient + .fetchSampleResource(privateSample.getAccession()) + .orElseThrow( + () -> + new IntegrationTestFailException( + "Private transition sample should still exist after update", Phase.TWO)) + .getContent(); + + if (privateSampleAfterTransition == null + || privateSampleAfterTransition.getStatus() != SampleStatus.PUBLIC) { + throw new IntegrationTestFailException( + "PRIVATE -> PUBLIC transition should be allowed when release <= now", Phase.TWO); + } + } + + @Override + protected void phaseThree() {} + + @Override + protected void phaseFour() {} + + @Override + protected void phaseFive() {} + + @Override + protected void phaseSix() {} + + private Optional fetchUniqueSampleByNameForWebin(final String name) { + final Filter nameFilter = FilterBuilder.create().onName(name).build(); + final Iterator> iterator = + webinClient.fetchSampleResourceAll(Collections.singletonList(nameFilter)).iterator(); + + final Optional optionalSample; + if (iterator.hasNext()) { + optionalSample = Optional.ofNullable(iterator.next().getContent()); + } else { + optionalSample = Optional.empty(); + } + + if (iterator.hasNext()) { + throw new IntegrationTestFailException( + "More than one sample present with the given name for transition test"); + } + + return optionalSample; + } + + private Sample getSuppressedSample() { + final SortedSet attributes = new TreeSet<>(); + attributes.add( + Attribute.build( + "organism", "Homo sapiens", "http://purl.obolibrary.org/obo/NCBITaxon_9606", null)); + attributes.add(Attribute.build("INSDC status", "suppressed")); + + return new Sample.Builder(SAMPLE_NAME) + .withTaxId(9606L) + .withStatus(SampleStatus.SUPPRESSED) + 
.withUpdate(Instant.parse("2016-05-05T11:36:57.00Z")) + .withRelease(Instant.parse("2016-04-01T11:36:57.00Z")) + .withWebinSubmissionAccountId(defaultWebinIdForIntegrationTests) + .withAttributes(attributes) + .build(); + } + + private Sample getPrivateSampleWithPastReleaseDate() { + final SortedSet attributes = new TreeSet<>(); + attributes.add( + Attribute.build( + "organism", "Homo sapiens", "http://purl.obolibrary.org/obo/NCBITaxon_9606", null)); + + return new Sample.Builder(PRIVATE_SAMPLE_NAME) + .withTaxId(9606L) + .withStatus(SampleStatus.PRIVATE) + .withUpdate(Instant.parse("2016-05-05T11:36:57.00Z")) + .withRelease(Instant.parse("2016-04-01T11:36:57.00Z")) + .withWebinSubmissionAccountId(defaultWebinIdForIntegrationTests) + .withAttributes(attributes) + .build(); + } +} diff --git a/pipelines/common/src/main/java/uk/ac/ebi/biosamples/PipelineApplicationRunner.java b/pipelines/common/src/main/java/uk/ac/ebi/biosamples/PipelineApplicationRunner.java deleted file mode 100644 index 357b9ce4b7..0000000000 --- a/pipelines/common/src/main/java/uk/ac/ebi/biosamples/PipelineApplicationRunner.java +++ /dev/null @@ -1,114 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. 
-*/ -package uk.ac.ebi.biosamples; - -import java.time.Duration; -import java.time.Instant; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.Callable; -import java.util.concurrent.Future; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.boot.ApplicationArguments; -import org.springframework.boot.ApplicationRunner; -import org.springframework.hateoas.EntityModel; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.core.model.PipelineAnalytics; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.core.model.filter.Filter; -import uk.ac.ebi.biosamples.utils.PipelineUtils; -import uk.ac.ebi.biosamples.utils.thread.AdaptiveThreadPoolExecutor; -import uk.ac.ebi.biosamples.utils.thread.ThreadUtils; - -public abstract class PipelineApplicationRunner implements ApplicationRunner { - protected final Logger LOG = LoggerFactory.getLogger(getClass()); - private static final String PIPELINE_NAME = "TEMPLATE"; - - protected final BioSamplesClient bioSamplesClient; - private final PipelinesProperties pipelinesProperties; - private final PipelineFutureCallback pipelineFutureCallback; - - public PipelineApplicationRunner( - final BioSamplesClient bioSamplesClient, final PipelinesProperties pipelinesProperties) { - this.bioSamplesClient = bioSamplesClient; - this.pipelinesProperties = pipelinesProperties; - // this.analyticsService = analyticsService; - pipelineFutureCallback = new PipelineFutureCallback(); - } - - @Override - public void run(final ApplicationArguments args) throws Exception { - final Instant startTime = Instant.now(); - LOG.info("Pipeline started at {}", startTime); - final Collection filters = PipelineUtils.getDateFilters(args, "update"); - long sampleCount = 0; - - loadPreConfiguration(); - - try (final AdaptiveThreadPoolExecutor executorService = - AdaptiveThreadPoolExecutor.create( - 
100, - 10000, - true, - pipelinesProperties.getThreadCount(), - pipelinesProperties.getThreadCountMax())) { - - final Map> futures = new HashMap<>(); - for (final EntityModel sampleResource : - bioSamplesClient.fetchSampleResourceAll("", filters)) { - final Sample sample = Objects.requireNonNull(sampleResource.getContent()); - LOG.trace("Handling {}", sample); - - final Callable task = getNewCallableClassInstance().withSample(sample); - sampleCount++; - if (sampleCount % 10000 == 0) { - LOG.info("{} samples scheduled for processing", sampleCount); - } - futures.put(sample.getAccession(), executorService.submit(task)); - } - - LOG.info("waiting for futures to finish"); - ThreadUtils.checkAndCallbackFutures(futures, 0, pipelineFutureCallback); - } catch (final Exception e) { - LOG.error("Pipeline failed to finish successfully", e); - throw e; - } finally { - final Instant endTime = Instant.now(); - LOG.info("Total samples processed {}", sampleCount); - LOG.info("Total curation objects added {}", pipelineFutureCallback.getTotalCount()); - LOG.info("Pipeline finished at {}", endTime); - LOG.info( - "Pipeline total running time {} seconds", - Duration.between(startTime, endTime).getSeconds()); - - final PipelineAnalytics pipelineAnalytics = - new PipelineAnalytics( - getPipelineName(), - startTime, - endTime, - sampleCount, - pipelineFutureCallback.getTotalCount()); - pipelineAnalytics.setDateRange(filters); - // analyticsService.persistPipelineAnalytics(pipelineAnalytics); - } - } - - private String getPipelineName() { - return PIPELINE_NAME; - } - - protected abstract void loadPreConfiguration(); - - protected abstract PipelineSampleCallable getNewCallableClassInstance(); -} diff --git a/pipelines/common/src/main/java/uk/ac/ebi/biosamples/utils/PipelineUtils.java b/pipelines/common/src/main/java/uk/ac/ebi/biosamples/utils/PipelineUtils.java index a61b27dba2..88b4ffe7c0 100644 --- a/pipelines/common/src/main/java/uk/ac/ebi/biosamples/utils/PipelineUtils.java +++ 
b/pipelines/common/src/main/java/uk/ac/ebi/biosamples/utils/PipelineUtils.java @@ -41,11 +41,12 @@ public static Collection getLastRunFilters(LocalDate lastRunDate, LocalD } public static Collection getDateFilters( - final ApplicationArguments args, final String dateType) { + final ApplicationArguments args, final DateType dateType) { final Collection filters = new ArrayList<>(); final LocalDate fromDate; final LocalDate toDate; - final Filter fromDateFilter; + final LocalDate effectiveTo; + Filter dateFilter; if (args.getOptionNames().contains("from")) { fromDate = @@ -63,24 +64,24 @@ public static Collection getDateFilters( toDate = LocalDate.parse("3000-01-01", DateTimeFormatter.ISO_LOCAL_DATE); } - log.info("Processing samples from " + DateTimeFormatter.ISO_LOCAL_DATE.format(fromDate)); - log.info("Processing samples to " + DateTimeFormatter.ISO_LOCAL_DATE.format(toDate)); - - if (!dateType.equals("release")) { - fromDateFilter = - new DateRangeFilter.DateRangeFilterBuilder(dateType) - .from(fromDate.atStartOfDay().toInstant(ZoneOffset.UTC)) - .until(toDate.plusDays(1).atStartOfDay().toInstant(ZoneOffset.UTC)) - .build(); + if (dateType.equals(DateType.release)) { + effectiveTo = LocalDate.now(); + } else if (dateType.equals(DateType.update)) { + effectiveTo = toDate.plusDays(1); } else { - fromDateFilter = - new DateRangeFilter.DateRangeFilterBuilder(dateType) - .from(fromDate.atStartOfDay().toInstant(ZoneOffset.UTC)) - .until(fromDate.plusDays(1).atStartOfDay().toInstant(ZoneOffset.UTC)) - .build(); + throw new RuntimeException("Unsupported date type"); } - filters.add(fromDateFilter); + log.info("Processing samples from " + DateTimeFormatter.ISO_LOCAL_DATE.format(fromDate)); + log.info("Processing samples to " + DateTimeFormatter.ISO_LOCAL_DATE.format(effectiveTo)); + + dateFilter = + new DateRangeFilter.DateRangeFilterBuilder(dateType.name()) + .from(fromDate.atStartOfDay().toInstant(ZoneOffset.UTC)) + 
.until(effectiveTo.atStartOfDay().toInstant(ZoneOffset.UTC)) + .build(); + + filters.add(dateFilter); return filters; } @@ -172,4 +173,9 @@ public static void exitPipeline(final ConfigurableApplicationContext ctx) { public static Filter getAttributeFilter(final String attributeName, final String attributeValue) { return new AttributeFilter.Builder(attributeName).withValue(attributeValue).build(); } + + public enum DateType { + release, + update + } } diff --git a/pipelines/copydown/pom.xml b/pipelines/copydown/pom.xml deleted file mode 100644 index 0063309c6a..0000000000 --- a/pipelines/copydown/pom.xml +++ /dev/null @@ -1,52 +0,0 @@ - - 4.0.0 - - pipelines-copydown - jar - - - uk.ac.ebi.biosamples - biosamples - 5.3.15-SNAPSHOT - ../../ - - - - - org.springframework.boot - spring-boot-starter-cache - - - uk.ac.ebi.biosamples - pipelines-common - 5.3.15-SNAPSHOT - - - uk.ac.ebi.biosamples - core - 5.3.15-SNAPSHOT - - - org.springframework.hateoas - spring-hateoas - 1.3.4 - - - - - - - org.springframework.boot - spring-boot-maven-plugin - - - - build-info - - - - - - - diff --git a/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/Application.java b/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/Application.java deleted file mode 100644 index 86d5948409..0000000000 --- a/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/Application.java +++ /dev/null @@ -1,68 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. 
See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples; - -import org.springframework.boot.SpringApplication; -import org.springframework.boot.WebApplicationType; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; -import org.springframework.boot.web.client.RestTemplateCustomizer; -import org.springframework.cache.annotation.EnableCaching; -import org.springframework.context.ConfigurableApplicationContext; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.ComponentScan; -import org.springframework.context.annotation.FilterType; -import org.springframework.context.annotation.Import; -import org.springframework.data.mongodb.repository.config.EnableMongoRepositories; -import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity; -import org.springframework.web.client.RestTemplate; -import uk.ac.ebi.biosamples.configuration.ExclusionConfiguration; -import uk.ac.ebi.biosamples.service.EnaConfig; -import uk.ac.ebi.biosamples.service.EnaSampleToBioSampleConversionService; -import uk.ac.ebi.biosamples.service.EraProDao; -import uk.ac.ebi.biosamples.utils.PipelineUtils; - -@SpringBootApplication(exclude = DataSourceAutoConfiguration.class) -@ComponentScan( - excludeFilters = { - @ComponentScan.Filter( - type = FilterType.ASSIGNABLE_TYPE, - value = {EnaConfig.class, EraProDao.class, EnaSampleToBioSampleConversionService.class}) - }) -@Import(ExclusionConfiguration.class) -@EnableCaching -@EnableWebSecurity -@EnableMongoRepositories(basePackages = "uk.ac.ebi.biosamples.repository") -public class Application { - - public static void main(final String[] args) { - SpringApplication app = new SpringApplication(Application.class); - app.setWebApplicationType(WebApplicationType.NONE); - - final ConfigurableApplicationContext 
ctx = app.run(args); - PipelineUtils.exitPipeline(ctx); - } - - @Bean - public RestTemplate restTemplate(final RestTemplateCustomizer restTemplateCustomizer) { - final RestTemplate restTemplate = new RestTemplate(); - restTemplateCustomizer.customize(restTemplate); - return restTemplate; - } - - @Bean - public RestTemplateCustomizer restTemplateCustomizer( - final BioSamplesProperties bioSamplesProperties, - final PipelinesProperties pipelinesProperties) { - return new PipelinesHelper() - .getRestTemplateCustomizer(bioSamplesProperties, pipelinesProperties); - } -} diff --git a/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/copydown/CopydownApplicationRunner.java b/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/copydown/CopydownApplicationRunner.java deleted file mode 100644 index f1bfb3fed7..0000000000 --- a/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/copydown/CopydownApplicationRunner.java +++ /dev/null @@ -1,142 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. 
-*/ -package uk.ac.ebi.biosamples.copydown; - -import java.time.Duration; -import java.time.Instant; -import java.time.LocalDate; -import java.util.*; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.Future; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.boot.ApplicationArguments; -import org.springframework.boot.ApplicationRunner; -import org.springframework.hateoas.EntityModel; -import org.springframework.stereotype.Component; -import uk.ac.ebi.biosamples.PipelineFutureCallback; -import uk.ac.ebi.biosamples.PipelineResult; -import uk.ac.ebi.biosamples.PipelinesProperties; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.core.model.PipelineAnalytics; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.core.model.filter.Filter; -import uk.ac.ebi.biosamples.model.PipelineLastRun; -import uk.ac.ebi.biosamples.model.PipelineName; -import uk.ac.ebi.biosamples.mongo.service.AnalyticsService; -import uk.ac.ebi.biosamples.service.PipelineHelperService; -import uk.ac.ebi.biosamples.utils.PipelineUtils; -import uk.ac.ebi.biosamples.utils.thread.AdaptiveThreadPoolExecutor; -import uk.ac.ebi.biosamples.utils.thread.ThreadUtils; - -@Component -public class CopydownApplicationRunner implements ApplicationRunner { - private static final Logger LOG = LoggerFactory.getLogger(CopydownApplicationRunner.class); - private static final PipelineName PIPELINE_NAME = PipelineName.COPYDOWN; - - private final BioSamplesClient bioSamplesClient; - private final PipelinesProperties pipelinesProperties; - private final AnalyticsService analyticsService; - private final PipelineFutureCallback pipelineFutureCallback; - private final PipelineHelperService pipelineHelperService; - - public CopydownApplicationRunner( - final BioSamplesClient bioSamplesClient, - final PipelinesProperties pipelinesProperties, - final AnalyticsService 
analyticsService, - PipelineHelperService pipelineHelperService) { - this.bioSamplesClient = bioSamplesClient; - this.pipelinesProperties = pipelinesProperties; - this.analyticsService = analyticsService; - this.pipelineHelperService = pipelineHelperService; - pipelineFutureCallback = new PipelineFutureCallback(); - } - - @Override - public void run(final ApplicationArguments args) throws Exception { - PipelineLastRun pipelineLastRun = pipelineHelperService.getLastRunDate(PIPELINE_NAME); - LocalDate lastRunDate = pipelineLastRun.getLastRunDate(); - LocalDate startDate = LocalDate.now(); - final Collection filters = PipelineUtils.getLastRunFilters(lastRunDate, startDate); - final Instant startTime = Instant.now(); - LOG.info("Pipeline started at {}", startTime); - LOG.info("Processing samples from {}", lastRunDate); - long sampleCount = 0; - - try (final AdaptiveThreadPoolExecutor executorService = - AdaptiveThreadPoolExecutor.create( - 100, - 10000, - true, - pipelinesProperties.getThreadCount(), - pipelinesProperties.getThreadCountMax())) { - final Map> futures = new HashMap<>(); - - for (final EntityModel sampleResource : - bioSamplesClient.fetchSampleResourceAll("", filters)) { - LOG.trace("Handling " + sampleResource); - final Sample sample = sampleResource.getContent(); - sampleCount++; - - if (sample == null) { - throw new RuntimeException("Sample should not be null"); - } - - final Callable task = - new SampleCopydownCallable( - bioSamplesClient, sample, pipelinesProperties.getProxyWebinId()); - - futures.put(sample.getAccession(), executorService.submit(task)); - } - - LOG.info("waiting for futures"); - // wait for anything to finish - ThreadUtils.checkAndCallbackFutures(futures, 0, pipelineFutureCallback); - pipelineHelperService.updateLastRunDate(pipelineLastRun, startDate); - } catch (final Exception e) { - LOG.error("Pipeline failed to finish successfully", e); - throw e; - } finally { - final Instant endTime = Instant.now(); - LOG.info("Total samples 
processed {}", sampleCount); - LOG.info("Total curation objects added {}", pipelineFutureCallback.getTotalCount()); - LOG.info("Pipeline finished at {}", endTime); - LOG.info( - "Pipeline total running time {} seconds", - Duration.between(startTime, endTime).getSeconds()); - - final PipelineAnalytics pipelineAnalytics = - new PipelineAnalytics( - "copydown", startTime, endTime, sampleCount, pipelineFutureCallback.getTotalCount()); - pipelineAnalytics.setDateRange(filters); - analyticsService.persistPipelineAnalytics(pipelineAnalytics); - - // now print a list of things that failed - final ConcurrentLinkedQueue failedQueue = SampleCopydownCallable.failedQueue; - - if (failedQueue.size() > 0) { - // put the first ones on the queue into a list - // limit the size of list to avoid overload - final List fails = new LinkedList<>(); - - while (failedQueue.peek() != null) { - fails.add(failedQueue.poll()); - } - - final String failures = "Failed files (" + fails.size() + ") " + String.join(" , ", fails); - - LOG.info(failures); - } - } - // TODO re-check existing curations - } -} diff --git a/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/copydown/SampleCopydownCallable.java b/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/copydown/SampleCopydownCallable.java deleted file mode 100644 index 0c05599535..0000000000 --- a/pipelines/copydown/src/main/java/uk/ac/ebi/biosamples/copydown/SampleCopydownCallable.java +++ /dev/null @@ -1,209 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. 
See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.copydown; - -import java.util.*; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import uk.ac.ebi.biosamples.PipelineResult; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.core.model.*; - -public class SampleCopydownCallable implements Callable { - private static final Logger LOG = LoggerFactory.getLogger(SampleCopydownCallable.class); - private static final String ORGANISM = "organism"; - private static final Attribute mixedAttribute = - Attribute.build( - ORGANISM, "mixed sample", "http://purl.obolibrary.org/obo/NCBITaxon_1427524", null); - private static final String DERIVED_FROM = "derived from"; - private final Sample sample; - private final BioSamplesClient bioSamplesClient; - private final String webinId; - private int curationCount; - static final ConcurrentLinkedQueue failedQueue = new ConcurrentLinkedQueue<>(); - - public SampleCopydownCallable( - final BioSamplesClient bioSamplesClient, final Sample sample, final String webinId) { - this.bioSamplesClient = bioSamplesClient; - this.sample = sample; - this.webinId = webinId; - curationCount = 0; - } - - @Override - public PipelineResult call() { - boolean success = true; - final String accession = sample.getAccession(); - - LOG.info("Handling sample for copy-down " + accession); - - try { - final SortedSet attributes = sample.getAttributes(); - - if (hasDerivedFromRelationship()) { - processDerivedFromAttributes(attributes); - } - } catch (final Exception e) { - success = false; - } - - return new PipelineResult(accession, curationCount, success); - } - - private boolean hasDerivedFromRelationship() { - return sample.getRelationships().stream() - .anyMatch( - relationship -> - 
DERIVED_FROM.equalsIgnoreCase(relationship.getType()) - && sample.getAccession().equals(relationship.getSource())); - } - - private void processDerivedFromAttributes(final SortedSet childAttributes) { - final Map> sampleAccessionToAttributeMap = - getAttributesOfParentSamples(sample); - final Set qualifyingCopyDownAttributes = - getQualifyingCopyDownAttributes(sampleAccessionToAttributeMap, childAttributes); - - qualifyingCopyDownAttributes.removeIf( - attribute -> ORGANISM.equalsIgnoreCase(attribute.getType())); - applyCopyDownCuration(qualifyingCopyDownAttributes); - } - - private Set getQualifyingCopyDownAttributes( - final Map> sampleAccessionToAttributeMap, - final SortedSet attributesOfTheChildSample) { - final Set qualifyingCopyDownAttributes = new TreeSet<>(); - final Set uniqueParentSamplesAttributes = mergeSets(sampleAccessionToAttributeMap); - - uniqueParentSamplesAttributes.forEach( - parentAttr -> { - if (attributesOfTheChildSample.stream() - .noneMatch(childAttr -> childAttr.getType().equalsIgnoreCase(parentAttr.getType()))) { - qualifyingCopyDownAttributes.add(parentAttr); - } - }); - - return qualifyingCopyDownAttributes; - } - - public static Set mergeSets(final Map> map) { - final Set mergedSet = new TreeSet<>(); // Using TreeSet to keep elements sorted - final Set uniqueElements = new HashSet<>(); // Elements appearing in only one set - - for (final Set set : map.values()) { - final Set duplicateCheck = new HashSet<>(); - - for (final Attribute element : set) { - // Add to uniqueElements if it hasn't been added before - if (!duplicateCheck.contains(element.getType()) && !uniqueElements.contains(element)) { - uniqueElements.add(element); - } else { - // If element already exists in uniqueElements, remove it - uniqueElements.remove(element); - // Add to duplicateCheck to avoid adding it again - duplicateCheck.add(element.getType()); - } - } - } - - mergedSet.addAll(uniqueElements); - - return mergedSet; - } - - private void 
applyCopyDownCuration(final Set attributes) { - if (!attributes.isEmpty()) { - LOG.info( - "Adding " - + attributes.size() - + " copy-down curations for sample " - + sample.getAccession()); - - attributes.forEach(this::applyCuration); - } else { - LOG.info("No copy-down curations for sample " + sample.getAccession()); - } - } - - private void applyCuration(final Attribute attribute) { - final Set postAttributes = new HashSet<>(); - - postAttributes.add(attribute); - - final Curation curation = Curation.build(Collections.emptyList(), postAttributes); - - bioSamplesClient.persistCuration(sample.getAccession(), curation, webinId); - curationCount++; - } - - private Map> getAttributesOfParentSamples(final Sample sample) { - final Map> sampleAccessionToAttributeMap = new HashMap<>(); - - for (final Relationship relationship : sample.getRelationships()) { - if (DERIVED_FROM.equalsIgnoreCase(relationship.getType()) - && sample.getAccession().equals(relationship.getSource())) { - final String parentSample = relationship.getTarget(); - - LOG.trace("checking derived from " + parentSample); - - bioSamplesClient - .fetchSampleResource(parentSample) - .ifPresent( - sampleResource -> { - final Sample parent = sampleResource.getContent(); - - if (parent != null) { - if (parent.getSubmittedVia() == SubmittedViaType.PIPELINE_IMPORT) { - sampleAccessionToAttributeMap.put( - parent.getAccession(), - parent.getAttributes().stream() - .filter( - attribute -> - attribute.getTag().equals("attribute") - && isAttributeEligibleForCopydown(attribute.getType())) - .collect(Collectors.toSet())); - } else { - sampleAccessionToAttributeMap.put( - parent.getAccession(), - parent.getAttributes().stream() - .filter( - attribute -> - !attribute.getType().startsWith("ENA") - && isAttributeEligibleForCopydown(attribute.getType())) - .collect(Collectors.toSet())); - } - } - }); - } - } - - return sampleAccessionToAttributeMap; - } - - private boolean isAttributeEligibleForCopydown(final String type) { 
- return !type.startsWith("SRA accession") - && !type.startsWith("broker name") - && !type.startsWith("INSDC") - && !type.startsWith("title") - && !type.startsWith("description") - && !type.startsWith("Submitter Id") - && !type.startsWith("Secondary Id") - && !type.startsWith("organism") - && !type.startsWith("uuid") - && !type.startsWith("individual_name") - && !type.startsWith("anonymized_name") - && !type.startsWith("common name") - && !type.startsWith("ENA"); - } -} diff --git a/pipelines/curami/pom.xml b/pipelines/curami/pom.xml deleted file mode 100644 index d268574b17..0000000000 --- a/pipelines/curami/pom.xml +++ /dev/null @@ -1,68 +0,0 @@ - - - - - 4.0.0 - pipelines-curami - pipelines-curami - jar - - - - - uk.ac.ebi.biosamples - biosamples - 5.3.15-SNAPSHOT - ../../ - - - - - org.springframework.boot - spring-boot-starter-cache - - - uk.ac.ebi.biosamples - pipelines-common - 5.3.15-SNAPSHOT - - - uk.ac.ebi.biosamples - core - 5.3.15-SNAPSHOT - - - org.springframework.hateoas - spring-hateoas - 1.3.4 - - - com.github.ben-manes.caffeine - caffeine - - - org.apache.commons - commons-lang3 - 3.7 - test - - - - - - - - org.springframework.boot - spring-boot-maven-plugin - - - - build-info - - - - - - - diff --git a/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/Application.java b/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/Application.java deleted file mode 100644 index de7a03296b..0000000000 --- a/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/Application.java +++ /dev/null @@ -1,155 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. 
You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples; - -import org.apache.http.HeaderElement; -import org.apache.http.HeaderElementIterator; -import org.apache.http.HttpHost; -import org.apache.http.HttpResponse; -import org.apache.http.client.HttpClient; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.conn.ConnectionKeepAliveStrategy; -import org.apache.http.conn.routing.HttpRoute; -import org.apache.http.impl.client.cache.CacheConfig; -import org.apache.http.impl.client.cache.CachingHttpClientBuilder; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; -import org.apache.http.message.BasicHeaderElementIterator; -import org.apache.http.protocol.HTTP; -import org.apache.http.protocol.HttpContext; -import org.springframework.boot.SpringApplication; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; -import org.springframework.boot.web.client.RestTemplateCustomizer; -import org.springframework.cache.annotation.EnableCaching; -import org.springframework.context.ConfigurableApplicationContext; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.ComponentScan; -import org.springframework.context.annotation.FilterType; -import org.springframework.context.annotation.Import; -import org.springframework.data.mongodb.repository.config.EnableMongoRepositories; -import org.springframework.http.client.HttpComponentsClientHttpRequestFactory; -import 
org.springframework.security.config.annotation.web.configuration.EnableWebSecurity; -import org.springframework.web.client.RestTemplate; -import uk.ac.ebi.biosamples.configuration.ExclusionConfiguration; -import uk.ac.ebi.biosamples.service.EnaConfig; -import uk.ac.ebi.biosamples.service.EnaSampleToBioSampleConversionService; -import uk.ac.ebi.biosamples.service.EraProDao; -import uk.ac.ebi.biosamples.utils.PipelineUtils; - -@SpringBootApplication(exclude = DataSourceAutoConfiguration.class) -@ComponentScan( - excludeFilters = { - @ComponentScan.Filter( - type = FilterType.ASSIGNABLE_TYPE, - value = {EnaConfig.class, EraProDao.class, EnaSampleToBioSampleConversionService.class}) - }) -@Import(ExclusionConfiguration.class) -@EnableCaching -@EnableWebSecurity -@EnableMongoRepositories(basePackages = "uk.ac.ebi.biosamples.repository") -public class Application { - - public static void main(final String[] args) { - final ConfigurableApplicationContext ctx = SpringApplication.run(Application.class, args); - PipelineUtils.exitPipeline(ctx); - } - - @Bean - public RestTemplate restTemplate(final RestTemplateCustomizer restTemplateCustomizer) { - final RestTemplate restTemplate = new RestTemplate(); - restTemplateCustomizer.customize(restTemplate); - return restTemplate; - } - - @Bean - public RestTemplateCustomizer restTemplateCustomizer( - final BioSamplesProperties bioSamplesProperties, - final PipelinesProperties piplinesProperties) { - return new RestTemplateCustomizer() { - @Override - public void customize(final RestTemplate restTemplate) { - - // use a keep alive strategy to try to make it easier to maintain connections for - // reuse - final ConnectionKeepAliveStrategy keepAliveStrategy = - new ConnectionKeepAliveStrategy() { - @Override - public long getKeepAliveDuration( - final HttpResponse response, final HttpContext context) { - - // check if there is a non-standard keep alive header present - final HeaderElementIterator it = - new 
BasicHeaderElementIterator(response.headerIterator(HTTP.CONN_KEEP_ALIVE)); - while (it.hasNext()) { - final HeaderElement he = it.nextElement(); - final String param = he.getName(); - final String value = he.getValue(); - if (value != null && param.equalsIgnoreCase("timeout")) { - return Long.parseLong(value) * 1000; - } - } - // default to 60s if no header - return 60 * 1000; - } - }; - - // set a number of connections to use at once for multiple threads - final PoolingHttpClientConnectionManager poolingHttpClientConnectionManager = - new PoolingHttpClientConnectionManager(); - poolingHttpClientConnectionManager.setMaxTotal(piplinesProperties.getConnectionCountMax()); - poolingHttpClientConnectionManager.setDefaultMaxPerRoute( - piplinesProperties.getConnectionCountDefault()); - poolingHttpClientConnectionManager.setMaxPerRoute( - new HttpRoute(HttpHost.create(piplinesProperties.getZooma())), - piplinesProperties.getConnectionCountZooma()); - poolingHttpClientConnectionManager.setMaxPerRoute( - new HttpRoute(HttpHost.create(bioSamplesProperties.getOls())), - piplinesProperties.getConnectionCountOls()); - - // set a local cache for cacheable responses - final CacheConfig cacheConfig = - CacheConfig.custom() - .setMaxCacheEntries(1024) - .setMaxObjectSize(1024 * 1024) // max size of 1Mb - // number of entries x size of entries = 1Gb total cache size - .setSharedCache(false) // act like a browser cache not a middle-hop cache - .build(); - - // set a timeout limit - // TODO put this in application.properties - final int timeout = 60; // in seconds - final RequestConfig config = - RequestConfig.custom() - .setConnectTimeout(timeout * 1000) // time to establish the connection with the - // remote host - .setConnectionRequestTimeout( - timeout * 1000) // maximum time of inactivity between two - // data packets - .setSocketTimeout(timeout * 1000) - .build(); // time to wait for a connection from the connection - // manager/pool - - // make the actual client - final 
HttpClient httpClient = - CachingHttpClientBuilder.create() - .setCacheConfig(cacheConfig) - .useSystemProperties() - .setConnectionManager(poolingHttpClientConnectionManager) - .setKeepAliveStrategy(keepAliveStrategy) - .setDefaultRequestConfig(config) - .build(); - - // and wire it into the resttemplate - restTemplate.setRequestFactory(new HttpComponentsClientHttpRequestFactory(httpClient)); - } - }; - } -} diff --git a/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/curation/CuramiApplicationRunner.java b/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/curation/CuramiApplicationRunner.java deleted file mode 100644 index c0a5a25f80..0000000000 --- a/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/curation/CuramiApplicationRunner.java +++ /dev/null @@ -1,211 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. 
-*/ -package uk.ac.ebi.biosamples.curation; - -import java.io.*; -import java.time.Duration; -import java.time.Instant; -import java.time.LocalDate; -import java.util.*; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.Future; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.boot.ApplicationArguments; -import org.springframework.boot.ApplicationRunner; -import org.springframework.core.io.ClassPathResource; -import org.springframework.hateoas.EntityModel; -import org.springframework.stereotype.Component; -import uk.ac.ebi.biosamples.PipelineFutureCallback; -import uk.ac.ebi.biosamples.PipelineResult; -import uk.ac.ebi.biosamples.PipelinesProperties; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.core.model.PipelineAnalytics; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.core.model.SampleAnalytics; -import uk.ac.ebi.biosamples.core.model.filter.Filter; -import uk.ac.ebi.biosamples.model.PipelineLastRun; -import uk.ac.ebi.biosamples.model.PipelineName; -import uk.ac.ebi.biosamples.mongo.model.MongoCurationRule; -import uk.ac.ebi.biosamples.mongo.repository.MongoCurationRuleRepository; -import uk.ac.ebi.biosamples.mongo.service.AnalyticsService; -import uk.ac.ebi.biosamples.service.PipelineHelperService; -import uk.ac.ebi.biosamples.utils.PipelineUtils; -import uk.ac.ebi.biosamples.utils.thread.AdaptiveThreadPoolExecutor; -import uk.ac.ebi.biosamples.utils.thread.ThreadUtils; - -@Component -public class CuramiApplicationRunner implements ApplicationRunner { - private static final Logger LOG = LoggerFactory.getLogger(CuramiApplicationRunner.class); - private static final PipelineName PIPELINE_NAME = PipelineName.CURAMI; - - private final BioSamplesClient bioSamplesClient; - private final PipelinesProperties pipelinesProperties; - private final Map 
curationRules; - private final MongoCurationRuleRepository repository; - private final AnalyticsService analyticsService; - private final PipelineFutureCallback pipelineFutureCallback; - private final PipelineHelperService pipelineHelperService; - - public CuramiApplicationRunner( - final BioSamplesClient bioSamplesClient, - final PipelinesProperties pipelinesProperties, - final MongoCurationRuleRepository repository, - final AnalyticsService analyticsService, - PipelineHelperService pipelineHelperService) { - this.bioSamplesClient = bioSamplesClient; - this.pipelinesProperties = pipelinesProperties; - this.repository = repository; - this.analyticsService = analyticsService; - this.pipelineHelperService = pipelineHelperService; - curationRules = new HashMap<>(); - pipelineFutureCallback = new PipelineFutureCallback(); - } - - @Override - public void run(final ApplicationArguments args) throws Exception { - PipelineLastRun pipelineLastRun = pipelineHelperService.getLastRunDate(PIPELINE_NAME); - LocalDate lastRunDate = pipelineLastRun.getLastRunDate(); - LocalDate startDate = LocalDate.now(); - final Collection filters = PipelineUtils.getLastRunFilters(lastRunDate, startDate); - final Instant startTime = Instant.now(); - LOG.info("Pipeline started at {}", startTime); - LOG.info("Processing samples from {}", lastRunDate); - long sampleCount = 0; - final SampleAnalytics sampleAnalytics = new SampleAnalytics(); - - loadCurationRulesFromFileToDb(getFileNameFromArgs(args)); - curationRules.putAll(loadCurationRulesToMemory()); - LOG.info("Found {} curation rules", curationRules.size()); - - try (final AdaptiveThreadPoolExecutor executorService = - AdaptiveThreadPoolExecutor.create( - 100, - 10000, - true, - pipelinesProperties.getThreadCount(), - pipelinesProperties.getThreadCountMax())) { - - final Map> futures = new HashMap<>(); - for (final EntityModel sampleResource : - bioSamplesClient.fetchSampleResourceAll("", filters)) { - LOG.trace("Handling {}", sampleResource); 
- final Sample sample = sampleResource.getContent(); - Objects.requireNonNull(sample); - collectSampleTypes(sample, sampleAnalytics); - - final Callable task = - new SampleCuramiCallable( - bioSamplesClient, sample, pipelinesProperties.getProxyWebinId(), curationRules); - futures.put(sample.getAccession(), executorService.submit(task)); - - if (++sampleCount % 5000 == 0) { - LOG.info("Scheduled sample count {}", sampleCount); - } - } - - LOG.info("Waiting for all scheduled tasks to finish"); - ThreadUtils.checkAndCallbackFutures(futures, 0, pipelineFutureCallback); - pipelineHelperService.updateLastRunDate(pipelineLastRun, startDate); - } catch (final Exception e) { - LOG.error("Pipeline failed to finish successfully", e); - throw e; - } finally { - final Instant endTime = Instant.now(); - LOG.info("Total samples processed {}", sampleCount); - LOG.info("Total curation objects added {}", pipelineFutureCallback.getTotalCount()); - LOG.info("Pipeline finished at {}", endTime); - LOG.info( - "Pipeline total running time {} seconds", - Duration.between(startTime, endTime).getSeconds()); - - final PipelineAnalytics pipelineAnalytics = - new PipelineAnalytics( - "curami", startTime, endTime, sampleCount, pipelineFutureCallback.getTotalCount()); - pipelineAnalytics.setDateRange(filters); - sampleAnalytics.setDateRange(filters); - sampleAnalytics.setProcessedRecords(sampleCount); - analyticsService.persistSampleAnalytics(startTime, sampleAnalytics); - analyticsService.persistPipelineAnalytics(pipelineAnalytics); - } - } - - private Map loadCurationRulesToMemory() { - final List mongoCurationRules = repository.findAll(); - return mongoCurationRules.stream() - .collect( - Collectors.toMap( - MongoCurationRule::getAttributePre, MongoCurationRule::getAttributePost)); - } - - private void loadCurationRulesFromFileToDb(final String filePath) { - final Reader reader; - // read it from given filepath, else read it from classpath - try { - if (filePath == null || filePath.isEmpty()) 
{ - final ClassPathResource resource = new ClassPathResource("curation_rules.csv"); - reader = new InputStreamReader(resource.getInputStream()); - } else { - reader = new FileReader(filePath); - } - } catch (final IOException e) { - LOG.error("Could not find specified file in {} or classpath", filePath, e); - return; - } - - try (final BufferedReader bf = new BufferedReader(reader)) { - String line = bf.readLine(); - LOG.info("Reading file with headers: {}", line); - while ((line = bf.readLine()) != null) { - final String[] curationRule = line.split(","); - final MongoCurationRule mongoCurationRule = - MongoCurationRule.build(curationRule[0].trim(), curationRule[1].trim()); - repository.save(mongoCurationRule); - } - } catch (final IOException e) { - LOG.error("Could not find file in {} or classpath", filePath, e); - } - } - - private String getFileNameFromArgs(final ApplicationArguments args) { - String curationRulesFile = null; - if (args.getOptionNames().contains("file")) { - curationRulesFile = args.getOptionValues("file").get(0); - } - - return curationRulesFile; - } - - private String handleFailedSamples() { - final ConcurrentLinkedQueue failedQueue = SampleCuramiCallable.failedQueue; - String failures = null; - if (!failedQueue.isEmpty()) { - final List fails = new LinkedList<>(); - while (failedQueue.peek() != null) { - fails.add(failedQueue.poll()); - } - failures = "Failed files (" + fails.size() + ") " + String.join(" , ", fails); - LOG.warn(failures); - } else { - LOG.info("Pipeline completed without any failures"); - } - return failures; - } - - private void collectSampleTypes(final Sample sample, final SampleAnalytics sampleAnalytics) { - final String accessionPrefix = sample.getAccession().substring(0, 4); - final String submittedChannel = sample.getSubmittedVia().name(); - sampleAnalytics.addToCenter(accessionPrefix); - sampleAnalytics.addToChannel(submittedChannel); - } -} diff --git 
a/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/curation/SampleCuramiCallable.java b/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/curation/SampleCuramiCallable.java deleted file mode 100644 index 003bf74933..0000000000 --- a/pipelines/curami/src/main/java/uk/ac/ebi/biosamples/curation/SampleCuramiCallable.java +++ /dev/null @@ -1,92 +0,0 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.curation; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.SortedSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import uk.ac.ebi.biosamples.PipelineResult; -import uk.ac.ebi.biosamples.client.BioSamplesClient; -import uk.ac.ebi.biosamples.core.model.Attribute; -import uk.ac.ebi.biosamples.core.model.Curation; -import uk.ac.ebi.biosamples.core.model.Sample; - -public class SampleCuramiCallable implements Callable { - private static final Logger LOG = LoggerFactory.getLogger(SampleCuramiCallable.class); - static final ConcurrentLinkedQueue failedQueue = new ConcurrentLinkedQueue<>(); - private final Sample sample; - private final BioSamplesClient bioSamplesClient; - private final String webinId; - private final Map curationRules; - - SampleCuramiCallable( - final BioSamplesClient bioSamplesClient, - final Sample sample, - final String 
webinId, - final Map curationRules) { - this.bioSamplesClient = bioSamplesClient; - this.sample = sample; - this.webinId = webinId; - this.curationRules = curationRules; - } - - @Override - public PipelineResult call() { - int appliedCurations = 0; - boolean success = true; - try { - final List curations = getRuleBasedCurations(sample); - if (!curations.isEmpty()) { - LOG.info("{} curations added for sample {}", curations.size(), sample.getAccession()); - } - appliedCurations = curations.size(); - } catch (final Exception e) { - success = false; - failedQueue.add(sample.getAccession()); - LOG.error("Failed to add curation on sample: " + sample.getAccession(), e); - } - return new PipelineResult(sample.getAccession(), appliedCurations, success); - } - - private List getRuleBasedCurations(final Sample sample) { - final SortedSet attributes = sample.getAttributes(); - final List curations = new ArrayList<>(); - for (final Attribute a : attributes) { - // @here ignoring empty values is wrong. This has done as a workaround as pipeline fails - // if we - // post empty - // values to curation endpoint. Curation endpoint translate empty values as null values - // and - // throw an exception. 
- if (curationRules.containsKey(a.getType()) && !a.getValue().isEmpty()) { - final Curation curation = - Curation.build( - Attribute.build(a.getType(), a.getValue(), a.getTag(), a.getIri(), a.getUnit()), - Attribute.build( - curationRules.get(a.getType()), - a.getValue(), - a.getTag(), - a.getIri(), - a.getUnit())); - LOG.info("New curation found {}", curation); - bioSamplesClient.persistCuration(sample.getAccession(), curation, webinId); - curations.add(curation); - } - } - - return curations; - } -} diff --git a/pipelines/curami/src/main/resources/curation_rules.csv b/pipelines/curami/src/main/resources/curation_rules.csv deleted file mode 100644 index 101eb56271..0000000000 --- a/pipelines/curami/src/main/resources/curation_rules.csv +++ /dev/null @@ -1,23 +0,0 @@ -ATTRIBUTE_PRE,ATTRIBUTE_POST -patient age at primary dignosis years,patient age at primary diagnosis years -originial tumor type of initial cell,original tumor type of initial cell -enviromental conditions,environmental conditions -alcohol types beercider,alcohol types beer cider -overall suvival months,overall survival months -days of differentation,days of differentiation -cultivation temperture,cultivation temperature -overallsurvival months,overall survival months -overall surviaval delay,overall survival delay -pathological diagonosis,pathological diagnosis -antibody manufactuer,antibody manufacturer -differentation grade,differentiation grade -clinicalinformation,clinical information -survival timemonths,survival time months -developemental stage,developmental stage -colection date,collection date -survival.status,survival status -internvention,intervention -cutured with,cultured with -concetration,concentration -diseasestatus,disease status -treament time,treatment time \ No newline at end of file diff --git a/pipelines/pom.xml b/pipelines/pom.xml index 211330b37c..9ac9ff31b7 100644 --- a/pipelines/pom.xml +++ b/pipelines/pom.xml @@ -12,6 +12,12 @@ 5.3.15-SNAPSHOT + + + true + true + + @@ -29,8 
+35,6 @@ analytics curation zooma - copydown - curami sample-transformation-dtol ncbi sample-release diff --git a/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java b/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java index 808db642a5..d37ad47402 100644 --- a/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java +++ b/pipelines/reindex/src/main/java/uk/ac/ebi/biosamples/ReindexRunner.java @@ -38,9 +38,9 @@ * This runner will get a list of accessions from mongo directly, query the API to get the latest * information, and then send that information to Rabbit for the Solr Agent to reindex it into Solr. * - *

Mongo is queried instead of the API because the API is driven by Solr, and if Solr is - * incorrect (which it will be because why else would you run this) then it won't get the right - * information from the API. + *

Mongo is queried instead of the API. The API is driven by Solr, and if Solr is incorrect + * (which it will be because why else would you run this), then it won't get the right information + * from the API. * * @author faulcon */ @@ -66,7 +66,8 @@ public ReindexRunner( @Override public void run(final ApplicationArguments args) throws Exception { - final Collection filters = PipelineUtils.getDateFilters(args, "update"); + final Collection filters = + PipelineUtils.getDateFilters(args, PipelineUtils.DateType.update); final Map> futures = new HashMap<>(); ExecutorService executor = null; diff --git a/pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/Application.java b/pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/Application.java index 83dd6af292..684733a1f2 100644 --- a/pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/Application.java +++ b/pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/Application.java @@ -23,8 +23,11 @@ import org.apache.http.message.BasicHeaderElementIterator; import org.apache.http.protocol.HTTP; import org.springframework.boot.SpringApplication; +import org.springframework.boot.WebApplicationType; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration; +import org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration; import org.springframework.boot.web.client.RestTemplateCustomizer; import org.springframework.cache.annotation.EnableCaching; import org.springframework.context.ConfigurableApplicationContext; @@ -35,23 +38,38 @@ import org.springframework.http.client.HttpComponentsClientHttpRequestFactory; import org.springframework.web.client.RestTemplate; import uk.ac.ebi.biosamples.configuration.ExclusionConfiguration; +import 
uk.ac.ebi.biosamples.security.service.BioSamplesWebSecurityConfig; import uk.ac.ebi.biosamples.service.EnaConfig; import uk.ac.ebi.biosamples.service.EnaSampleToBioSampleConversionService; import uk.ac.ebi.biosamples.service.EraProDao; +import uk.ac.ebi.biosamples.service.PipelineHelperService; import uk.ac.ebi.biosamples.utils.PipelineUtils; -@SpringBootApplication(exclude = DataSourceAutoConfiguration.class) +@SpringBootApplication( + exclude = { + DataSourceAutoConfiguration.class, + SecurityAutoConfiguration.class, + UserDetailsServiceAutoConfiguration.class + }) @ComponentScan( excludeFilters = { @ComponentScan.Filter( type = FilterType.ASSIGNABLE_TYPE, - value = {EnaConfig.class, EraProDao.class, EnaSampleToBioSampleConversionService.class}) + value = { + EnaConfig.class, + EraProDao.class, + EnaSampleToBioSampleConversionService.class, + PipelineHelperService.class, + BioSamplesWebSecurityConfig.class + }) }) @Import(ExclusionConfiguration.class) @EnableCaching public class Application { public static void main(final String[] args) { - final ConfigurableApplicationContext ctx = SpringApplication.run(Application.class, args); + final SpringApplication springApplication = new SpringApplication(Application.class); + springApplication.setWebApplicationType(WebApplicationType.NONE); + final ConfigurableApplicationContext ctx = springApplication.run(args); PipelineUtils.exitPipeline(ctx); } diff --git a/pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/postrelease/SamplePostReleaseActionApplicationRunner.java b/pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/postrelease/SamplePostReleaseActionApplicationRunner.java index 1dc12bfdb1..0dd5699738 100644 --- a/pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/postrelease/SamplePostReleaseActionApplicationRunner.java +++ b/pipelines/sample-post-release-action/src/main/java/uk/ac/ebi/biosamples/postrelease/SamplePostReleaseActionApplicationRunner.java 
@@ -12,8 +12,9 @@ import java.time.Duration; import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; import java.util.*; -import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.Future; import org.slf4j.Logger; @@ -25,6 +26,8 @@ import uk.ac.ebi.biosamples.PipelinesProperties; import uk.ac.ebi.biosamples.client.BioSamplesClient; import uk.ac.ebi.biosamples.core.model.Sample; +import uk.ac.ebi.biosamples.core.model.SampleStatus; +import uk.ac.ebi.biosamples.core.model.filter.DateRangeFilter; import uk.ac.ebi.biosamples.core.model.filter.Filter; import uk.ac.ebi.biosamples.utils.PipelineUtils; import uk.ac.ebi.biosamples.utils.thread.AdaptiveThreadPoolExecutor; @@ -46,7 +49,21 @@ public SamplePostReleaseActionApplicationRunner( @Override public void run(final ApplicationArguments args) throws Exception { final Instant startTime = Instant.now(); - final Collection filters = PipelineUtils.getDateFilters(args, "release"); + final Collection filters = + PipelineUtils.getDateFilters(args, PipelineUtils.DateType.release); + final DateRangeFilter.DateRange dateRange = + filters.stream() + .filter(DateRangeFilter.class::isInstance) + .map(DateRangeFilter.class::cast) + .map(DateRangeFilter::getContent) + .flatMap(Optional::stream) + .findFirst() + .orElseThrow(() -> new IllegalStateException("Date range filter not found")); + final String fromDate = + DateTimeFormatter.ISO_LOCAL_DATE.format(dateRange.getFrom().atZone(ZoneOffset.UTC)); + final String effectiveUntilDate = + DateTimeFormatter.ISO_LOCAL_DATE.format(dateRange.getUntil().atZone(ZoneOffset.UTC)); + final List accessions = new ArrayList<>(); long sampleCount = 0; try (final AdaptiveThreadPoolExecutor executorService = @@ -66,17 +83,22 @@ public void run(final ApplicationArguments args) throws Exception { throw new RuntimeException("Sample should not be null"); } - LOG.info("Handling {}", sample.getAccession()); + 
if (sample.getRelease().isBefore(Instant.now()) + && sample.getStatus() == SampleStatus.PRIVATE) { + LOG.info("Handling {}", sample.getAccession()); - final Callable task = - new SamplePostReleaseActionCallable(bioSamplesWebinClient, sample); - sampleCount++; + accessions.add(sample.getAccession()); + + /*final Callable task = + new SamplePostReleaseActionCallable(bioSamplesWebinClient, sample);*/ + sampleCount++; + } if (sampleCount % 10000 == 0) { LOG.info("{} scheduled for processing", sampleCount); } - futures.put(sample.getAccession(), executorService.submit(task)); + // futures.put(sample.getAccession(), executorService.submit(task)); } LOG.info("waiting for futures"); @@ -87,10 +109,16 @@ public void run(final ApplicationArguments args) throws Exception { throw e; } finally { + accessions.forEach(System.out::println); final Instant endTime = Instant.now(); final String failures; LOG.info("Total samples processed {}", sampleCount); + LOG.info( + "Samples received for effective release date range from {} to {}: {}", + fromDate, + effectiveUntilDate, + sampleCount); LOG.info("Pipeline finished at {}", endTime); LOG.info( "Pipeline total running time {} seconds", diff --git a/pipelines/sample-transformation-dtol/src/main/java/uk/ac/ebi/biosamples/curation/TransformationApplicationRunner.java b/pipelines/sample-transformation-dtol/src/main/java/uk/ac/ebi/biosamples/curation/TransformationApplicationRunner.java index a1d27b657d..30a342eea1 100644 --- a/pipelines/sample-transformation-dtol/src/main/java/uk/ac/ebi/biosamples/curation/TransformationApplicationRunner.java +++ b/pipelines/sample-transformation-dtol/src/main/java/uk/ac/ebi/biosamples/curation/TransformationApplicationRunner.java @@ -49,7 +49,8 @@ public TransformationApplicationRunner( @Override public void run(final ApplicationArguments args) throws Exception { - final Collection filters = PipelineUtils.getDateFilters(args, "update"); + final Collection filters = + PipelineUtils.getDateFilters(args, 
PipelineUtils.DateType.update); final Instant startTime = Instant.now(); LOG.info("Pipeline started at {}", startTime); long sampleCount = 0; diff --git a/pipelines/zooma/src/main/java/uk/ac/ebi/biosamples/zooma/ZoomaApplicationRunner.java b/pipelines/zooma/src/main/java/uk/ac/ebi/biosamples/zooma/ZoomaApplicationRunner.java index 03718a3ff1..3d86c09c12 100644 --- a/pipelines/zooma/src/main/java/uk/ac/ebi/biosamples/zooma/ZoomaApplicationRunner.java +++ b/pipelines/zooma/src/main/java/uk/ac/ebi/biosamples/zooma/ZoomaApplicationRunner.java @@ -12,6 +12,7 @@ import java.time.Duration; import java.time.Instant; +import java.time.LocalDate; import java.util.*; import java.util.concurrent.Callable; import java.util.concurrent.Future; @@ -29,7 +30,10 @@ import uk.ac.ebi.biosamples.core.model.Sample; import uk.ac.ebi.biosamples.core.model.filter.Filter; import uk.ac.ebi.biosamples.core.service.CurationApplicationService; +import uk.ac.ebi.biosamples.model.PipelineLastRun; +import uk.ac.ebi.biosamples.model.PipelineName; import uk.ac.ebi.biosamples.mongo.service.AnalyticsService; +import uk.ac.ebi.biosamples.service.PipelineHelperService; import uk.ac.ebi.biosamples.utils.PipelineUtils; import uk.ac.ebi.biosamples.utils.thread.AdaptiveThreadPoolExecutor; import uk.ac.ebi.biosamples.utils.thread.ThreadUtils; @@ -37,6 +41,7 @@ @Component public class ZoomaApplicationRunner implements ApplicationRunner { private static final Logger LOG = LoggerFactory.getLogger(ZoomaApplicationRunner.class); + private static final PipelineName PIPELINE_NAME = PipelineName.ZOOMA; private final BioSamplesClient bioSamplesClient; private final PipelinesProperties pipelinesProperties; @@ -44,25 +49,33 @@ public class ZoomaApplicationRunner implements ApplicationRunner { private final CurationApplicationService curationApplicationService; private final AnalyticsService analyticsService; private final PipelineFutureCallback pipelineFutureCallback; + private final PipelineHelperService 
pipelineHelperService; public ZoomaApplicationRunner( final BioSamplesClient bioSamplesClient, final PipelinesProperties pipelinesProperties, final ZoomaProcessor zoomaProcessor, final CurationApplicationService curationApplicationService, - final AnalyticsService analyticsService) { + final AnalyticsService analyticsService, + final PipelineHelperService pipelineHelperService) { this.bioSamplesClient = bioSamplesClient; this.pipelinesProperties = pipelinesProperties; this.zoomaProcessor = zoomaProcessor; this.curationApplicationService = curationApplicationService; this.analyticsService = analyticsService; - pipelineFutureCallback = new PipelineFutureCallback(); + this.pipelineHelperService = pipelineHelperService; + this.pipelineFutureCallback = new PipelineFutureCallback(); } @Override public void run(final ApplicationArguments args) { + PipelineLastRun pipelineLastRun = pipelineHelperService.getLastRunDate(PIPELINE_NAME); + LocalDate lastRunDate = pipelineLastRun.getLastRunDate(); + LocalDate startDate = LocalDate.now(); final Instant startTime = Instant.now(); - final Collection filters = PipelineUtils.getDateFilters(args, "update"); + final Collection filters = PipelineUtils.getLastRunFilters(lastRunDate, startDate); + LOG.info("Pipeline started at {}", startTime); + LOG.info("Processing samples from {}", lastRunDate); long sampleCount = 0; try (final AdaptiveThreadPoolExecutor executorService = diff --git a/webapps/core-v2/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java b/webapps/core-v2/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java index 1f489704b3..5894dd4e6d 100644 --- a/webapps/core-v2/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java +++ b/webapps/core-v2/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java @@ -29,13 +29,13 @@ import uk.ac.ebi.biosamples.core.model.Attribute; import uk.ac.ebi.biosamples.core.model.Relationship; import uk.ac.ebi.biosamples.core.model.Sample; +import 
uk.ac.ebi.biosamples.core.model.SampleStatus; import uk.ac.ebi.biosamples.core.model.SubmittedViaType; import uk.ac.ebi.biosamples.core.model.structured.AbstractData; import uk.ac.ebi.biosamples.core.service.SampleValidator; +import uk.ac.ebi.biosamples.core.validation.SamplePersistencePolicy; +import uk.ac.ebi.biosamples.core.validation.SampleStatusTransitionPolicy; import uk.ac.ebi.biosamples.exception.GlobalExceptions; -import uk.ac.ebi.biosamples.mongo.model.MongoSample; -import uk.ac.ebi.biosamples.mongo.model.MongoSampleMessage; -import uk.ac.ebi.biosamples.mongo.repository.MongoSampleMessageRepository; import uk.ac.ebi.biosamples.mongo.repository.MongoSampleRepository; import uk.ac.ebi.biosamples.mongo.service.*; import uk.ac.ebi.biosamples.security.service.BioSamplesCrossSourceIngestAccessControlService; @@ -51,10 +51,8 @@ public class SampleService { private static final Logger log = LoggerFactory.getLogger(SampleService.class); private final MongoAccessionService mongoAccessionService; private final MongoSampleRepository mongoSampleRepository; - private final MongoSampleMessageRepository mongoSampleMessageRepository; private final MongoSampleToSampleConverter mongoSampleToSampleConverter; private final SampleToMongoSampleConverter sampleToMongoSampleConverter; - private final MongoRelationshipToRelationshipConverter mongoRelationshipToRelationshipConverter; private final SampleValidator sampleValidator; private final SampleReadService sampleReadService; private final MessagingService messagingService; @@ -66,10 +64,8 @@ public class SampleService { public SampleService( @Qualifier("SampleAccessionService") final MongoAccessionService mongoAccessionService, final MongoSampleRepository mongoSampleRepository, - final MongoSampleMessageRepository mongoSampleMessageRepository, final MongoSampleToSampleConverter mongoSampleToSampleConverter, final SampleToMongoSampleConverter sampleToMongoSampleConverter, - final MongoRelationshipToRelationshipConverter 
mongoRelationshipToRelationshipConverter, final SampleValidator sampleValidator, final SampleReadService sampleReadService, final MessagingService messagingService, @@ -78,10 +74,8 @@ public SampleService( bioSamplesCrossSourceIngestAccessControlService) { this.mongoAccessionService = mongoAccessionService; this.mongoSampleRepository = mongoSampleRepository; - this.mongoSampleMessageRepository = mongoSampleMessageRepository; this.mongoSampleToSampleConverter = mongoSampleToSampleConverter; this.sampleToMongoSampleConverter = sampleToMongoSampleConverter; - this.mongoRelationshipToRelationshipConverter = mongoRelationshipToRelationshipConverter; this.sampleValidator = sampleValidator; this.sampleReadService = sampleReadService; this.messagingService = messagingService; @@ -100,88 +94,7 @@ public Optional fetch(final String accession, final boolean applyCuratio */ private boolean isStoredSampleEmpty( final Sample newSample, final boolean isWebinSuperUser, final Sample oldSample) { - if (isWebinSuperUser) { - if (newSample.getSubmittedVia() == SubmittedViaType.FILE_UPLOADER) { - // file uploader submissions are done via superuser, but they are non-imported samples, - // needs to be handled safely - if (newSample.hasAccession()) { - return isStoredSampleEmpty(oldSample); - } - - return true; - } else { - // otherwise it is an ENA pipeline import, cannot be empty - return false; - } - } - - if (newSample.hasAccession()) { - return isStoredSampleEmpty(oldSample); - } - - return true; - } - - /** Returns true if the stored sample has no metadata. 
*/ - private boolean isStoredSampleEmpty(final Sample oldSample) { - return (oldSample.getTaxId() == null || oldSample.getTaxId() <= 0) - && oldSample.getAttributes().isEmpty() - && oldSample.getRelationships().isEmpty() - && oldSample.getPublications().isEmpty() - && oldSample.getContacts().isEmpty() - && oldSample.getOrganizations().isEmpty() - && oldSample.getData().isEmpty() - && oldSample.getExternalReferences().isEmpty() - && oldSample.getStructuredData().isEmpty(); - } - - // Because the fetch caches the sample, if an updated version is stored, we need to make - // sure that any cached version is removed. - // Note, pages of samples will not be cache busted, only single-accession sample retrieval - // @CacheEvict(cacheNames=WebappProperties.fetchUsing, key="#result.accession") - - /* - Called by V1 endpoints to persist samples - */ - public Sample persistSample( - Sample newSample, final Sample oldSample, final boolean isWebinSuperUser) { - final var errors = sampleValidator.validate(newSample); - - if (!errors.isEmpty()) { - log.error("Sample validation has failed : {}", errors); - - throw new GlobalExceptions.SampleMandatoryFieldsMissingException(String.join("|", errors)); - } - - if (newSample.hasAccession()) { - if (oldSample != null) { - newSample = updateFromCurrent(newSample, oldSample, isWebinSuperUser); - } else { - newSample = updateWhenNoneExists(newSample); - } - - var mongoSample = sampleToMongoSampleConverter.convert(newSample); - mongoSample = mongoSampleRepository.save(mongoSample); - - if (isTaxIdUpdated(oldSample, newSample)) { - mongoSampleMessageRepository.save( - new MongoSampleMessage(newSample.getAccession(), Instant.now(), newSample.getTaxId())); - } - - newSample = mongoSampleToSampleConverter.apply(mongoSample); - sendMessageToRabbitForIndexingToSolr( - newSample.getAccession(), getExistingRelationshipTargetsForIndexingInSolr(oldSample)); - } else { - newSample = createNew(newSample); - } - - // fetch returns sample with curations 
applied - final var sampleOptional = fetch(newSample.getAccession(), true); - - return sampleOptional.orElseThrow( - () -> - new RuntimeException( - "Failed to create newSample. Please contact the BioSamples Helpdesk at biosamples@ebi.ac.uk")); + return SamplePersistencePolicy.isStoredSampleEmpty(newSample, isWebinSuperUser, oldSample); } private Sample updateWhenNoneExists(Sample newSample) { @@ -222,23 +135,6 @@ private Sample updateWhenNoneExists(Sample newSample) { return newSample; } - private Sample updateFromCurrent( - Sample newSample, final Sample oldSample, final boolean isWebinSuperUser) { - final var savedSampleEmpty = isStoredSampleEmpty(newSample, isWebinSuperUser, oldSample); - - if (savedSampleEmpty) { - newSample = Sample.Builder.fromSample(newSample).withSubmitted(Instant.now()).build(); - } - - final var existingRelationships = - getExistingRelationshipTargetsForIndexingInSolr( - newSample.getAccession(), - Objects.requireNonNull(sampleToMongoSampleConverter.convert(oldSample))); - - return compareWithExistingAndUpdateSample( - newSample, oldSample, existingRelationships, savedSampleEmpty, isWebinSuperUser); - } - private Sample createNew(Sample newSample) { final var noSraAccession = newSample.getAttributes().stream() @@ -247,44 +143,12 @@ private Sample createNew(Sample newSample) { if (!noSraAccession) { newSample = validateAndPromoteSRAAccessionAttributeToField(newSample); } - newSample = mongoAccessionService.generateAccession(newSample, noSraAccession); sendMessageToRabbitForIndexingToSolr(newSample.getAccession(), Collections.emptyList()); return newSample; } - private boolean isTaxIdUpdated(final Sample oldSample, final Sample sample) { - return oldSample != null - && oldSample.getTaxId() != null - && !oldSample.getTaxId().equals(sample.getTaxId()); - } - - private List getExistingRelationshipTargetsForIndexingInSolr(final Sample oldSample) { - final var existingRelationshipTargets = new ArrayList(); - - if (oldSample != null) { - final 
var existingRelationships = - getExistingRelationshipTargetsForIndexingInSolr( - oldSample.getAccession(), - Objects.requireNonNull(sampleToMongoSampleConverter.convert(oldSample))); - - existingRelationshipTargets.addAll( - existingRelationships.stream() - .map( - relationship -> { - if (relationship.getSource().equals(oldSample.getAccession())) { - return relationship.getTarget(); - } - - return null; - }) - .toList()); - } - - return existingRelationshipTargets; - } - /* Called by V2 endpoints to persist samples */ @@ -320,6 +184,7 @@ public Sample persistSampleV2( newSample = updateWhenNoneExists(newSample); } + validatePublicStatusTransition(oldSample, newSample); var mongoSample = sampleToMongoSampleConverter.convert(newSample); mongoSample = mongoSampleRepository.save(mongoSample); @@ -354,9 +219,13 @@ public Sample accessionSample(Sample newSample) { throw new GlobalExceptions.SampleMandatoryFieldsMissingException(String.join("|", errors)); } - if (newSample - .getWebinSubmissionAccountId() - .equalsIgnoreCase(bioSamplesProperties.getBiosamplesClientWebinUsername())) { + final var webinSubmissionAccountId = newSample.getWebinSubmissionAccountId(); + + validatePublicStatusTransition(null, newSample); + + if (webinSubmissionAccountId != null + && webinSubmissionAccountId.equalsIgnoreCase( + bioSamplesProperties.getBiosamplesClientWebinUsername())) { // accessioning from ENA, sample name is the SRA accession here final var sraAccessionAttribute = Attribute.build(SRA_ACCESSION, newSample.getName()); @@ -384,20 +253,6 @@ public boolean isNotExistingAccession(final String accession) { } } - private List getExistingRelationshipTargetsForIndexingInSolr( - final String accession, final MongoSample mongoOldSample) { - final var oldRelationshipTargets = new ArrayList(); - - for (final var mongoRelationship : mongoOldSample.getRelationships()) { - if (mongoRelationship.getSource().equals(accession)) { - oldRelationshipTargets.add( - 
mongoRelationshipToRelationshipConverter.convert(mongoRelationship)); - } - } - - return oldRelationshipTargets; - } - private Sample compareWithExistingAndUpdateSample( Sample newSample, final Sample oldSample, @@ -411,6 +266,7 @@ private Sample compareWithExistingAndUpdateSample( // uploads though handleRelationships(newSample, existingRelationships); handleSRAAccession(newSample, oldSample, isWebinSuperUser); + newSample = handleSampleStatus(newSample, oldSample); newSample = validateAndPromoteSRAAccessionAttributeToField(newSample); if (newSample.getData().isEmpty()) { @@ -444,6 +300,23 @@ private Sample compareWithExistingAndUpdateSample( } } + private Sample handleSampleStatus(final Sample newSample, final Sample oldSample) { + if (newSample.getRelease() != null && !newSample.getRelease().isAfter(Instant.now())) { + final var oldSampleStatus = oldSample.getStatus(); + + if (oldSampleStatus == null || oldSampleStatus == SampleStatus.PRIVATE) { + return Sample.Builder.fromSample(newSample).withStatus(SampleStatus.PUBLIC).build(); + } + } + + return newSample; + } + + private void validatePublicStatusTransition(final Sample oldSample, final Sample newSample) { + final SampleStatus oldStatus = oldSample != null ? 
oldSample.getStatus() : null; + SampleStatusTransitionPolicy.validatePublicTransition(oldStatus, newSample.getStatus()); + } + private Sample validateAndPromoteSRAAccessionAttributeToField(final Sample newSample) { // Retrieve SRA accession attribute from new sample final var newSampleSraAccessionOptional = diff --git a/webapps/core/src/main/java/uk/ac/ebi/biosamples/controller/SamplesRestController.java b/webapps/core/src/main/java/uk/ac/ebi/biosamples/controller/SamplesRestController.java index e35fd0a99a..436cee763a 100644 --- a/webapps/core/src/main/java/uk/ac/ebi/biosamples/controller/SamplesRestController.java +++ b/webapps/core/src/main/java/uk/ac/ebi/biosamples/controller/SamplesRestController.java @@ -423,8 +423,7 @@ public ResponseEntity validateSample(@RequestBody final Sample sample) { path = "/accession", consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) - public ResponseEntity> accession( - @RequestBody Sample sample, @RequestHeader(name = "Authorization") final String token) { + public ResponseEntity> accession(@RequestBody Sample sample) { final Authentication loggedInUser = SecurityContextHolder.getContext().getAuthentication(); final String principle = sampleService.getPrinciple(loggedInUser); diff --git a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/RecommendationService.java b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/RecommendationService.java index 7cdebdbbdf..617ac2194e 100644 --- a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/RecommendationService.java +++ b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/RecommendationService.java @@ -1,150 +1,150 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -* file except in compliance with the License. 
You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.service; - -import java.util.Optional; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.stream.Collectors; -import javax.annotation.PostConstruct; -import org.springframework.stereotype.Service; -import uk.ac.ebi.biosamples.core.model.Attribute; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.curami.model.AttributeRecommendation; -import uk.ac.ebi.biosamples.curami.model.CuramiRecommendation; -import uk.ac.ebi.biosamples.curami.service.CuramiUtils; -import uk.ac.ebi.biosamples.curami.service.DataLoader; - -@Service -public class RecommendationService { - private final DataLoader dataLoader; - - public RecommendationService() { - dataLoader = new DataLoader(); - } - - @PostConstruct - private void init() { - dataLoader.loadDataFromClassPathResource(); - } - - public CuramiRecommendation getRecommendations(final Sample sample) { - final SortedSet attributes = - new TreeSet<>( - sample.getAttributes().stream().map(Attribute::getType).collect(Collectors.toList())); - - final SortedSet goodAttributes = new TreeSet<>(); - final SortedSet badAttributes = new TreeSet<>(); - final SortedSet missingAttributes = new TreeSet<>(); - - for (final String attribute : attributes) { - if (dataLoader.getPopularAttributes().contains(attribute)) { - goodAttributes.add(attribute); - continue; - } else if (dataLoader.getCurations().containsKey(attribute)) { - badAttributes.add( - new AttributeRecommendation.Builder() - .withAttribute(attribute) - 
.withRecommendation(dataLoader.getCurations().get(attribute)) - .build()); - continue; - } - - final String normalisedAttribute = - CuramiUtils.normaliseAttribute(attribute, dataLoader.getAbbreviations()); - if (dataLoader.getCurations().containsKey(normalisedAttribute)) { - badAttributes.add( - new AttributeRecommendation.Builder() - .withAttribute(attribute) - .withRecommendation(dataLoader.getCurations().get(normalisedAttribute)) - .build()); - continue; - } else if (dataLoader.getPopularAttributes().contains(normalisedAttribute)) { - badAttributes.add( - new AttributeRecommendation.Builder() - .withAttribute(attribute) - .withRecommendation(normalisedAttribute) - .build()); - continue; - } - - final Optional similarAttribute = - CuramiUtils.getMostSimilarAttribute(attribute, dataLoader.getPopularAttributes()); - if (similarAttribute.isPresent()) { - badAttributes.add( - new AttributeRecommendation.Builder() - .withAttribute(attribute) - .withRecommendation(similarAttribute.get()) - .build()); - } else { - missingAttributes.add(attribute); - } - } - - // have at least 5 known attributes to score 50 (4 similar attributes to score 20) - final int attributeQuality = - Math.min(50 * goodAttributes.size() / 5 + Math.min(5 * badAttributes.size(), 20), 50); - final int quality = getSampleQualityScore(sample, attributeQuality); - - return new CuramiRecommendation.Builder() - .withQuality(quality) - .withGoodAttributes(goodAttributes) - .withBadAttributes(badAttributes) - .withMissingAttributes(missingAttributes) - .build(); - } - - public Sample getRecommendedSample( - final Sample sample, final CuramiRecommendation recommendation) { - Sample recommendedSample = sample; - if (!recommendation.getAttributeRecommendations().isEmpty()) { - final SortedSet recommendedAttributes = new TreeSet<>(); - for (final Attribute a : sample.getAttributes()) { - boolean replaced = false; - for (final AttributeRecommendation rec : recommendation.getAttributeRecommendations()) { - if 
(a.getType().equals(rec.getAttribute())) { - recommendedAttributes.add( - Attribute.build( - rec.getRecommendation(), a.getValue(), a.getTag(), a.getIri(), a.getUnit())); - replaced = true; - } - } - if (!replaced) { - recommendedAttributes.add(a); - } - } - recommendedSample = - Sample.Builder.fromSample(sample).withAttributes(recommendedAttributes).build(); - } - return recommendedSample; - } - - private int getSampleQualityScore(final Sample sample, final int attributeQuality) { - int quality = attributeQuality; - if (sample.getExternalReferences() != null && !sample.getExternalReferences().isEmpty()) { - quality += 15; - } - if (sample.getRelationships() != null && !sample.getRelationships().isEmpty()) { - quality += 15; - } - if (sample.getPublications() != null && !sample.getPublications().isEmpty()) { - quality += 10; - } - if (sample.getOrganizations() != null && !sample.getOrganizations().isEmpty()) { - quality += 5; - } - if (sample.getContacts() != null && !sample.getContacts().isEmpty()) { - quality += 5; - } - - return quality; - } -} +/* +* Copyright 2021 EMBL - European Bioinformatics Institute +* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this +* file except in compliance with the License. You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software distributed under the +* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. 
+*/ +package uk.ac.ebi.biosamples.service; + +import java.util.Optional; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.stream.Collectors; +import javax.annotation.PostConstruct; +import org.springframework.stereotype.Service; +import uk.ac.ebi.biosamples.core.model.Attribute; +import uk.ac.ebi.biosamples.core.model.Sample; +import uk.ac.ebi.biosamples.curami.model.AttributeRecommendation; +import uk.ac.ebi.biosamples.curami.model.CuramiRecommendation; +import uk.ac.ebi.biosamples.curami.service.CuramiUtils; +import uk.ac.ebi.biosamples.curami.service.DataLoader; + +@Service +public class RecommendationService { + private final DataLoader dataLoader; + + public RecommendationService() { + dataLoader = new DataLoader(); + } + + @PostConstruct + private void init() { + dataLoader.loadDataFromClassPathResource(); + } + + public CuramiRecommendation getRecommendations(final Sample sample) { + final SortedSet attributes = + new TreeSet<>( + sample.getAttributes().stream().map(Attribute::getType).collect(Collectors.toList())); + + final SortedSet goodAttributes = new TreeSet<>(); + final SortedSet badAttributes = new TreeSet<>(); + final SortedSet missingAttributes = new TreeSet<>(); + + for (final String attribute : attributes) { + if (dataLoader.getPopularAttributes().contains(attribute)) { + goodAttributes.add(attribute); + continue; + } else if (dataLoader.getCurations().containsKey(attribute)) { + badAttributes.add( + new AttributeRecommendation.Builder() + .withAttribute(attribute) + .withRecommendation(dataLoader.getCurations().get(attribute)) + .build()); + continue; + } + + final String normalisedAttribute = + CuramiUtils.normaliseAttribute(attribute, dataLoader.getAbbreviations()); + if (dataLoader.getCurations().containsKey(normalisedAttribute)) { + badAttributes.add( + new AttributeRecommendation.Builder() + .withAttribute(attribute) + .withRecommendation(dataLoader.getCurations().get(normalisedAttribute)) + .build()); + continue; + 
} else if (dataLoader.getPopularAttributes().contains(normalisedAttribute)) { + badAttributes.add( + new AttributeRecommendation.Builder() + .withAttribute(attribute) + .withRecommendation(normalisedAttribute) + .build()); + continue; + } + + final Optional similarAttribute = + CuramiUtils.getMostSimilarAttribute(attribute, dataLoader.getPopularAttributes()); + if (similarAttribute.isPresent()) { + badAttributes.add( + new AttributeRecommendation.Builder() + .withAttribute(attribute) + .withRecommendation(similarAttribute.get()) + .build()); + } else { + missingAttributes.add(attribute); + } + } + + // have at least 5 known attributes to score 50 (4 similar attributes to score 20) + final int attributeQuality = + Math.min(50 * goodAttributes.size() / 5 + Math.min(5 * badAttributes.size(), 20), 50); + final int quality = getSampleQualityScore(sample, attributeQuality); + + return new CuramiRecommendation.Builder() + .withQuality(quality) + .withGoodAttributes(goodAttributes) + .withBadAttributes(badAttributes) + .withMissingAttributes(missingAttributes) + .build(); + } + + public Sample getRecommendedSample( + final Sample sample, final CuramiRecommendation recommendation) { + Sample recommendedSample = sample; + if (!recommendation.getAttributeRecommendations().isEmpty()) { + final SortedSet recommendedAttributes = new TreeSet<>(); + for (final Attribute a : sample.getAttributes()) { + boolean replaced = false; + for (final AttributeRecommendation rec : recommendation.getAttributeRecommendations()) { + if (a.getType().equals(rec.getAttribute())) { + recommendedAttributes.add( + Attribute.build( + rec.getRecommendation(), a.getValue(), a.getTag(), a.getIri(), a.getUnit())); + replaced = true; + } + } + if (!replaced) { + recommendedAttributes.add(a); + } + } + recommendedSample = + Sample.Builder.fromSample(sample).withAttributes(recommendedAttributes).build(); + } + return recommendedSample; + } + + private int getSampleQualityScore(final Sample sample, final int 
attributeQuality) { + int quality = attributeQuality; + if (sample.getExternalReferences() != null && !sample.getExternalReferences().isEmpty()) { + quality += 15; + } + if (sample.getRelationships() != null && !sample.getRelationships().isEmpty()) { + quality += 15; + } + if (sample.getPublications() != null && !sample.getPublications().isEmpty()) { + quality += 10; + } + if (sample.getOrganizations() != null && !sample.getOrganizations().isEmpty()) { + quality += 5; + } + if (sample.getContacts() != null && !sample.getContacts().isEmpty()) { + quality += 5; + } + + return quality; + } +} diff --git a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java index b619d02aff..223fd464ea 100644 --- a/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java +++ b/webapps/core/src/main/java/uk/ac/ebi/biosamples/service/SampleService.java @@ -29,6 +29,8 @@ import uk.ac.ebi.biosamples.core.model.*; import uk.ac.ebi.biosamples.core.model.structured.AbstractData; import uk.ac.ebi.biosamples.core.service.SampleValidator; +import uk.ac.ebi.biosamples.core.validation.SamplePersistencePolicy; +import uk.ac.ebi.biosamples.core.validation.SampleStatusTransitionPolicy; import uk.ac.ebi.biosamples.exception.GlobalExceptions; import uk.ac.ebi.biosamples.mongo.model.MongoRelationship; import uk.ac.ebi.biosamples.mongo.model.MongoSample; @@ -98,69 +100,7 @@ public Optional fetch(final String accession, final boolean applyCuratio */ private boolean isStoredSampleEmpty( final Sample newSample, final boolean isWebinSuperUser, final Sample oldSample) { - if (isWebinSuperUser) { - if (newSample.getSubmittedVia() == SubmittedViaType.FILE_UPLOADER) { - // file uploader submissions are done via superuser, but they are non-imported samples, - // needs to be handled safely - if (newSample.hasAccession()) { - return isStoredSampleEmpty(oldSample); - } - - return true; - } else { - 
// otherwise it is an ENA submission reference, cannot be an empty sample - return false; - } - } else { - if (newSample.hasAccession()) { - return isStoredSampleEmpty(oldSample); - } - } - - if (newSample.hasAccession()) { - return isStoredSampleEmpty(oldSample); - } - - return true; - } - - /* - Checks if the current sample that exists has no metadata, returns true if empty - */ - private boolean isStoredSampleEmpty(final Sample oldSample) { - if (oldSample.getTaxId() != null && oldSample.getTaxId() > 0) { - return false; - } - - if (!oldSample.getAttributes().isEmpty()) { - return false; - } - - if (!oldSample.getRelationships().isEmpty()) { - return false; - } - - if (!oldSample.getPublications().isEmpty()) { - return false; - } - - if (!oldSample.getContacts().isEmpty()) { - return false; - } - - if (!oldSample.getOrganizations().isEmpty()) { - return false; - } - - if (!oldSample.getData().isEmpty()) { - return false; - } - - if (!oldSample.getExternalReferences().isEmpty()) { - return false; - } - - return oldSample.getStructuredData().isEmpty(); + return SamplePersistencePolicy.isStoredSampleEmpty(newSample, isWebinSuperUser, oldSample); } // Because the fetch caches the sample, if an updated version is stored, we need to make @@ -189,6 +129,7 @@ public Sample persistSample( newSample = updateWhenNoneExists(newSample); } + validatePublicStatusTransition(oldSample, newSample); MongoSample mongoSample = sampleToMongoSampleConverter.convert(newSample); mongoSample = mongoSampleRepository.save(mongoSample); @@ -311,90 +252,12 @@ private List getExistingRelationshipTargetsForIndexingInSolr(final Sampl return existingRelationshipTargets; } - /* - Called by V2 endpoints to persist samples - */ - public Sample persistSampleV2( - Sample newSample, final Sample oldSample, final boolean isWebinSuperUser) { - final Collection errors = sampleValidator.validate(newSample); - - if (!errors.isEmpty()) { - log.error("Sample validation failed : {}", errors); - throw new 
GlobalExceptions.SampleMandatoryFieldsMissingException(String.join("|", errors)); - } - - if (newSample.hasAccession()) { - if (oldSample != null) { - log.info( - "Trying to update sample that exists in database, accession: {}", - newSample.getAccession()); - - final boolean savedSampleEmpty = - isStoredSampleEmpty(newSample, isWebinSuperUser, oldSample); - - if (savedSampleEmpty) { - // submitted is now if metadata is first submitted after accessioning - newSample = Sample.Builder.fromSample(newSample).withSubmitted(Instant.now()).build(); - } - - newSample = - compareWithExistingAndUpdateSample( - newSample, oldSample, null, savedSampleEmpty, isWebinSuperUser); - } else { - log.error( - "Trying to update sample not in database, accession: {}", newSample.getAccession()); - - newSample = updateWhenNoneExists(newSample); - } - - MongoSample mongoSample = sampleToMongoSampleConverter.convert(newSample); - - assert mongoSample != null; - - mongoSample = mongoSampleRepository.save(mongoSample); - newSample = mongoSampleToSampleConverter.apply(mongoSample); - - sendMessageToRabbitForIndexingToSolr(newSample.getAccession(), Collections.emptyList()); - } else { - newSample = createNew(newSample); - } - - return newSample; - } - private void sendMessageToRabbitForIndexingToSolr( final String accession, final List existingRelationshipTargets) { try { messagingService.fetchThenSendMessage(accession, existingRelationshipTargets); } catch (final Exception e) { - log.error("Indexing failed for accession " + accession); - } - } - - /* - Called by V2 endpoints to build a sample with a newly generated sample accession - */ - public Sample accessionSample(Sample newSample) { - final Collection errors = sampleValidator.validate(newSample); - - if (!errors.isEmpty()) { - log.error("Sample validation failed : {}", errors); - - throw new GlobalExceptions.SampleMandatoryFieldsMissingException(String.join("|", errors)); - } - - if (newSample - .getWebinSubmissionAccountId() - 
.equalsIgnoreCase(bioSamplesProperties.getBiosamplesClientWebinUsername())) { - // accessioning from ENA, sample name is the SRA accession here - final Attribute sraAccessionAttribute = Attribute.build(SRA_ACCESSION, newSample.getName()); - - newSample.getAttributes().add(sraAccessionAttribute); - newSample = Sample.Builder.fromSample(newSample).build(); - - return mongoAccessionService.generateAccession(newSample, false); - } else { - return mongoAccessionService.generateAccession(newSample, true); + log.error("Indexing failed for accession {}", accession, e); } } @@ -440,6 +303,7 @@ private Sample compareWithExistingAndUpdateSample( // uploads though handleRelationships(newSample, existingRelationships); handleSRAAccession(newSample, oldSample, isWebinSuperUser); + newSample = handleSampleStatus(newSample, oldSample); newSample = validateAndPromoteSRAAccessionAttributeToField(newSample); if (newSample.getData().isEmpty()) { @@ -473,17 +337,35 @@ private Sample compareWithExistingAndUpdateSample( } } + private Sample handleSampleStatus(final Sample newSample, final Sample oldSample) { + if (newSample.getRelease() != null && !newSample.getRelease().isAfter(Instant.now())) { + final SampleStatus oldSampleStatus = oldSample.getStatus(); + + if (oldSampleStatus == null || oldSampleStatus == SampleStatus.PRIVATE) { + return Sample.Builder.fromSample(newSample).withStatus(SampleStatus.PUBLIC).build(); + } + } + + return newSample; + } + + private void validatePublicStatusTransition(final Sample oldSample, final Sample newSample) { + final SampleStatus oldStatus = oldSample != null ? 
oldSample.getStatus() : null; + + SampleStatusTransitionPolicy.validatePublicTransition(oldStatus, newSample.getStatus()); + } + private Sample validateAndPromoteSRAAccessionAttributeToField(final Sample newSample) { - // Retrieve SRA accession attribute from new sample + // Retrieve SRA accession attribute from a new sample final Optional newSampleSraAccessionOptional = newSample.getAttributes().stream() .filter(attribute -> attribute.getType().equalsIgnoreCase(SRA_ACCESSION)) .findFirst(); - // Retrieve SRA accession field from new sample + // Retrieve the SRA accession field from a new sample final String sraAccessionField = newSample.getSraAccession(); - // Check if SRA accession field and attribute are both present + // Check if the SRA accession field and attribute are both present if (sraAccessionField != null && newSampleSraAccessionOptional.isPresent()) { // Check for SRA accession mismatch if (!sraAccessionField.equals(newSampleSraAccessionOptional.get().getValue())) { @@ -494,7 +376,7 @@ private Sample validateAndPromoteSRAAccessionAttributeToField(final Sample newSa } } - // Check if SRA accession field is null but the attribute is present + // Check if the SRA accession field is null but the attribute is present if (sraAccessionField == null && newSampleSraAccessionOptional.isPresent()) { // Promote SRA accession attribute to the field and return the modified sample return Sample.Builder.fromSample(newSample) diff --git a/webapps/core/src/test/java/uk/ac/ebi/biosamples/service/RecommendationServiceTest.java b/webapps/core/src/test/java/uk/ac/ebi/biosamples/service/RecommendationServiceTest.java index e75a7aafa1..1f95610399 100644 --- a/webapps/core/src/test/java/uk/ac/ebi/biosamples/service/RecommendationServiceTest.java +++ b/webapps/core/src/test/java/uk/ac/ebi/biosamples/service/RecommendationServiceTest.java @@ -1,118 +1,118 @@ -/* -* Copyright 2021 EMBL - European Bioinformatics Institute -* Licensed under the Apache License, Version 2.0 (the 
"License"); you may not use this -* file except in compliance with the License. You may obtain a copy of the License at -* http://www.apache.org/licenses/LICENSE-2.0 -* Unless required by applicable law or agreed to in writing, software distributed under the -* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -* CONDITIONS OF ANY KIND, either express or implied. See the License for the -* specific language governing permissions and limitations under the License. -*/ -package uk.ac.ebi.biosamples.service; - -import java.time.Instant; -import java.util.Optional; -import java.util.SortedSet; -import java.util.TreeSet; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.test.context.ActiveProfiles; -import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.biosamples.core.model.Attribute; -import uk.ac.ebi.biosamples.core.model.Sample; -import uk.ac.ebi.biosamples.curami.model.AttributeRecommendation; -import uk.ac.ebi.biosamples.curami.model.CuramiRecommendation; - -@RunWith(SpringRunner.class) -@SpringBootTest(properties = {"spring.cloud.gcp.project-id=no_project"}) -@ActiveProfiles("test") -public class RecommendationServiceTest { - - @Autowired RecommendationService recommendationService; - - @Test - public void should_return_recommendation() { - final Sample sample = getTestSample(); - final CuramiRecommendation recommendation = recommendationService.getRecommendations(sample); - - Assert.assertEquals(1, recommendation.getKnownAttributes().size()); - Assert.assertEquals(1, recommendation.getAttributeRecommendations().size()); - Assert.assertEquals(15, recommendation.getQuality()); - } - - @Test - public void should_find_recommendations_from_curations() { - final Sample sample = getTestSample2(); - final CuramiRecommendation recommendation = 
recommendationService.getRecommendations(sample); - - Assert.assertTrue( - recommendation - .getAttributeRecommendations() - .contains( - new AttributeRecommendation.Builder() - .withAttribute("Gender") - .withRecommendation("sex") - .build())); - } - - @Test - public void should_exist_unidentified_attribute() { - final Sample sample = getTestSample2(); - final CuramiRecommendation recommendation = recommendationService.getRecommendations(sample); - - Assert.assertEquals(1, recommendation.getUnknownAttributes().size()); - } - - @Test - public void should_return_recommended_sample() { - final Sample sample = getTestSample2(); - final CuramiRecommendation recommendation = recommendationService.getRecommendations(sample); - final Sample recommendedSample = - recommendationService.getRecommendedSample(sample, recommendation); - - final Optional recAttr = - recommendedSample.getAttributes().stream() - .filter(a -> a.getType().equals("sex")) - .findFirst(); - Assert.assertTrue(recAttr.isPresent()); - } - - private Sample getTestSample() { - final String name = "RecommendationServiceUnitTest_sample"; - final Instant release = Instant.parse("2016-04-01T11:36:57.00Z"); - final SortedSet attributes = new TreeSet<>(); - attributes.add(Attribute.build("organism", "Homo sapiens")); - attributes.add(Attribute.build("organism_part", "liver")); - - return new Sample.Builder(name) - .withDomain("self.biosamplesUnitTests") - .withRelease(release) - .withAttributes(attributes) - .build(); - } - - private Sample getTestSample2() { - final String name = "RecommendationServiceUnitTest_sample_2"; - final Instant release = Instant.parse("2016-04-01T11:36:57.00Z"); - final SortedSet attributes = new TreeSet<>(); - attributes.add(Attribute.build("organism", "Homo sapiens")); - attributes.add(Attribute.build("organism_part", "liver")); - attributes.add(Attribute.build("INSDC_status", "x")); - attributes.add(Attribute.build("STUDY NAME", "x")); - attributes.add(Attribute.build("gap 
accession", "x")); - attributes.add(Attribute.build("Age", "56")); - attributes.add(Attribute.build("Phenotype", "x")); - attributes.add(Attribute.build("Disease", "x")); - attributes.add(Attribute.build("Gender", "x")); - attributes.add(Attribute.build("impossibleAttribute", "x")); - - return new Sample.Builder(name) - .withDomain("self.biosamplesUnitTests") - .withRelease(release) - .withAttributes(attributes) - .build(); - } -} +/* +* Copyright 2021 EMBL - European Bioinformatics Institute +* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this +* file except in compliance with the License. You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software distributed under the +* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. 
+*/ +package uk.ac.ebi.biosamples.service; + +import java.time.Instant; +import java.util.Optional; +import java.util.SortedSet; +import java.util.TreeSet; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.junit4.SpringRunner; +import uk.ac.ebi.biosamples.core.model.Attribute; +import uk.ac.ebi.biosamples.core.model.Sample; +import uk.ac.ebi.biosamples.curami.model.AttributeRecommendation; +import uk.ac.ebi.biosamples.curami.model.CuramiRecommendation; + +@RunWith(SpringRunner.class) +@SpringBootTest(properties = {"spring.cloud.gcp.project-id=no_project"}) +@ActiveProfiles("test") +public class RecommendationServiceTest { + + @Autowired RecommendationService recommendationService; + + @Test + public void should_return_recommendation() { + final Sample sample = getTestSample(); + final CuramiRecommendation recommendation = recommendationService.getRecommendations(sample); + + Assert.assertEquals(1, recommendation.getKnownAttributes().size()); + Assert.assertEquals(1, recommendation.getAttributeRecommendations().size()); + Assert.assertEquals(15, recommendation.getQuality()); + } + + @Test + public void should_find_recommendations_from_curations() { + final Sample sample = getTestSample2(); + final CuramiRecommendation recommendation = recommendationService.getRecommendations(sample); + + Assert.assertTrue( + recommendation + .getAttributeRecommendations() + .contains( + new AttributeRecommendation.Builder() + .withAttribute("Gender") + .withRecommendation("sex") + .build())); + } + + @Test + public void should_exist_unidentified_attribute() { + final Sample sample = getTestSample2(); + final CuramiRecommendation recommendation = recommendationService.getRecommendations(sample); + + Assert.assertEquals(1, 
recommendation.getUnknownAttributes().size()); + } + + @Test + public void should_return_recommended_sample() { + final Sample sample = getTestSample2(); + final CuramiRecommendation recommendation = recommendationService.getRecommendations(sample); + final Sample recommendedSample = + recommendationService.getRecommendedSample(sample, recommendation); + + final Optional recAttr = + recommendedSample.getAttributes().stream() + .filter(a -> a.getType().equals("sex")) + .findFirst(); + Assert.assertTrue(recAttr.isPresent()); + } + + private Sample getTestSample() { + final String name = "RecommendationServiceUnitTest_sample"; + final Instant release = Instant.parse("2016-04-01T11:36:57.00Z"); + final SortedSet attributes = new TreeSet<>(); + attributes.add(Attribute.build("organism", "Homo sapiens")); + attributes.add(Attribute.build("organism_part", "liver")); + + return new Sample.Builder(name) + .withDomain("self.biosamplesUnitTests") + .withRelease(release) + .withAttributes(attributes) + .build(); + } + + private Sample getTestSample2() { + final String name = "RecommendationServiceUnitTest_sample_2"; + final Instant release = Instant.parse("2016-04-01T11:36:57.00Z"); + final SortedSet attributes = new TreeSet<>(); + attributes.add(Attribute.build("organism", "Homo sapiens")); + attributes.add(Attribute.build("organism_part", "liver")); + attributes.add(Attribute.build("INSDC_status", "x")); + attributes.add(Attribute.build("STUDY NAME", "x")); + attributes.add(Attribute.build("gap accession", "x")); + attributes.add(Attribute.build("Age", "56")); + attributes.add(Attribute.build("Phenotype", "x")); + attributes.add(Attribute.build("Disease", "x")); + attributes.add(Attribute.build("Gender", "x")); + attributes.add(Attribute.build("impossibleAttribute", "x")); + + return new Sample.Builder(name) + .withDomain("self.biosamplesUnitTests") + .withRelease(release) + .withAttributes(attributes) + .build(); + } +}