diff --git a/hca/staging_area_validator.py b/hca/staging_area_validator.py index eee0a0c..27aca89 100644 --- a/hca/staging_area_validator.py +++ b/hca/staging_area_validator.py @@ -258,8 +258,7 @@ def validate_file_description(self, file_description: str) -> None: def validate_descriptors_file(self, blob: gcs.Blob) -> None: # Expected syntax: descriptors/{metadata_type}/{metadata_id}_{version}.json - # TODO: remove unused `metadata_type` - metadata_type, descriptor_file = blob.name.split("/")[-2:] + descriptor_file = blob.name.split("/")[-1] assert descriptor_file.count("_") == 1 assert descriptor_file.endswith(".json") @@ -272,6 +271,14 @@ def validate_descriptors_file(self, blob: gcs.Blob) -> None: if metadata_file := self.metadata_files.get(metadata_id): metadata_file["crc32c"] = file_json["crc32c"] metadata_versions = metadata_file["metadata_versions"] + + # Sequence file data_files might not be present if they are managed access. + # File Descriptor v2.1.0 allows for the drs_uri to be a string or null. + # In both of these cases, we set found_data_file to True + if metadata_file["entity_type"] == "sequence_file": + if "drs_uri" in file_json: + metadata_file["found_data_file"] = True + assert ( descriptor_version in metadata_versions ), f"Corresponding metadata version for descriptor version {descriptor_version} not found"