diff --git a/pyQuARC/code/custom_validator.py b/pyQuARC/code/custom_validator.py index bf3620d1..6a2beb08 100644 --- a/pyQuARC/code/custom_validator.py +++ b/pyQuARC/code/custom_validator.py @@ -56,11 +56,43 @@ def mime_type_check(mime_type, url_type, controlled_list): @staticmethod def availability_check(field_value, parent_value): # If the parent is available, the child should be available too, else it is invalid + return { "valid": bool(field_value) if parent_value else True, "value": parent_value, } + @staticmethod + def url_description_presence_check(field_value): + """ + Ensures that URL descriptions are present and not empty if a URL is provided. + """ + if isinstance(field_value, list): + # List of dictionaries (URL objects) + errors = [] + for url_obj in field_value: + description = url_obj.get("Description", "") + if not description or not str(description).strip(): + errors.append({ + "valid": False, + "value": url_obj, + }) + + if errors: + return errors[0] + else: + return {"valid": True, "value": field_value} + + + if not field_value or not str(field_value).strip(): + return { + "valid": False, + "value": field_value, + } + + return {"valid": True, "value": field_value} + + @staticmethod @if_arg def bounding_coordinate_logic_check(west, north, east, south): @@ -277,3 +309,5 @@ def count_check(count, values, key): items = [items] num_items = len(items) return {"valid": int(count) == num_items, "value": (count, num_items)} + + \ No newline at end of file diff --git a/pyQuARC/code/schema_validator.py b/pyQuARC/code/schema_validator.py index fcdc2078..171c7e2a 100644 --- a/pyQuARC/code/schema_validator.py +++ b/pyQuARC/code/schema_validator.py @@ -75,25 +75,19 @@ def run_json_validator(self, content_to_validate): """ schema = self.read_json_schema() schema_store = {} - if self.metadata_format == UMM_C: with open(SCHEMA_PATHS["umm-cmn-json-schema"]) as schema_file: schema_base = json.load(schema_file) - # workaround to read local referenced schema file (only supports uri) schema_store = { schema_base.get("$id", "/umm-cmn-json-schema.json"): schema_base, schema_base.get("$id", "umm-cmn-json-schema.json"): schema_base, } - errors = {} - resolver = RefResolver.from_schema(schema, store=schema_store) - validator = Draft7Validator( schema, format_checker=Draft7Validator.FORMAT_CHECKER, resolver=resolver ) - for error in sorted( validator.iter_errors(json.loads(content_to_validate)), key=str ): @@ -139,7 +133,7 @@ def _build_errors(error_log, paths): namespaces = re.findall(r"(\{http[^}]*\})", line) for namespace in namespaces: line = line.replace(namespace, "") - field_name = re.search(r"Element\s'(.*)':", line)[1] + field_name = re.search(r"Element\s'(.*)':", line)[1] field_paths = [abs_path for abs_path in paths if field_name in abs_path] field_name = field_paths[0] if len(field_paths) == 1 else field_name message = re.search(r"Element\s'.+':\s(\[.*\])?(.*)", line)[2].strip() @@ -191,4 +185,7 @@ def run(self, metadata): Returns: (dict): Result of the validation from xml and json schema validators """ + return self.validator_func(metadata) + + \ No newline at end of file diff --git a/pyQuARC/schemas/check_messages.json b/pyQuARC/schemas/check_messages.json index f1674207..8739b373 100644 --- a/pyQuARC/schemas/check_messages.json +++ b/pyQuARC/schemas/check_messages.json @@ -989,7 +989,15 @@ "message": "", "url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs" }, - "remediation": "Descriptions should be unique to each URL. At least one of the descriptions are repeated in this record. Recommend changing the descriptions to more accurately and uniquely describe each link." + "remediation": "Descriptions should be unique to each URL. Several of the descriptions are repeated in this record. Recommend changing the descriptions to more accurately and uniquely describe each link" + }, + "url_description_presence_check": { + "failure": "A URL description is missing.", + "help": { + "message": "", + "url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs" + }, + "remediation": "Recommend providing a description for each URL." }, "online_resource_description_uniqueness_check": { "failure": "A URL description is duplicated: `{}`.", diff --git a/pyQuARC/schemas/checks.json b/pyQuARC/schemas/checks.json index d921ac28..acff6c7e 100644 --- a/pyQuARC/schemas/checks.json +++ b/pyQuARC/schemas/checks.json @@ -69,6 +69,11 @@ "check_function": "availability_check", "available": true }, + "url_description_presence_check": { + "data_type": "custom", + "check_function": "url_description_presence_check", + "available": true + }, "mime_type_check": { "data_type": "custom", "check_function": "mime_type_check", diff --git a/pyQuARC/schemas/rule_mapping.json b/pyQuARC/schemas/rule_mapping.json index 7641306a..c0d038e5 100644 --- a/pyQuARC/schemas/rule_mapping.json +++ b/pyQuARC/schemas/rule_mapping.json @@ -4759,70 +4759,24 @@ "severity": "info", "check_id": "datetime_compare" }, - "url_desc_presence_check": { - "rule_name": "Online Description Presence Check", + "url_description_presence_check": { + "rule_name": "url description presence check", "fields_to_apply": { - "echo-c": [ - { - "fields": [ - "Collection/OnlineAccessURLs/OnlineAccessURL/URLDescription", - "Collection/OnlineAccessURLs/OnlineAccessURL/URL" - ] - }, - { - "fields": [ - "Collection/OnlineResources/OnlineResource/Description", - "Collection/OnlineResources/OnlineResource/URL" - ] - } - ], - "echo-g": [ - { - "fields": [ - "Granule/OnlineAccessURLs/OnlineAccessURL/URLDescription", - "Granule/OnlineAccessURLs/OnlineAccessURL/URL" - ] - }, - { - "fields": [ - "Granule/OnlineResources/OnlineResource/Description", - "Granule/OnlineResources/OnlineResource/URL" - ] - } - ], - "dif10": [ - { - "fields": [ - "DIF/Related_URL/Description", - "DIF/Related_URL/URL" - ] - }, - { - "fields": [ - "DIF/Multimedia_Sample/Description", - "DIF/Multimedia_Sample/URL" - ] - } - ], "umm-c": [ - { + { "fields": [ - "RelatedUrls/Description", - "RelatedUrls/URL" + "RelatedUrls" ] - } - ], - "umm-g": [ + }, { "fields": [ - "RelatedUrls/Description", - "RelatedUrls/URL" + "DataCenters/ContactInformation/RelatedUrls" ] } ] }, "severity": "warning", - "check_id": "availability_check" + "check_id": "url_description_presence_check" }, "get_data_url_check": { "rule_name": "GET DATA URL check", diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 5a6ca777..92ab0d0d 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -166,3 +166,4 @@ def test_download_real_granule_no_errors(self): # is the concept id valid and is the request going through? assert downloader.errors == [] + \ No newline at end of file