34 changes: 34 additions & 0 deletions pyQuARC/code/custom_validator.py
@@ -56,11 +56,43 @@ def mime_type_check(mime_type, url_type, controlled_list):
    @staticmethod
    def availability_check(field_value, parent_value):
        # If the parent is available, the child should be available too, else it is invalid

        return {
            "valid": bool(field_value) if parent_value else True,
            "value": parent_value,
        }

    @staticmethod
    def url_description_presence_check(field_value):
        """
        Ensures that URL descriptions are present and not empty if a URL is provided.
        """
        if isinstance(field_value, list):
            # List of dictionaries (URL objects)
            errors = []
            for url_obj in field_value:
                description = url_obj.get("Description", "")
                if not description or not str(description).strip():
                    errors.append({
                        "valid": False,
                        "value": url_obj,
                    })

            if errors:
                return errors[0]
            else:
                return {"valid": True, "value": field_value}


        if not field_value or not str(field_value).strip():
            return {
                "valid": False,
                "value": field_value,
            }

        return {"valid": True, "value": field_value}


    @staticmethod
    @if_arg
    def bounding_coordinate_logic_check(west, north, east, south):
@@ -277,3 +309,5 @@ def count_check(count, values, key):
            items = [items]
        num_items = len(items)
        return {"valid": int(count) == num_items, "value": (count, num_items)}


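For reference, a minimal usage sketch of the new check (the `CustomValidator` class name and the sample `RelatedUrls` payload are assumptions for illustration, not part of this diff):

from pyQuARC.code.custom_validator import CustomValidator

# Hypothetical list-of-URL-objects input, as UMM-style metadata would provide it.
related_urls = [
    {"URL": "https://example.com/data", "Description": "Direct download"},
    {"URL": "https://example.com/doc", "Description": "   "},  # whitespace-only description
]

# With a list, the check returns the first failing entry it finds (or valid=True if none fail).
CustomValidator.url_description_presence_check(related_urls)
# -> {"valid": False, "value": {"URL": "https://example.com/doc", "Description": "   "}}

# With a scalar value (e.g. a single description string), it simply checks non-emptiness.
CustomValidator.url_description_presence_check("Direct download")
# -> {"valid": True, "value": "Direct download"}

Note that only the first missing description is reported per call, even if several entries lack one.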
11 changes: 4 additions & 7 deletions pyQuARC/code/schema_validator.py
@@ -75,25 +75,19 @@ def run_json_validator(self, content_to_validate):
"""
schema = self.read_json_schema()
schema_store = {}

if self.metadata_format == UMM_C:
with open(SCHEMA_PATHS["umm-cmn-json-schema"]) as schema_file:
schema_base = json.load(schema_file)

# workaround to read local referenced schema file (only supports uri)
schema_store = {
schema_base.get("$id", "/umm-cmn-json-schema.json"): schema_base,
schema_base.get("$id", "umm-cmn-json-schema.json"): schema_base,
}

errors = {}

resolver = RefResolver.from_schema(schema, store=schema_store)

validator = Draft7Validator(
schema, format_checker=Draft7Validator.FORMAT_CHECKER, resolver=resolver
)

for error in sorted(
validator.iter_errors(json.loads(content_to_validate)), key=str
):
@@ -139,7 +133,7 @@ def _build_errors(error_log, paths):
            namespaces = re.findall(r"(\{http[^}]*\})", line)
            for namespace in namespaces:
                line = line.replace(namespace, "")
            field_name = re.search(r"Element\s'(.*)':", line)[1]
            field_name = re.search(r"Element\s'(.*)':", line)[1]
            field_paths = [abs_path for abs_path in paths if field_name in abs_path]
            field_name = field_paths[0] if len(field_paths) == 1 else field_name
            message = re.search(r"Element\s'.+':\s(\[.*\])?(.*)", line)[2].strip()
@@ -191,4 +185,7 @@ def run(self, metadata):
        Returns:
            (dict): Result of the validation from xml and json schema validators
        """

        return self.validator_func(metadata)


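The schema_store workaround above pre-registers the common UMM schema under its $id so that jsonschema's RefResolver can resolve $refs to it from memory instead of treating them as remote URIs. A standalone sketch of the same pattern (the two toy schemas below are made up for illustration; RefResolver is deprecated in newer jsonschema releases in favor of the referencing package, but still works as shown):

import json

from jsonschema import Draft7Validator, RefResolver

# A "common" schema that other schemas reference by its $id.
common_schema = {
    "$id": "umm-cmn-json-schema.json",
    "definitions": {"ShortNameType": {"type": "string", "minLength": 1}},
}

# The main schema points at the common one via $ref.
main_schema = {
    "type": "object",
    "properties": {
        "ShortName": {"$ref": "umm-cmn-json-schema.json#/definitions/ShortNameType"}
    },
}

# Keying the store by the $id lets the resolver find the referenced schema locally.
schema_store = {common_schema["$id"]: common_schema}
resolver = RefResolver.from_schema(main_schema, store=schema_store)
validator = Draft7Validator(main_schema, resolver=resolver)

for error in validator.iter_errors(json.loads('{"ShortName": ""}')):
    print(error.message)  # the empty ShortName fails the minLength constraint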
10 changes: 9 additions & 1 deletion pyQuARC/schemas/check_messages.json
@@ -989,7 +989,15 @@
"message": "",
"url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs"
},
"remediation": "Descriptions should be unique to each URL. At least one of the descriptions are repeated in this record. Recommend changing the descriptions to more accurately and uniquely describe each link."
"remediation": "Descriptions should be unique to each URL. Several of the descriptions are repeated in this record. Recommend changing the descriptions to more accurately and uniquely describe each link"
},
"url_description_presence_check": {
"failure": "A URL description is missing.",
"help": {
"message": "",
"url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs"
},
"remediation": "Recommend providing a description for each URL."
},
"online_resource_description_uniqueness_check": {
"failure": "A URL description is duplicated: `{}`.",
5 changes: 5 additions & 0 deletions pyQuARC/schemas/checks.json
@@ -69,6 +69,11 @@
"check_function": "availability_check",
"available": true
},
"url_description_presence_check": {
"data_type": "custom",
"check_function": "url_description_presence_check",
"available": true
},
"mime_type_check": {
"data_type": "custom",
"check_function": "mime_type_check",
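For context, entries in checks.json like the one added above are what the check machinery dispatches on: data_type roughly selects the validator class and check_function names the method to call. A simplified, hypothetical dispatcher (not pyQuARC's actual runner) to illustrate the shape of that lookup:

import json

from pyQuARC.code.custom_validator import CustomValidator

def run_custom_check(check_id, field_value, checks_path="pyQuARC/schemas/checks.json"):
    # Hypothetical helper: look up a "custom" check by id and invoke the named static method.
    with open(checks_path) as checks_file:
        checks = json.load(checks_file)
    check = checks[check_id]
    assert check["data_type"] == "custom" and check["available"]
    check_function = getattr(CustomValidator, check["check_function"])
    return check_function(field_value)

# e.g. run_custom_check("url_description_presence_check", [{"URL": "https://example.com"}])
# -> {"valid": False, "value": {"URL": "https://example.com"}}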
60 changes: 7 additions & 53 deletions pyQuARC/schemas/rule_mapping.json
@@ -4759,70 +4759,24 @@
"severity": "info",
"check_id": "datetime_compare"
},
"url_desc_presence_check": {
"rule_name": "Online Description Presence Check",
"url_description_presence_check": {
"rule_name": "url description presence check",
"fields_to_apply": {
Comment on lines +4762 to 4764
Member:

Are we replacing an existing rule? Do we need to?

If we do, check whether anything else is using this rule and, if not, remove all the associated code.

Collaborator:

There is another function, url_description_uniqueness_check, in rule_mapping.json that checks whether the description of a URL is unique. Since we have three different criteria (see below), @bhawana11 wrote a new function for URLDescription. Let me know if you have suggestions for a better fix.

  • If missing description only (Warning)
  • If duplicate description only (Info)
  • If both missing and duplicate descriptions (Warning)
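If the goal is one check that distinguishes the three criteria listed above, here is a minimal sketch of one possible shape (not part of this PR; the severities simply mirror the wording in the list):

def classify_url_descriptions(related_urls):
    """Toy sketch: flag missing and/or duplicate descriptions in a list of URL objects."""
    descriptions = [str(url_obj.get("Description", "")).strip() for url_obj in related_urls]
    missing = any(not description for description in descriptions)
    non_empty = [description for description in descriptions if description]
    duplicated = len(non_empty) != len(set(non_empty))

    if missing and duplicated:
        return {"valid": False, "severity": "warning", "reason": "missing and duplicate descriptions"}
    if missing:
        return {"valid": False, "severity": "warning", "reason": "missing description"}
    if duplicated:
        return {"valid": False, "severity": "info", "reason": "duplicate description"}
    return {"valid": True, "severity": None, "reason": None}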

slesaad (Member), Oct 15, 2025:

I'm concerned about the deletion. Previously we used the given fields, for example:

"Collection/OnlineAccessURLs/OnlineAccessURL/URLDescription",
"Collection/OnlineAccessURLs/OnlineAccessURL/URL"

to check that, if a URL is provided, a description needs to be provided as well.

We're completely getting rid of that check for all these fields; is that intentional?

"echo-c": [
{
"fields": [
"Collection/OnlineAccessURLs/OnlineAccessURL/URLDescription",
"Collection/OnlineAccessURLs/OnlineAccessURL/URL"
]
},
{
"fields": [
"Collection/OnlineResources/OnlineResource/Description",
"Collection/OnlineResources/OnlineResource/URL"
]
}
],
"echo-g": [
{
"fields": [
"Granule/OnlineAccessURLs/OnlineAccessURL/URLDescription",
"Granule/OnlineAccessURLs/OnlineAccessURL/URL"
]
},
{
"fields": [
"Granule/OnlineResources/OnlineResource/Description",
"Granule/OnlineResources/OnlineResource/URL"
]
}
],
"dif10": [
{
"fields": [
"DIF/Related_URL/Description",
"DIF/Related_URL/URL"
]
},
{
"fields": [
"DIF/Multimedia_Sample/Description",
"DIF/Multimedia_Sample/URL"
]
}
],
"umm-c": [
{
{
"fields": [
"RelatedUrls/Description",
"RelatedUrls/URL"
"RelatedUrls"
]
}
],
"umm-g": [
},
{
"fields": [
"RelatedUrls/Description",
"RelatedUrls/URL"
"DataCenters/ContactInformation/RelatedUrls"
]
}
]
},
"severity": "warning",
"check_id": "availability_check"
"check_id": "url_description_presence_check"
},
"get_data_url_check": {
"rule_name": "GET DATA URL check",
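To make the field-mapping change above concrete: under the old rule, paired leaf fields were fed to availability_check, whereas the new rule hands the whole RelatedUrls (or DataCenters/ContactInformation/RelatedUrls) list to url_description_presence_check, which is what its list branch expects. A made-up example of the new data flow (illustrative values only, not from a real record):

from pyQuARC.code.custom_validator import CustomValidator

# Hypothetical UMM-C fragment; the second entry has no Description at all.
related_urls = [
    {"URL": "https://example.com/data.nc", "Description": "Direct download"},
    {"URL": "https://example.com/readme"},
]

# Old mapping (roughly): ("Direct download", "https://example.com/data.nc") went to
# availability_check as (field_value, parent_value). New mapping: the full list goes here.
CustomValidator.url_description_presence_check(related_urls)
# -> {"valid": False, "value": {"URL": "https://example.com/readme"}}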
1 change: 1 addition & 0 deletions tests/test_downloader.py
@@ -166,3 +166,4 @@ def test_download_real_granule_no_errors(self):

        # is the concept id valid and is the request going through?
        assert downloader.errors == []