Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions pyQuARC/code/custom_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,43 @@ def mime_type_check(mime_type, url_type, controlled_list):
@staticmethod
def availability_check(field_value, parent_value):
# If the parent is available, the child should be available too, else it is invalid

return {
"valid": bool(field_value) if parent_value else True,
"value": parent_value,
}

@staticmethod
def url_description_presence_check(field_value):
"""
Ensures that URL descriptions are present and not empty if a URL is provided.
"""
if isinstance(field_value, list):
# List of dictionaries (URL objects)
errors = []
for url_obj in field_value:
description = url_obj.get("Description", "")
if not description or not str(description).strip():
errors.append({
"valid": False,
"value": url_obj,
})

if errors:
return errors[0]
else:
return {"valid": True, "value": field_value}


if not field_value or not str(field_value).strip():
return {
"valid": False,
"value": field_value,
}

return {"valid": True, "value": field_value}


@staticmethod
@if_arg
def bounding_coordinate_logic_check(west, north, east, south):
Expand Down
16 changes: 5 additions & 11 deletions pyQuARC/code/schema_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re

from io import BytesIO
from jsonschema import Draft7Validator, draft7_format_checker, RefResolver
from jsonschema import Draft7Validator, RefResolver
from lxml import etree
from urllib.request import pathname2url

Expand Down Expand Up @@ -75,25 +75,19 @@ def run_json_validator(self, content_to_validate):
"""
schema = self.read_json_schema()
schema_store = {}

if self.metadata_format == UMM_C:
with open(SCHEMA_PATHS["umm-cmn-json-schema"]) as schema_file:
schema_base = json.load(schema_file)

# workaround to read local referenced schema file (only supports uri)
schema_store = {
schema_base.get("$id", "/umm-cmn-json-schema.json"): schema_base,
schema_base.get("$id", "umm-cmn-json-schema.json"): schema_base,
}

errors = {}

resolver = RefResolver.from_schema(schema, store=schema_store)

validator = Draft7Validator(
schema, format_checker=draft7_format_checker, resolver=resolver
schema, format_checker=Draft7Validator.FORMAT_CHECKER, resolver=resolver
)

for error in sorted(
validator.iter_errors(json.loads(content_to_validate)), key=str
):
Expand Down Expand Up @@ -136,13 +130,13 @@ def _build_errors(error_log, paths):
# For DIF, because the namespace is specified in the metadata file, lxml library
# provides field name concatenated with the namespace,
# the following 3 lines of code removes the namespace
namespaces = re.findall("(\{http[^}]*\})", line)
namespaces = re.findall(r"(\{http[^}]*\})", line)
for namespace in namespaces:
line = line.replace(namespace, "")
field_name = re.search("Element\s'(.*)':", line)[1]
field_name = re.search(r"Element\s'(.*)':", line)[1]
field_paths = [abs_path for abs_path in paths if field_name in abs_path]
field_name = field_paths[0] if len(field_paths) == 1 else field_name
message = re.search("Element\s'.+':\s(\[.*\])?(.*)", line)[2].strip()
message = re.search(r"Element\s'.+':\s(\[.*\])?(.*)", line)[2].strip()
errors.setdefault(field_name, {})["schema"] = {
"message": [f"Error: {message}"],
"valid": False,
Expand Down
8 changes: 5 additions & 3 deletions pyQuARC/schemas/MimeType.csv
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
"Hits: 37","page_num: 1","page_size: 2000","Keyword Version: 14.3","Revision: 2022-08-26 10:35:56","Timestamp: 2022-09-15 12:28:30","Terms Of Use: https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/MimeType/?format=xml","Case native"
"Hits: 39","page_num: 1","page_size: 2000","Keyword Version: 21.6","Revision: 2025-06-20 10:09:41","Timestamp: 2025-07-15 08:58:02","Terms Of Use: https://cdn.earthdata.nasa.gov/conduit/upload/5182/KeywordsCommunityGuide_Baseline_v1_SIGNED_FINAL.pdf","The most up to date XML representations can be found here: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/MimeType/?format=xml","Case native"
MimeType,UUID
"application/geo+json","aa595aba-86bf-4cc6-8ab5-26ee4de68eeb"
"application/gml+xml","40bdf6e5-780c-43e2-ab8e-e5dfae4bd779"
"application/gzip","a8ee535a-8bc8-46fd-8b97-917bd7ea7666"
"application/json","8542dd4a-a11b-475d-8d46-cad785a7f510"
"application/msword","c79a0e11-2774-4cf3-a194-45b9e58a93fd"
"application/octet-stream","b77e64ef-ce80-4dab-b552-c6062990a6e0"
"application/opensearchdescription+xml","07bcc60e-1551-44d9-b87e-7c260d230ecb"
"application/pdf","627269ae-ba93-492e-8c31-cc4de1d69810"
"application/tar","84ef762f-e348-42a6-981c-563822a47806"
"application/tar+gzip","43ca8ee0-04a5-4020-b0ec-998ec0e0f30e"
"application/tar+zip","17e82b7c-498d-4d69-993c-fd691aa25ce8"
"application/tar","84ef762f-e348-42a6-981c-563822a47806"
"application/vnd.google-earth.kml+xml","80045dcb-18ee-463a-8baf-ffcabed510ea"
"application/vnd.google-earth.kmz","f7328bf5-8ef2-4f95-a4e0-6fb16d122237"
"application/vnd.ms-excel","7c99ff72-5239-424d-a0bf-9712c33ea76d"
"application/vnd.opendap.dap4.dmrpp+xml","b26761fa-8d8e-4bd8-a8ba-db6575554ad7"
"application/x-bufr","e384b8a8-8cec-4230-9ebe-4db76bbef706"
"application/x-geotiff","53054b2b-034f-44d8-ab56-fd365947d386"
"application/x-hdf","b0a3e733-4d1b-486f-b56c-c405a5e4367b"
"application/x-hdf5","4e80047b-c50b-4805-ac68-789dbc38803f"
"application/x-hdfeos","b1eac265-2b00-4c39-a429-797c13a2c640"
"application/x-hdf","b0a3e733-4d1b-486f-b56c-c405a5e4367b"
"application/x-netcdf","2b192915-32a8-4b68-a720-8ca8a84f04ca"
"application/x-tar-gz","5e70beda-396e-4cc8-bdd5-70dfc8a1142e"
"application/x-vnd.iso.19139-2+xml","c1a8dbb7-312d-4481-998e-58d126b32080"
Expand Down
12 changes: 11 additions & 1 deletion pyQuARC/schemas/check_messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -973,8 +973,18 @@
"message": "",
"url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs"
},
"remediation": "Descriptions should be unique to each URL. At least one of the descriptions are repeated in this record. Recommend changing the descriptions to more accurately and uniquely describe each link."
"remediation": "Descriptions should be unique to each URL. Several of the descriptions are repeated in this record. Recommend changing the descriptions to more accurately and uniquely describe each link."
},

"url_description_presence_check": {
"failure": "A URL description is missing.",
"help": {
"message": "",
"url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs"
},
"remediation": "Recommend providing a description for each URL."
},

"online_resource_description_uniqueness_check": {
"failure": "A URL description is duplicated: `{}`.",
"help": {
Expand Down
5 changes: 5 additions & 0 deletions pyQuARC/schemas/checks.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@
"check_function": "availability_check",
"available": true
},
"url_description_presence_check": {
"data_type": "custom",
"check_function": "url_description_presence_check",
"available": true
},
"mime_type_check": {
"data_type": "custom",
"check_function": "mime_type_check",
Expand Down
Loading