From 9f7adef98ee34a5bcb116e94c7cc725baf96819c Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 14:09:45 -0600 Subject: [PATCH 01/58] Add JSON-LD generation script with NRP AI support --- .env.example | 14 + .gitignore | 20 ++ prompts/dataset-detection-prompt.txt | 33 +++ prompts/jsonld-generation-prompt.txt | 34 +++ scripts/README.md | 119 ++++++++ scripts/generate_jsonld.py | 410 +++++++++++++++++++++++++++ scripts/requirements.txt | 18 ++ 7 files changed, 648 insertions(+) create mode 100644 .env.example create mode 100644 prompts/dataset-detection-prompt.txt create mode 100644 prompts/jsonld-generation-prompt.txt create mode 100644 scripts/README.md create mode 100644 scripts/generate_jsonld.py create mode 100644 scripts/requirements.txt diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..3085b03 --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +# API Keys for JSON-LD Generation +# Copy this file to .env and fill in your actual API keys +# The .env file is gitignored and will not be committed + +# NRP (National Research Platform) API Key +# Get your key from: https://nrp.ai/documentation/userdocs/ai/llm-managed/ +# Available NRP models: qwen3, llama3-sdsc, gpt-oss, gorilla, olmo, gemma3, kimi, etc. 
+NRP_API_KEY=your-nrp-api-key-here + +# OpenAI API Key (optional) +# OPENAI_API_KEY=your-openai-api-key-here + +# Anthropic API Key (optional) +# ANTHROPIC_API_KEY=your-anthropic-api-key-here diff --git a/.gitignore b/.gitignore index 485dee6..c2add81 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,21 @@ .idea + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ + +# Environment variables +.env +.env.local + +# CSV data files (downloaded from Google Sheets) +datasets.csv + +# Generated JSON-LD files +data/objects/summoned/generated/ diff --git a/prompts/dataset-detection-prompt.txt b/prompts/dataset-detection-prompt.txt new file mode 100644 index 0000000..90db45b --- /dev/null +++ b/prompts/dataset-detection-prompt.txt @@ -0,0 +1,33 @@ +You are analyzing a scientific dataset webpage to identify available datasets and their metadata. + +**Task**: Examine the following webpage and identify: +1. What datasets are available on this page? +2. For each dataset found, extract: + - Dataset name/title + - Description + - Creator(s) or author(s) + - Publisher or organization + - Publication date + - Download links or access URLs + - Spatial coverage (if mentioned) + - Temporal coverage (if mentioned) + - License information + - Keywords or topics + +**Webpage URL**: {URL} +**Webpage Content**: {CONTENT} + +**Context from Google Sheet**: +- Expected Dataset Name: {DATASET_NAME} +- Group/Category: {GROUP} +- Description: {DESCRIPTION} + +**Instructions**: +- If the page contains multiple datasets, list all of them +- Focus on structured data products (not just documentation) +- Look for download links, API endpoints, or data access points +- Extract any existing JSON-LD or structured metadata if present +- Note any errors or issues accessing the page + +**Output Format**: Provide a structured JSON response with the extracted information. 
+ diff --git a/prompts/jsonld-generation-prompt.txt b/prompts/jsonld-generation-prompt.txt new file mode 100644 index 0000000..7324678 --- /dev/null +++ b/prompts/jsonld-generation-prompt.txt @@ -0,0 +1,34 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: {DATASET_NAME} +- URL: {URL} +- Description: {DESCRIPTION} +- Group/Category: {GROUP} +- Creator: {CREATOR} +- Provider: {PROVIDER} +- Publisher: {PUBLISHER} +- Keywords: {KEYWORDS} +- Spatial Coverage: {SPATIAL_COVERAGE} +- Extracted Metadata: {EXTRACTED_METADATA} + +**Reference Example** (from existing JSON-LD in this project): +{EXAMPLE_JSONLD} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or identifier +5. Include all available metadata fields +6. For creators, use Person or Organization types with proper structure +7. Include distribution information if download links are available +8. Add temporalCoverage if time period is known +9. Add spatial coverage if geographic bounds are provided +10. Include license and access information +11. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. + diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..2889fa2 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,119 @@ +# JSON-LD Generation Scripts + +## Setup + +### Getting the datasets.csv file + +The script requires a `datasets.csv` file exported from the Google Sheet. To create it: + +**Option 1: Download directly (Recommended)** +1. 
Open the Google Sheet: https://docs.google.com/spreadsheets/d/1pqZpMWqQFwUrleHXPbvXqXX59Xcj1Yrtqt2nJTh1reM/edit?gid=1162616600 +2. Go to the "Datasets" tab +3. File → Download → Comma Separated Values (.csv) +4. Save as `datasets.csv` in the project root directory + +**Option 2: Use Python to download** +```bash +# Download the CSV export directly +python -c "import urllib.request; urllib.request.urlretrieve('https://docs.google.com/spreadsheets/d/1pqZpMWqQFwUrleHXPbvXqXX59Xcj1Yrtqt2nJTh1reM/export?format=csv&gid=1162616600', 'datasets.csv'); print('Downloaded datasets.csv')" +``` + +**Note**: The `datasets.csv` file is gitignored and will not be committed to the repository. + +1. Activate the virtual environment (if using one): + ```bash + # Windows + venv\Scripts\activate + + # Linux/Mac + source venv/bin/activate + ``` + +2. Install required packages: + ```bash + pip install -r scripts/requirements.txt + ``` + + Or install specific AI service: + ```bash + pip install openai requests beautifulsoup4 python-dotenv + # OR + pip install anthropic requests beautifulsoup4 python-dotenv + ``` + +3. 
Set up your API key: + + **Use .env file (Recommended)** + ```bash + # Copy the example file + cp .env.example .env + + # Edit .env and add your API key + # See .env.example for all available options and NRP model information + ``` + + **Alternative: Set environment variable directly** + ```bash + # Linux/Mac + export NRP_API_KEY="your-key-here" + + # Windows (PowerShell) + $env:NRP_API_KEY="your-key-here" + + # Windows (CMD) + set NRP_API_KEY=your-key-here + ``` + + **For NRP**: Get your API key from https://nrp.ai/documentation/userdocs/ai/llm-managed/ + +## Usage + +### Test with a single URL +```bash +# Using NRP (default, no --ai-service needed) +python scripts/generate_jsonld.py --test-url "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + +# Or use OpenAI/Anthropic if you have their API keys +python scripts/generate_jsonld.py --ai-service openai --test-url "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" +``` + +### Process datasets from CSV +```bash +# Process all datasets that need JSON-LD (using NRP by default) +python scripts/generate_jsonld.py --csv datasets.csv + +# Process only first 5 datasets (for testing) +python scripts/generate_jsonld.py --csv datasets.csv --limit 5 + +# Alternative: Use OpenAI or Anthropic if you have their API keys +python scripts/generate_jsonld.py --ai-service openai --csv datasets.csv +python scripts/generate_jsonld.py --ai-service anthropic --csv datasets.csv +``` + +### Options +- `--csv`: Path to CSV file (default: `datasets.csv`) +- `--output-dir`: Output directory for JSON-LD files (default: `data/objects/summoned/generated`) +- `--ai-service`: Choose `nrp` (default), `openai`, or `anthropic` (optional - defaults to `nrp`) +- `--api-key`: API key (or use environment variable) +- `--model`: Model name (optional, uses defaults) + - NRP default: `qwen3` (other options: `llama3-sdsc`, `gpt-oss`, `gorilla`, `olmo`, `gemma3`, `kimi`, etc.) 
+ - OpenAI default: `gpt-4` + - Anthropic default: `claude-3-5-sonnet-20241022` +- `--limit`: Limit number of datasets to process +- `--test-url`: Test with a single URL instead of CSV + +## Output + +Generated JSON-LD files are saved to the output directory with filenames like: +`DatasetName_hash.jsonld` + +## Workflow + +1. Script reads the CSV file +2. Filters datasets where `hasJSONLD?` is `FALSE`, `#ERROR!`, or empty +3. For each dataset: + - Fetches the webpage + - Uses AI to detect datasets and extract metadata + - Generates JSON-LD using the extracted metadata + - Saves the JSON-LD file + diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py new file mode 100644 index 0000000..47cc795 --- /dev/null +++ b/scripts/generate_jsonld.py @@ -0,0 +1,410 @@ +#!/usr/bin/env python3 +""" +Generate JSON-LD descriptions for datasets from Google Sheet using AI. + +This script reads a CSV export of the Google Sheet, processes datasets +that need JSON-LD (hasJSONLD? = FALSE or #ERROR!), and generates +JSON-LD descriptions using AI prompts. 
+""" + +import csv +import json +import os +import sys +import argparse +from pathlib import Path +from typing import Dict, List, Optional +from urllib.parse import urlparse + +# Try to load .env file if python-dotenv is available +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass # dotenv is optional + +# Try to import AI client libraries (user needs to install) +try: + import openai + OPENAI_AVAILABLE = True +except ImportError: + OPENAI_AVAILABLE = False + +try: + import anthropic + ANTHROPIC_AVAILABLE = True +except ImportError: + ANTHROPIC_AVAILABLE = False + +# Standard libraries +import requests +from bs4 import BeautifulSoup + + +class AIClient: + """Abstract base class for AI clients.""" + + def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: + """Detect datasets on a webpage and extract metadata.""" + raise NotImplementedError + + def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: + """Generate JSON-LD from metadata.""" + raise NotImplementedError + + +class OpenAIClient(AIClient): + """OpenAI API client.""" + + def __init__(self, api_key: str, model: str = "gpt-4", base_url: str = None): + self.client = openai.OpenAI(api_key=api_key, base_url=base_url) + self.model = model + + def _call_api(self, prompt: str, system_prompt: str = None) -> str: + """Make API call to OpenAI.""" + messages = [] + if system_prompt: + messages.append({"role": "system", "content": system_prompt}) + messages.append({"role": "user", "content": prompt}) + + print(f" 📤 Sending request to API (this may take a minute)...") + response = self.client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.3, + timeout=120.0 # 2 minute timeout + ) + print(f" ✓ Received response") + return response.choices[0].message.content + + def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: + """Detect datasets using OpenAI.""" + prompt_path = 
Path(__file__).parent.parent / "prompts" / "dataset-detection-prompt.txt" + with open(prompt_path, 'r', encoding='utf-8') as f: + prompt_template = f.read() + + # Limit content to 5000 chars to speed up API calls + limited_content = webpage_content[:5000] + prompt = prompt_template.format( + URL=url, + CONTENT=limited_content, + DATASET_NAME=context.get('Dataset Name', ''), + GROUP=context.get('Group', ''), + DESCRIPTION=context.get('Description', '') + ) + + response = self._call_api(prompt) + try: + return json.loads(response) + except json.JSONDecodeError: + return {"raw_response": response, "error": "Failed to parse JSON"} + + def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: + """Generate JSON-LD using OpenAI.""" + prompt_path = Path(__file__).parent.parent / "prompts" / "jsonld-generation-prompt.txt" + with open(prompt_path, 'r', encoding='utf-8') as f: + prompt_template = f.read() + + prompt = prompt_template.format( + DATASET_NAME=metadata.get('name', ''), + URL=metadata.get('url', ''), + DESCRIPTION=metadata.get('description', ''), + GROUP=metadata.get('group', ''), + CREATOR=metadata.get('creator', ''), + PROVIDER=metadata.get('provider', ''), + PUBLISHER=metadata.get('publisher', ''), + KEYWORDS=metadata.get('keywords', ''), + SPATIAL_COVERAGE=metadata.get('spatial_coverage', ''), + EXTRACTED_METADATA=json.dumps(metadata.get('extracted', {}), indent=2), + EXAMPLE_JSONLD=example_jsonld[:2000] # Limit example size + ) + + response = self._call_api(prompt) + # Try to extract JSON from response + try: + # Look for JSON block in response + if '{' in response: + start = response.find('{') + end = response.rfind('}') + 1 + json_str = response[start:end] + # Validate it's valid JSON + json.loads(json_str) + return json_str + return response + except (json.JSONDecodeError, ValueError): + return response + + +class NRPClient(OpenAIClient): + """National Research Platform (NRP) LLM client - OpenAI-compatible.""" + + def __init__(self, api_key: 
str, model: str = "meta-llama/Llama-3.1-70B-Instruct"): + # NRP uses OpenAI-compatible API at ellm.nrp-nautilus.io + base_url = "https://ellm.nrp-nautilus.io/v1" + super().__init__(api_key=api_key, model=model, base_url=base_url) + print(f"Using NRP LLM endpoint: {base_url}") + print(f"Using model: {model}") + + +class AnthropicClient(AIClient): + """Anthropic (Claude) API client.""" + + def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"): + self.client = anthropic.Anthropic(api_key=api_key) + self.model = model + + def _call_api(self, prompt: str, system_prompt: str = None) -> str: + """Make API call to Anthropic.""" + response = self.client.messages.create( + model=self.model, + max_tokens=4096, + system=system_prompt or "You are a helpful assistant that generates structured data.", + messages=[{"role": "user", "content": prompt}] + ) + return response.content[0].text + + def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: + """Detect datasets using Anthropic.""" + prompt_path = Path(__file__).parent.parent / "prompts" / "dataset-detection-prompt.txt" + with open(prompt_path, 'r', encoding='utf-8') as f: + prompt_template = f.read() + + prompt = prompt_template.format( + URL=url, + CONTENT=webpage_content[:10000], + DATASET_NAME=context.get('Dataset Name', ''), + GROUP=context.get('Group', ''), + DESCRIPTION=context.get('Description', '') + ) + + response = self._call_api(prompt) + try: + return json.loads(response) + except json.JSONDecodeError: + return {"raw_response": response, "error": "Failed to parse JSON"} + + def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: + """Generate JSON-LD using Anthropic.""" + prompt_path = Path(__file__).parent.parent / "prompts" / "jsonld-generation-prompt.txt" + with open(prompt_path, 'r', encoding='utf-8') as f: + prompt_template = f.read() + + prompt = prompt_template.format( + DATASET_NAME=metadata.get('name', ''), + URL=metadata.get('url', ''), + 
DESCRIPTION=metadata.get('description', ''), + GROUP=metadata.get('group', ''), + CREATOR=metadata.get('creator', ''), + PROVIDER=metadata.get('provider', ''), + PUBLISHER=metadata.get('publisher', ''), + KEYWORDS=metadata.get('keywords', ''), + SPATIAL_COVERAGE=metadata.get('spatial_coverage', ''), + EXTRACTED_METADATA=json.dumps(metadata.get('extracted', {}), indent=2), + EXAMPLE_JSONLD=example_jsonld[:2000] + ) + + response = self._call_api(prompt) + try: + if '{' in response: + start = response.find('{') + end = response.rfind('}') + 1 + json_str = response[start:end] + json.loads(json_str) + return json_str + return response + except (json.JSONDecodeError, ValueError): + return response + + +def fetch_webpage(url: str) -> Optional[str]: + """Fetch webpage content.""" + try: + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' + } + response = requests.get(url, headers=headers, timeout=30) + response.raise_for_status() + return response.text + except Exception as e: + print(f"Error fetching {url}: {e}") + return None + + +def extract_text_content(html: str) -> str: + """Extract text content from HTML.""" + try: + soup = BeautifulSoup(html, 'html.parser') + # Remove script and style elements + for script in soup(["script", "style"]): + script.decompose() + return soup.get_text(separator=' ', strip=True) + except Exception as e: + print(f"Error parsing HTML: {e}") + return html[:10000] # Return first 10k chars if parsing fails + + +def load_example_jsonld() -> str: + """Load an example JSON-LD file for reference.""" + example_path = Path(__file__).parent.parent / "data" / "objects" / "summoned" / "gpp" / "2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld" + if example_path.exists(): + with open(example_path, 'r', encoding='utf-8') as f: + return f.read() + return "" + + +def read_csv(csv_path: str) -> List[Dict]: + """Read the datasets CSV file.""" + datasets = [] + with open(csv_path, 'r', encoding='utf-8') as f: + reader = 
csv.DictReader(f) + for row in reader: + datasets.append(row) + return datasets + + +def save_jsonld(jsonld_str: str, output_dir: Path, dataset_name: str, url: str) -> Path: + """Save JSON-LD to file.""" + # Create safe filename from dataset name + safe_name = "".join(c for c in dataset_name if c.isalnum() or c in (' ', '-', '_')).rstrip() + safe_name = safe_name.replace(' ', '_')[:50] # Limit length + + # Use URL hash as fallback + import hashlib + url_hash = hashlib.sha1(url.encode()).hexdigest()[:8] + + filename = f"{safe_name}_{url_hash}.jsonld" + output_path = output_dir / filename + + output_dir.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + f.write(jsonld_str) + + return output_path + + +def main(): + parser = argparse.ArgumentParser(description='Generate JSON-LD for datasets') + parser.add_argument('--csv', default='datasets.csv', help='Path to CSV file') + parser.add_argument('--output-dir', default='data/objects/summoned/generated', help='Output directory for JSON-LD files') + parser.add_argument('--ai-service', choices=['openai', 'anthropic', 'nrp'], default='nrp', help='AI service to use (default: nrp)') + parser.add_argument('--api-key', help='API key (or set environment variable)') + parser.add_argument('--model', help='Model name (optional)') + parser.add_argument('--limit', type=int, help='Limit number of datasets to process') + parser.add_argument('--test-url', help='Test with a single URL instead of CSV') + + args = parser.parse_args() + + # Initialize AI client + api_key = args.api_key or os.getenv(f"{args.ai_service.upper()}_API_KEY") or os.getenv("NRP_API_KEY") + if not api_key: + print(f"Error: API key required. Set {args.ai_service.upper()}_API_KEY environment variable or use --api-key") + sys.exit(1) + + if args.ai_service == 'openai': + if not OPENAI_AVAILABLE: + print("Error: openai package not installed. 
Run: pip install openai") + sys.exit(1) + client = OpenAIClient(api_key, args.model or "gpt-4") + elif args.ai_service == 'nrp': + if not OPENAI_AVAILABLE: + print("Error: openai package not installed. Run: pip install openai") + sys.exit(1) + # Default NRP models: qwen3, llama3-sdsc, gpt-oss, etc. + # Available models: qwen3, llama3-sdsc, gpt-oss, gorilla, olmo, gemma3, kimi, etc. + client = NRPClient(api_key, args.model or "qwen3") + elif args.ai_service == 'anthropic': + if not ANTHROPIC_AVAILABLE: + print("Error: anthropic package not installed. Run: pip install anthropic") + sys.exit(1) + client = AnthropicClient(api_key, args.model or "claude-3-5-sonnet-20241022") + + output_dir = Path(args.output_dir) + example_jsonld = load_example_jsonld() + + # Test mode with single URL + if args.test_url: + print(f"Testing with URL: {args.test_url}") + html = fetch_webpage(args.test_url) + if html: + content = extract_text_content(html) + context = {'Dataset Name': 'Test Dataset', 'Group': 'test', 'Description': ''} + result = client.detect_datasets(args.test_url, content, context) + print("\n=== Detection Result ===") + print(json.dumps(result, indent=2)) + return + + # Process CSV + datasets = read_csv(args.csv) + print(f"Found {len(datasets)} datasets in CSV") + + # Filter datasets that need JSON-LD + to_process = [ + d for d in datasets + if d.get('hasJSONLD?', '').upper() in ('FALSE', '#ERROR!', '') + and d.get('Dataset Webpage URL', '').strip() + ] + + if args.limit: + to_process = to_process[:args.limit] + + print(f"Processing {len(to_process)} datasets that need JSON-LD") + + for i, dataset in enumerate(to_process, 1): + url = dataset.get('Dataset Webpage URL', '').strip() + name = dataset.get('Dataset Name', 'Unknown') + + if not url: + print(f"[{i}/{len(to_process)}] Skipping {name}: No URL") + continue + + print(f"\n[{i}/{len(to_process)}] Processing: {name}") + print(f" URL: {url}") + + # Fetch webpage + print(" 🌐 Fetching webpage...") + html = 
fetch_webpage(url) + if not html: + print(f" ⚠️ Failed to fetch webpage") + continue + + content = extract_text_content(html) + print(f" ✓ Fetched {len(content)} characters") + + # Detect datasets + print(" 🔍 Detecting datasets with AI...") + detection_result = client.detect_datasets(url, content, dataset) + print(f" ✓ Detection complete") + + # Prepare metadata + metadata = { + 'name': name, + 'url': url, + 'description': dataset.get('Description', ''), + 'group': dataset.get('Group', ''), + 'creator': dataset.get('Creator', ''), + 'provider': dataset.get('Provider', ''), + 'publisher': dataset.get('Publisher', ''), + 'keywords': dataset.get('Keywords', ''), + 'spatial_coverage': f"{dataset.get('box_lon_min', '')},{dataset.get('box_lat_min', '')},{dataset.get('box_lon_max', '')},{dataset.get('box_lat_max', '')}" if dataset.get('box_lon_min') else '', + 'extracted': detection_result + } + + # Generate JSON-LD + print(" ✨ Generating JSON-LD...") + jsonld = client.generate_jsonld(metadata, example_jsonld) + + # Validate JSON + try: + json.loads(jsonld) + print(" ✓ Valid JSON") + except json.JSONDecodeError as e: + print(f" ⚠️ Warning: Generated JSON may be invalid: {e}") + + # Save + output_path = save_jsonld(jsonld, output_dir, name, url) + print(f" 💾 Saved to: {output_path}") + + +if __name__ == '__main__': + main() + diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..925cf35 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,18 @@ +# Requirements for JSON-LD generation script +# Install with: pip install -r requirements.txt + +# AI API clients (install at least one) +openai>=1.0.0 +anthropic>=0.18.0 + +# Web scraping and parsing +requests>=2.31.0 +beautifulsoup4>=4.12.0 + +# Environment variable management +python-dotenv>=1.0.0 + +# Optional: for better HTML parsing +lxml>=4.9.0 + + From 62460b5e63e698fc40483a772685bd8327f8efe6 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 14:17:03 -0600 
Subject: [PATCH 02/58] removed unnecessary comment --- scripts/generate_jsonld.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 47cc795..89e1a6e 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -67,14 +67,14 @@ def _call_api(self, prompt: str, system_prompt: str = None) -> str: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) - print(f" 📤 Sending request to API (this may take a minute)...") + print(f" Sending request to API (this may take a minute)...") response = self.client.chat.completions.create( model=self.model, messages=messages, temperature=0.3, timeout=120.0 # 2 minute timeout ) - print(f" ✓ Received response") + print(f" Received response") return response.choices[0].message.content def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: @@ -361,19 +361,19 @@ def main(): print(f" URL: {url}") # Fetch webpage - print(" 🌐 Fetching webpage...") + print(" Fetching webpage...") html = fetch_webpage(url) if not html: - print(f" ⚠️ Failed to fetch webpage") + print(f" Warning: Failed to fetch webpage") continue content = extract_text_content(html) - print(f" ✓ Fetched {len(content)} characters") + print(f" Fetched {len(content)} characters") # Detect datasets - print(" 🔍 Detecting datasets with AI...") + print(" Detecting datasets with AI...") detection_result = client.detect_datasets(url, content, dataset) - print(f" ✓ Detection complete") + print(f" Detection complete") # Prepare metadata metadata = { @@ -390,19 +390,19 @@ def main(): } # Generate JSON-LD - print(" ✨ Generating JSON-LD...") + print(" Generating JSON-LD...") jsonld = client.generate_jsonld(metadata, example_jsonld) # Validate JSON try: json.loads(jsonld) - print(" ✓ Valid JSON") + print(" Valid JSON") except json.JSONDecodeError as e: - print(f" ⚠️ Warning: Generated JSON 
may be invalid: {e}") + print(f" Warning: Generated JSON may be invalid: {e}") # Save output_path = save_jsonld(jsonld, output_dir, name, url) - print(f" 💾 Saved to: {output_path}") + print(f" Saved to: {output_path}") if __name__ == '__main__': From 19891e8e9286011fe0250ac9a359c3419d421541 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 14:27:21 -0600 Subject: [PATCH 03/58] Fixed the error caused by the curly braces in the prompte template --- prompts/jsonld-generation-prompt.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompts/jsonld-generation-prompt.txt b/prompts/jsonld-generation-prompt.txt index 7324678..e2092ec 100644 --- a/prompts/jsonld-generation-prompt.txt +++ b/prompts/jsonld-generation-prompt.txt @@ -19,7 +19,7 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi **Requirements**: 1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` +2. Set @context to `{{"@vocab": "https://schema.org/"}}` 3. Set @type to "Dataset" 4. Include @id with the dataset URL or identifier 5. Include all available metadata fields From 8c18bc4ae7de0991ee3e34a9abc4274499e2f4c8 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 14:44:11 -0600 Subject: [PATCH 04/58] Update for correct bounding box generation --- prompts/jsonld-generation-prompt.txt | 8 +++- scripts/generate_jsonld.py | 57 ++++++++++++++++++++++++++-- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/prompts/jsonld-generation-prompt.txt b/prompts/jsonld-generation-prompt.txt index e2092ec..ad1851d 100644 --- a/prompts/jsonld-generation-prompt.txt +++ b/prompts/jsonld-generation-prompt.txt @@ -25,8 +25,12 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi 5. Include all available metadata fields 6. For creators, use Person or Organization types with proper structure 7. 
Include distribution information if download links are available -8. Add temporalCoverage if time period is known -9. Add spatial coverage if geographic bounds are provided +8. Add temporalCoverage if time period is known (format: "YYYY-MM-DD/YYYY-MM-DD") +9. Add spatialCoverage if geographic bounds are provided: + - Use Place with geo containing GeoShape + - The box format MUST be: "west,south east,north" (comma-separated pairs, space between pairs) + - Example: For coordinates 20,-40,50,10 use box: "20,-40 50,10" (NOT "20 -40 50 10") + - Format: {"@type": "Place", "geo": {"@type": "GeoShape", "box": "west,south east,north"}} 10. Include license and access information 11. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 89e1a6e..1700682 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -128,11 +128,34 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: end = response.rfind('}') + 1 json_str = response[start:end] # Validate it's valid JSON - json.loads(json_str) - return json_str + json_data = json.loads(json_str) + # Fix spatial coverage format if needed + json_data = self._fix_spatial_coverage(json_data) + return json.dumps(json_data, indent=2) return response except (json.JSONDecodeError, ValueError): return response + + def _fix_spatial_coverage(self, data: Dict) -> Dict: + """Fix spatial coverage box format to match Schema.org standard.""" + if isinstance(data, dict) and 'spatialCoverage' in data: + spatial = data['spatialCoverage'] + if isinstance(spatial, dict) and 'geo' in spatial: + geo = spatial['geo'] + if isinstance(geo, dict) and 'box' in geo: + box = geo['box'] + if isinstance(box, str): + # Fix format: "20 -40 50 10" -> "20,-40 50,10" + # Check if it's space-separated (wrong format) + parts = box.split() + if len(parts) == 4 and ',' not in box: + try: + # Convert to proper format: 
"west,south east,north" + west, south, east, north = map(float, parts) + geo['box'] = f"{west},{south} {east},{north}" + except (ValueError, TypeError): + pass # If conversion fails, leave as is + return data class NRPClient(OpenAIClient): @@ -209,11 +232,37 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: start = response.find('{') end = response.rfind('}') + 1 json_str = response[start:end] - json.loads(json_str) - return json_str + json_data = json.loads(json_str) + # Fix spatial coverage format if needed + json_data = self._fix_spatial_coverage(json_data) + return json.dumps(json_data, indent=2) return response except (json.JSONDecodeError, ValueError): return response + + def _fix_spatial_coverage(self, data: Dict) -> Dict: + """Fix spatial coverage box format to match Schema.org standard. + + Converts "20 -40 50 10" to "20,-40 50,10" format. + """ + if isinstance(data, dict) and 'spatialCoverage' in data: + spatial = data['spatialCoverage'] + if isinstance(spatial, dict) and 'geo' in spatial: + geo = spatial['geo'] + if isinstance(geo, dict) and 'box' in geo: + box = geo['box'] + if isinstance(box, str): + # Fix format: "20 -40 50 10" -> "20,-40 50,10" + # Check if it's space-separated without commas (wrong format) + parts = box.split() + if len(parts) == 4 and ',' not in box: + try: + # Convert to proper format: "west,south east,north" + west, south, east, north = map(float, parts) + geo['box'] = f"{west},{south} {east},{north}" + except (ValueError, TypeError): + pass # If conversion fails, leave as is + return data def fetch_webpage(url: str) -> Optional[str]: From fb1d201b0cf2af1abd4a5efa9ed4289c0f4c5413 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 14:50:22 -0600 Subject: [PATCH 05/58] Fixed another curly braces errors --- prompts/jsonld-generation-prompt.txt | 2 +- scripts/generate_jsonld.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git 
a/prompts/jsonld-generation-prompt.txt b/prompts/jsonld-generation-prompt.txt index ad1851d..2a83b19 100644 --- a/prompts/jsonld-generation-prompt.txt +++ b/prompts/jsonld-generation-prompt.txt @@ -30,7 +30,7 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi - Use Place with geo containing GeoShape - The box format MUST be: "west,south east,north" (comma-separated pairs, space between pairs) - Example: For coordinates 20,-40,50,10 use box: "20,-40 50,10" (NOT "20 -40 50 10") - - Format: {"@type": "Place", "geo": {"@type": "GeoShape", "box": "west,south east,north"}} + - Format: {{"@type": "Place", "geo": {{"@type": "GeoShape", "box": "west,south east,north"}}}} 10. Include license and access information 11. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 1700682..999eb7b 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -105,6 +105,9 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: with open(prompt_path, 'r', encoding='utf-8') as f: prompt_template = f.read() + # Escape curly braces in example JSON-LD to prevent format errors + escaped_example = example_jsonld[:2000].replace('{', '{{').replace('}', '}}') + prompt = prompt_template.format( DATASET_NAME=metadata.get('name', ''), URL=metadata.get('url', ''), @@ -115,8 +118,8 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: PUBLISHER=metadata.get('publisher', ''), KEYWORDS=metadata.get('keywords', ''), SPATIAL_COVERAGE=metadata.get('spatial_coverage', ''), - EXTRACTED_METADATA=json.dumps(metadata.get('extracted', {}), indent=2), - EXAMPLE_JSONLD=example_jsonld[:2000] # Limit example size + EXTRACTED_METADATA=json.dumps(metadata.get('extracted', {}), indent=2).replace('{', '{{').replace('}', '}}'), + EXAMPLE_JSONLD=escaped_example ) response = self._call_api(prompt) @@ -212,6 +215,9 @@ def 
generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: with open(prompt_path, 'r', encoding='utf-8') as f: prompt_template = f.read() + # Escape curly braces in example JSON-LD to prevent format errors + escaped_example = example_jsonld[:2000].replace('{', '{{').replace('}', '}}') + prompt = prompt_template.format( DATASET_NAME=metadata.get('name', ''), URL=metadata.get('url', ''), @@ -222,8 +228,8 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: PUBLISHER=metadata.get('publisher', ''), KEYWORDS=metadata.get('keywords', ''), SPATIAL_COVERAGE=metadata.get('spatial_coverage', ''), - EXTRACTED_METADATA=json.dumps(metadata.get('extracted', {}), indent=2), - EXAMPLE_JSONLD=example_jsonld[:2000] + EXTRACTED_METADATA=json.dumps(metadata.get('extracted', {}), indent=2).replace('{', '{{').replace('}', '}}'), + EXAMPLE_JSONLD=escaped_example ) response = self._call_api(prompt) From c58123fd8e0c5eddf5138f123a06b24d6902c782 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 15:05:26 -0600 Subject: [PATCH 06/58] added error handling for time out --- scripts/generate_jsonld.py | 224 ++++++++++++++++++++++++++----------- 1 file changed, 157 insertions(+), 67 deletions(-) diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 999eb7b..5dbdd63 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -67,14 +67,20 @@ def _call_api(self, prompt: str, system_prompt: str = None) -> str: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) - print(f" Sending request to API (this may take a minute)...") - response = self.client.chat.completions.create( - model=self.model, - messages=messages, - temperature=0.3, - timeout=120.0 # 2 minute timeout - ) - print(f" Received response") + print(f" Sending request to API (this may take 1-3 minutes)...") + try: + response = self.client.chat.completions.create( + model=self.model, + 
messages=messages, + temperature=0.3, + timeout=180.0 # 3 minute timeout + ) + print(f" Received response") + except Exception as e: + error_msg = str(e).lower() + if "timeout" in error_msg or "timed out" in error_msg: + raise TimeoutError("API request timed out") + raise return response.choices[0].message.content def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: @@ -93,11 +99,21 @@ def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict DESCRIPTION=context.get('Description', '') ) - response = self._call_api(prompt) - try: - return json.loads(response) - except json.JSONDecodeError: - return {"raw_response": response, "error": "Failed to parse JSON"} + # Retry logic for timeouts + max_retries = 2 + for attempt in range(max_retries): + try: + response = self._call_api(prompt) + try: + return json.loads(response) + except json.JSONDecodeError: + return {"raw_response": response, "error": "Failed to parse JSON"} + except TimeoutError as e: + if attempt < max_retries - 1: + print(f" Timeout occurred, retrying ({attempt + 1}/{max_retries})...") + continue + else: + raise def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: """Generate JSON-LD using OpenAI.""" @@ -122,22 +138,34 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: EXAMPLE_JSONLD=escaped_example ) - response = self._call_api(prompt) - # Try to extract JSON from response - try: - # Look for JSON block in response - if '{' in response: - start = response.find('{') - end = response.rfind('}') + 1 - json_str = response[start:end] - # Validate it's valid JSON - json_data = json.loads(json_str) - # Fix spatial coverage format if needed - json_data = self._fix_spatial_coverage(json_data) - return json.dumps(json_data, indent=2) - return response - except (json.JSONDecodeError, ValueError): - return response + # Retry logic for timeouts + max_retries = 2 + for attempt in range(max_retries): + try: + response = 
self._call_api(prompt) + # Try to extract JSON from response + try: + # Look for JSON block in response + if '{' in response: + start = response.find('{') + end = response.rfind('}') + 1 + json_str = response[start:end] + # Validate it's valid JSON + json_data = json.loads(json_str) + # Fix spatial coverage format if needed + json_data = self._fix_spatial_coverage(json_data) + return json.dumps(json_data, indent=2) + return response + except (json.JSONDecodeError, ValueError): + return response + except TimeoutError as e: + if attempt < max_retries - 1: + print(f" Timeout occurred, retrying ({attempt + 1}/{max_retries})...") + continue + else: + raise + except Exception as e: + raise def _fix_spatial_coverage(self, data: Dict) -> Dict: """Fix spatial coverage box format to match Schema.org standard.""" @@ -181,13 +209,22 @@ def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"): def _call_api(self, prompt: str, system_prompt: str = None) -> str: """Make API call to Anthropic.""" - response = self.client.messages.create( - model=self.model, - max_tokens=4096, - system=system_prompt or "You are a helpful assistant that generates structured data.", - messages=[{"role": "user", "content": prompt}] - ) - return response.content[0].text + print(f" Sending request to API (this may take 1-3 minutes)...") + try: + response = self.client.messages.create( + model=self.model, + max_tokens=4096, + system=system_prompt or "You are a helpful assistant that generates structured data.", + messages=[{"role": "user", "content": prompt}], + timeout=180.0 # 3 minute timeout + ) + print(f" Received response") + return response.content[0].text + except Exception as e: + error_msg = str(e).lower() + if "timeout" in error_msg or "timed out" in error_msg: + raise TimeoutError("API request timed out") + raise def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: """Detect datasets using Anthropic.""" @@ -203,11 +240,21 @@ def 
detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict DESCRIPTION=context.get('Description', '') ) - response = self._call_api(prompt) - try: - return json.loads(response) - except json.JSONDecodeError: - return {"raw_response": response, "error": "Failed to parse JSON"} + # Retry logic for timeouts + max_retries = 2 + for attempt in range(max_retries): + try: + response = self._call_api(prompt) + try: + return json.loads(response) + except json.JSONDecodeError: + return {"raw_response": response, "error": "Failed to parse JSON"} + except TimeoutError as e: + if attempt < max_retries - 1: + print(f" Timeout occurred, retrying ({attempt + 1}/{max_retries})...") + continue + else: + raise def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: """Generate JSON-LD using Anthropic.""" @@ -232,19 +279,31 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: EXAMPLE_JSONLD=escaped_example ) - response = self._call_api(prompt) - try: - if '{' in response: - start = response.find('{') - end = response.rfind('}') + 1 - json_str = response[start:end] - json_data = json.loads(json_str) - # Fix spatial coverage format if needed - json_data = self._fix_spatial_coverage(json_data) - return json.dumps(json_data, indent=2) - return response - except (json.JSONDecodeError, ValueError): - return response + # Retry logic for timeouts + max_retries = 2 + for attempt in range(max_retries): + try: + response = self._call_api(prompt) + try: + if '{' in response: + start = response.find('{') + end = response.rfind('}') + 1 + json_str = response[start:end] + json_data = json.loads(json_str) + # Fix spatial coverage format if needed + json_data = self._fix_spatial_coverage(json_data) + return json.dumps(json_data, indent=2) + return response + except (json.JSONDecodeError, ValueError): + return response + except TimeoutError as e: + if attempt < max_retries - 1: + print(f" Timeout occurred, retrying ({attempt + 1}/{max_retries})...") 
+ continue + else: + raise + except Exception as e: + raise def _fix_spatial_coverage(self, data: Dict) -> Dict: """Fix spatial coverage box format to match Schema.org standard. @@ -404,6 +463,8 @@ def main(): print(f"Processing {len(to_process)} datasets that need JSON-LD") + timed_out_urls = [] + for i, dataset in enumerate(to_process, 1): url = dataset.get('Dataset Webpage URL', '').strip() name = dataset.get('Dataset Name', 'Unknown') @@ -427,8 +488,16 @@ def main(): # Detect datasets print(" Detecting datasets with AI...") - detection_result = client.detect_datasets(url, content, dataset) - print(f" Detection complete") + try: + detection_result = client.detect_datasets(url, content, dataset) + print(f" Detection complete") + except TimeoutError: + print(f" Error: Timed out after 2 retries. Skipping this dataset.") + timed_out_urls.append({'name': name, 'url': url}) + continue + except Exception as e: + print(f" Error during detection: {e}") + continue # Prepare metadata metadata = { @@ -446,18 +515,39 @@ def main(): # Generate JSON-LD print(" Generating JSON-LD...") - jsonld = client.generate_jsonld(metadata, example_jsonld) - - # Validate JSON try: - json.loads(jsonld) - print(" Valid JSON") - except json.JSONDecodeError as e: - print(f" Warning: Generated JSON may be invalid: {e}") - - # Save - output_path = save_jsonld(jsonld, output_dir, name, url) - print(f" Saved to: {output_path}") + jsonld = client.generate_jsonld(metadata, example_jsonld) + + # Validate JSON + try: + json.loads(jsonld) + print(" Valid JSON") + except json.JSONDecodeError as e: + print(f" Warning: Generated JSON may be invalid: {e}") + + # Save + output_path = save_jsonld(jsonld, output_dir, name, url) + print(f" Saved to: {output_path}") + except TimeoutError: + print(f" Error: Timed out after 2 retries. 
Skipping this dataset.") + timed_out_urls.append({'name': name, 'url': url}) + continue + except Exception as e: + print(f" Error: {e}") + continue + + # Print summary of timed out URLs + if timed_out_urls: + print(f"\n{'='*60}") + print(f"Summary: {len(timed_out_urls)} dataset(s) timed out:") + print(f"{'='*60}") + for item in timed_out_urls: + print(f" - {item['name']}: {item['url']}") + print(f"{'='*60}") + else: + print(f"\n{'='*60}") + print("All datasets processed successfully!") + print(f"{'='*60}") if __name__ == '__main__': From a31f7221cf173c00d6cb8ceebaed4f4ffec19c57 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 15:27:51 -0600 Subject: [PATCH 07/58] updated error handling --- scripts/generate_jsonld.py | 508 ++++++++++++++++++++----------------- 1 file changed, 271 insertions(+), 237 deletions(-) diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 5dbdd63..70f7a39 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -8,13 +8,14 @@ """ import csv +import hashlib import json import os import sys import argparse +import threading from pathlib import Path from typing import Dict, List, Optional -from urllib.parse import urlparse # Try to load .env file if python-dotenv is available try: @@ -40,6 +41,42 @@ import requests from bs4 import BeautifulSoup +# Constants +API_TIMEOUT_SECONDS = 180.0 # 3 minutes +MAX_RETRIES = 2 +CONTENT_LIMIT_DETECTION = 5000 # Characters for detection prompt +CONTENT_LIMIT_ANTHROPIC = 10000 # Characters for Anthropic detection +EXAMPLE_JSONLD_LIMIT = 2000 # Characters for example JSON-LD in prompt +WEBPAGE_TIMEOUT = 30 # Seconds for webpage fetching +FILENAME_MAX_LENGTH = 50 # Maximum length for dataset name in filename +URL_HASH_LENGTH = 8 # Length of URL hash in filename +HTML_FALLBACK_LIMIT = 10000 # Characters to return if HTML parsing fails + +# Server error codes to detect +SERVER_ERROR_CODES = ['500', '502', '503', '504', 'internal server error'] + +# CSV 
field names +CSV_FIELDS = { + 'HAS_JSONLD': 'hasJSONLD?', + 'WEBPAGE_URL': 'Dataset Webpage URL', + 'NAME': 'Dataset Name', + 'DESCRIPTION': 'Description', + 'GROUP': 'Group', + 'CREATOR': 'Creator', + 'PROVIDER': 'Provider', + 'PUBLISHER': 'Publisher', + 'KEYWORDS': 'Keywords', + 'BOX_LON_MIN': 'box_lon_min', + 'BOX_LAT_MIN': 'box_lat_min', + 'BOX_LON_MAX': 'box_lon_max', + 'BOX_LAT_MAX': 'box_lat_max', +} + +# Project root path (parent of scripts directory) +PROJECT_ROOT = Path(__file__).parent.parent +PROMPTS_DIR = PROJECT_ROOT / "prompts" +DATA_DIR = PROJECT_ROOT / "data" / "objects" / "summoned" + class AIClient: """Abstract base class for AI clients.""" @@ -51,6 +88,135 @@ def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: """Generate JSON-LD from metadata.""" raise NotImplementedError + + def _retry_with_timeout(self, func, *args, **kwargs): + """Helper method to retry a function call with timeout handling.""" + for attempt in range(MAX_RETRIES): + try: + return func(*args, **kwargs) + except TimeoutError as e: + if attempt < MAX_RETRIES - 1: + print(f" Timeout occurred, retrying ({attempt + 1}/{MAX_RETRIES})...") + continue + else: + raise + + def _extract_json_from_response(self, response: str) -> str: + """Extract and validate JSON from API response.""" + try: + if '{' in response: + start = response.find('{') + end = response.rfind('}') + 1 + json_str = response[start:end] + json_data = json.loads(json_str) + # Fix spatial coverage format if needed + json_data = self._fix_spatial_coverage(json_data) + return json.dumps(json_data, indent=2) + return response + except (json.JSONDecodeError, ValueError): + return response + + def _fix_spatial_coverage(self, data: Dict) -> Dict: + """Fix spatial coverage box format to match Schema.org standard. + + Converts "20 -40 50 10" to "20,-40 50,10" format. 
+ """ + if isinstance(data, dict) and 'spatialCoverage' in data: + spatial = data['spatialCoverage'] + if isinstance(spatial, dict) and 'geo' in spatial: + geo = spatial['geo'] + if isinstance(geo, dict) and 'box' in geo: + box = geo['box'] + if isinstance(box, str): + # Fix format: "20 -40 50 10" -> "20,-40 50,10" + parts = box.split() + if len(parts) == 4 and ',' not in box: + try: + # Convert to proper format: "west,south east,north" + west, south, east, north = map(float, parts) + geo['box'] = f"{west},{south} {east},{north}" + except (ValueError, TypeError): + pass # If conversion fails, leave as is + return data + + def _is_server_error(self, error: Exception) -> bool: + """Check if an exception represents a server error.""" + error_str = str(error).lower() + return any(code in error_str for code in SERVER_ERROR_CODES) + + def _call_api_with_timeout(self, api_call_func): + """Execute API call with threading-based timeout enforcement.""" + result = [None] + exception = [None] + + def api_call(): + try: + result[0] = api_call_func() + except Exception as e: + exception[0] = e + + thread = threading.Thread(target=api_call) + thread.daemon = True + thread.start() + thread.join(timeout=API_TIMEOUT_SECONDS) + + if thread.is_alive(): + print(f" Request exceeded {API_TIMEOUT_SECONDS/60:.0f} minute timeout") + raise TimeoutError(f"API request timed out after {API_TIMEOUT_SECONDS/60:.0f} minutes") + + if exception[0]: + error_msg = str(exception[0]).lower() + # Check for timeout errors + if "timeout" in error_msg or "timed out" in error_msg: + raise TimeoutError("API request timed out") + # Check for server errors + if self._is_server_error(exception[0]): + raise Exception(f"API server error: {exception[0]}") + # Re-raise other exceptions + raise exception[0] + + if result[0] is None: + raise TimeoutError("API request timed out") + + return result[0] + + def _load_prompt_template(self, filename: str) -> str: + """Load a prompt template from the prompts directory.""" + 
prompt_path = PROMPTS_DIR / filename + with open(prompt_path, 'r', encoding='utf-8') as f: + return f.read() + + def _format_detection_prompt(self, url: str, content: str, context: Dict, content_limit: int) -> str: + """Format the dataset detection prompt.""" + template = self._load_prompt_template("dataset-detection-prompt.txt") + limited_content = content[:content_limit] + return template.format( + URL=url, + CONTENT=limited_content, + DATASET_NAME=context.get(CSV_FIELDS['NAME'], ''), + GROUP=context.get(CSV_FIELDS['GROUP'], ''), + DESCRIPTION=context.get(CSV_FIELDS['DESCRIPTION'], '') + ) + + def _format_generation_prompt(self, metadata: Dict, example_jsonld: str) -> str: + """Format the JSON-LD generation prompt.""" + template = self._load_prompt_template("jsonld-generation-prompt.txt") + escaped_example = example_jsonld[:EXAMPLE_JSONLD_LIMIT].replace('{', '{{').replace('}', '}}') + escaped_metadata = json.dumps(metadata.get('extracted', {}), indent=2).replace('{', '{{').replace('}', '}}') + + return template.format( + DATASET_NAME=metadata.get('name', ''), + URL=metadata.get('url', ''), + DESCRIPTION=metadata.get('description', ''), + GROUP=metadata.get('group', ''), + CREATOR=metadata.get('creator', ''), + PROVIDER=metadata.get('provider', ''), + PUBLISHER=metadata.get('publisher', ''), + KEYWORDS=metadata.get('keywords', ''), + SPATIAL_COVERAGE=metadata.get('spatial_coverage', ''), + EXTRACTED_METADATA=escaped_metadata, + EXAMPLE_JSONLD=escaped_example + ) class OpenAIClient(AIClient): @@ -61,132 +227,48 @@ def __init__(self, api_key: str, model: str = "gpt-4", base_url: str = None): self.model = model def _call_api(self, prompt: str, system_prompt: str = None) -> str: - """Make API call to OpenAI.""" + """Make API call to OpenAI with timeout enforcement.""" messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) print(f" Sending request to API (this may take 1-3 
minutes)...") - try: - response = self.client.chat.completions.create( + + def api_call(): + return self.client.chat.completions.create( model=self.model, messages=messages, temperature=0.3, - timeout=180.0 # 3 minute timeout + timeout=API_TIMEOUT_SECONDS ) - print(f" Received response") - except Exception as e: - error_msg = str(e).lower() - if "timeout" in error_msg or "timed out" in error_msg: - raise TimeoutError("API request timed out") - raise + + response = self._call_api_with_timeout(api_call) + print(f" Received response") return response.choices[0].message.content def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: """Detect datasets using OpenAI.""" - prompt_path = Path(__file__).parent.parent / "prompts" / "dataset-detection-prompt.txt" - with open(prompt_path, 'r', encoding='utf-8') as f: - prompt_template = f.read() + prompt = self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_DETECTION) - # Limit content to 5000 chars to speed up API calls - limited_content = webpage_content[:5000] - prompt = prompt_template.format( - URL=url, - CONTENT=limited_content, - DATASET_NAME=context.get('Dataset Name', ''), - GROUP=context.get('Group', ''), - DESCRIPTION=context.get('Description', '') - ) - - # Retry logic for timeouts - max_retries = 2 - for attempt in range(max_retries): + def call_detect(): + response = self._call_api(prompt) try: - response = self._call_api(prompt) - try: - return json.loads(response) - except json.JSONDecodeError: - return {"raw_response": response, "error": "Failed to parse JSON"} - except TimeoutError as e: - if attempt < max_retries - 1: - print(f" Timeout occurred, retrying ({attempt + 1}/{max_retries})...") - continue - else: - raise + return json.loads(response) + except json.JSONDecodeError: + return {"raw_response": response, "error": "Failed to parse JSON"} + + return self._retry_with_timeout(call_detect) def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> 
str: """Generate JSON-LD using OpenAI.""" - prompt_path = Path(__file__).parent.parent / "prompts" / "jsonld-generation-prompt.txt" - with open(prompt_path, 'r', encoding='utf-8') as f: - prompt_template = f.read() + prompt = self._format_generation_prompt(metadata, example_jsonld) - # Escape curly braces in example JSON-LD to prevent format errors - escaped_example = example_jsonld[:2000].replace('{', '{{').replace('}', '}}') + def call_generate(): + response = self._call_api(prompt) + return self._extract_json_from_response(response) - prompt = prompt_template.format( - DATASET_NAME=metadata.get('name', ''), - URL=metadata.get('url', ''), - DESCRIPTION=metadata.get('description', ''), - GROUP=metadata.get('group', ''), - CREATOR=metadata.get('creator', ''), - PROVIDER=metadata.get('provider', ''), - PUBLISHER=metadata.get('publisher', ''), - KEYWORDS=metadata.get('keywords', ''), - SPATIAL_COVERAGE=metadata.get('spatial_coverage', ''), - EXTRACTED_METADATA=json.dumps(metadata.get('extracted', {}), indent=2).replace('{', '{{').replace('}', '}}'), - EXAMPLE_JSONLD=escaped_example - ) - - # Retry logic for timeouts - max_retries = 2 - for attempt in range(max_retries): - try: - response = self._call_api(prompt) - # Try to extract JSON from response - try: - # Look for JSON block in response - if '{' in response: - start = response.find('{') - end = response.rfind('}') + 1 - json_str = response[start:end] - # Validate it's valid JSON - json_data = json.loads(json_str) - # Fix spatial coverage format if needed - json_data = self._fix_spatial_coverage(json_data) - return json.dumps(json_data, indent=2) - return response - except (json.JSONDecodeError, ValueError): - return response - except TimeoutError as e: - if attempt < max_retries - 1: - print(f" Timeout occurred, retrying ({attempt + 1}/{max_retries})...") - continue - else: - raise - except Exception as e: - raise - - def _fix_spatial_coverage(self, data: Dict) -> Dict: - """Fix spatial coverage box format to 
match Schema.org standard.""" - if isinstance(data, dict) and 'spatialCoverage' in data: - spatial = data['spatialCoverage'] - if isinstance(spatial, dict) and 'geo' in spatial: - geo = spatial['geo'] - if isinstance(geo, dict) and 'box' in geo: - box = geo['box'] - if isinstance(box, str): - # Fix format: "20 -40 50 10" -> "20,-40 50,10" - # Check if it's space-separated (wrong format) - parts = box.split() - if len(parts) == 4 and ',' not in box: - try: - # Convert to proper format: "west,south east,north" - west, south, east, north = map(float, parts) - geo['box'] = f"{west},{south} {east},{north}" - except (ValueError, TypeError): - pass # If conversion fails, leave as is - return data + return self._retry_with_timeout(call_generate) class NRPClient(OpenAIClient): @@ -208,126 +290,44 @@ def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"): self.model = model def _call_api(self, prompt: str, system_prompt: str = None) -> str: - """Make API call to Anthropic.""" + """Make API call to Anthropic with timeout enforcement.""" print(f" Sending request to API (this may take 1-3 minutes)...") - try: - response = self.client.messages.create( + + def api_call(): + return self.client.messages.create( model=self.model, max_tokens=4096, system=system_prompt or "You are a helpful assistant that generates structured data.", messages=[{"role": "user", "content": prompt}], - timeout=180.0 # 3 minute timeout + timeout=API_TIMEOUT_SECONDS ) - print(f" Received response") - return response.content[0].text - except Exception as e: - error_msg = str(e).lower() - if "timeout" in error_msg or "timed out" in error_msg: - raise TimeoutError("API request timed out") - raise + + response = self._call_api_with_timeout(api_call) + print(f" Received response") + return response.content[0].text def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: """Detect datasets using Anthropic.""" - prompt_path = Path(__file__).parent.parent / "prompts" / 
"dataset-detection-prompt.txt" - with open(prompt_path, 'r', encoding='utf-8') as f: - prompt_template = f.read() + prompt = self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_ANTHROPIC) - prompt = prompt_template.format( - URL=url, - CONTENT=webpage_content[:10000], - DATASET_NAME=context.get('Dataset Name', ''), - GROUP=context.get('Group', ''), - DESCRIPTION=context.get('Description', '') - ) - - # Retry logic for timeouts - max_retries = 2 - for attempt in range(max_retries): + def call_detect(): + response = self._call_api(prompt) try: - response = self._call_api(prompt) - try: - return json.loads(response) - except json.JSONDecodeError: - return {"raw_response": response, "error": "Failed to parse JSON"} - except TimeoutError as e: - if attempt < max_retries - 1: - print(f" Timeout occurred, retrying ({attempt + 1}/{max_retries})...") - continue - else: - raise + return json.loads(response) + except json.JSONDecodeError: + return {"raw_response": response, "error": "Failed to parse JSON"} + + return self._retry_with_timeout(call_detect) def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: """Generate JSON-LD using Anthropic.""" - prompt_path = Path(__file__).parent.parent / "prompts" / "jsonld-generation-prompt.txt" - with open(prompt_path, 'r', encoding='utf-8') as f: - prompt_template = f.read() - - # Escape curly braces in example JSON-LD to prevent format errors - escaped_example = example_jsonld[:2000].replace('{', '{{').replace('}', '}}') - - prompt = prompt_template.format( - DATASET_NAME=metadata.get('name', ''), - URL=metadata.get('url', ''), - DESCRIPTION=metadata.get('description', ''), - GROUP=metadata.get('group', ''), - CREATOR=metadata.get('creator', ''), - PROVIDER=metadata.get('provider', ''), - PUBLISHER=metadata.get('publisher', ''), - KEYWORDS=metadata.get('keywords', ''), - SPATIAL_COVERAGE=metadata.get('spatial_coverage', ''), - EXTRACTED_METADATA=json.dumps(metadata.get('extracted', {}), 
indent=2).replace('{', '{{').replace('}', '}}'), - EXAMPLE_JSONLD=escaped_example - ) + prompt = self._format_generation_prompt(metadata, example_jsonld) - # Retry logic for timeouts - max_retries = 2 - for attempt in range(max_retries): - try: - response = self._call_api(prompt) - try: - if '{' in response: - start = response.find('{') - end = response.rfind('}') + 1 - json_str = response[start:end] - json_data = json.loads(json_str) - # Fix spatial coverage format if needed - json_data = self._fix_spatial_coverage(json_data) - return json.dumps(json_data, indent=2) - return response - except (json.JSONDecodeError, ValueError): - return response - except TimeoutError as e: - if attempt < max_retries - 1: - print(f" Timeout occurred, retrying ({attempt + 1}/{max_retries})...") - continue - else: - raise - except Exception as e: - raise - - def _fix_spatial_coverage(self, data: Dict) -> Dict: - """Fix spatial coverage box format to match Schema.org standard. + def call_generate(): + response = self._call_api(prompt) + return self._extract_json_from_response(response) - Converts "20 -40 50 10" to "20,-40 50,10" format. 
- """ - if isinstance(data, dict) and 'spatialCoverage' in data: - spatial = data['spatialCoverage'] - if isinstance(spatial, dict) and 'geo' in spatial: - geo = spatial['geo'] - if isinstance(geo, dict) and 'box' in geo: - box = geo['box'] - if isinstance(box, str): - # Fix format: "20 -40 50 10" -> "20,-40 50,10" - # Check if it's space-separated without commas (wrong format) - parts = box.split() - if len(parts) == 4 and ',' not in box: - try: - # Convert to proper format: "west,south east,north" - west, south, east, north = map(float, parts) - geo['box'] = f"{west},{south} {east},{north}" - except (ValueError, TypeError): - pass # If conversion fails, leave as is - return data + return self._retry_with_timeout(call_generate) def fetch_webpage(url: str) -> Optional[str]: @@ -336,7 +336,7 @@ def fetch_webpage(url: str) -> Optional[str]: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' } - response = requests.get(url, headers=headers, timeout=30) + response = requests.get(url, headers=headers, timeout=WEBPAGE_TIMEOUT) response.raise_for_status() return response.text except Exception as e: @@ -354,12 +354,12 @@ def extract_text_content(html: str) -> str: return soup.get_text(separator=' ', strip=True) except Exception as e: print(f"Error parsing HTML: {e}") - return html[:10000] # Return first 10k chars if parsing fails + return html[:HTML_FALLBACK_LIMIT] # Return first N chars if parsing fails def load_example_jsonld() -> str: """Load an example JSON-LD file for reference.""" - example_path = Path(__file__).parent.parent / "data" / "objects" / "summoned" / "gpp" / "2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld" + example_path = DATA_DIR / "gpp" / "2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld" if example_path.exists(): with open(example_path, 'r', encoding='utf-8') as f: return f.read() @@ -380,11 +380,10 @@ def save_jsonld(jsonld_str: str, output_dir: Path, dataset_name: str, url: str) """Save JSON-LD to file.""" # 
Create safe filename from dataset name safe_name = "".join(c for c in dataset_name if c.isalnum() or c in (' ', '-', '_')).rstrip() - safe_name = safe_name.replace(' ', '_')[:50] # Limit length + safe_name = safe_name.replace(' ', '_')[:FILENAME_MAX_LENGTH] # Use URL hash as fallback - import hashlib - url_hash = hashlib.sha1(url.encode()).hexdigest()[:8] + url_hash = hashlib.sha1(url.encode()).hexdigest()[:URL_HASH_LENGTH] filename = f"{safe_name}_{url_hash}.jsonld" output_path = output_dir / filename @@ -441,7 +440,11 @@ def main(): html = fetch_webpage(args.test_url) if html: content = extract_text_content(html) - context = {'Dataset Name': 'Test Dataset', 'Group': 'test', 'Description': ''} + context = { + CSV_FIELDS['NAME']: 'Test Dataset', + CSV_FIELDS['GROUP']: 'test', + CSV_FIELDS['DESCRIPTION']: '' + } result = client.detect_datasets(args.test_url, content, context) print("\n=== Detection Result ===") print(json.dumps(result, indent=2)) @@ -454,8 +457,8 @@ def main(): # Filter datasets that need JSON-LD to_process = [ d for d in datasets - if d.get('hasJSONLD?', '').upper() in ('FALSE', '#ERROR!', '') - and d.get('Dataset Webpage URL', '').strip() + if d.get(CSV_FIELDS['HAS_JSONLD'], '').upper() in ('FALSE', '#ERROR!', '') + and d.get(CSV_FIELDS['WEBPAGE_URL'], '').strip() ] if args.limit: @@ -466,8 +469,8 @@ def main(): timed_out_urls = [] for i, dataset in enumerate(to_process, 1): - url = dataset.get('Dataset Webpage URL', '').strip() - name = dataset.get('Dataset Name', 'Unknown') + url = dataset.get(CSV_FIELDS['WEBPAGE_URL'], '').strip() + name = dataset.get(CSV_FIELDS['NAME'], 'Unknown') if not url: print(f"[{i}/{len(to_process)}] Skipping {name}: No URL") @@ -492,24 +495,37 @@ def main(): detection_result = client.detect_datasets(url, content, dataset) print(f" Detection complete") except TimeoutError: - print(f" Error: Timed out after 2 retries. 
Skipping this dataset.") - timed_out_urls.append({'name': name, 'url': url}) + print(f" Error: Timed out after {MAX_RETRIES} retries. Skipping this dataset.") + timed_out_urls.append({'name': name, 'url': url, 'reason': 'timeout'}) continue except Exception as e: - print(f" Error during detection: {e}") + # Check if it's a server error + if any(code in str(e).lower() for code in SERVER_ERROR_CODES): + print(f" Error: API server error during detection. Skipping this dataset.") + print(f" Details: {e}") + timed_out_urls.append({'name': name, 'url': url, 'reason': 'server_error'}) + else: + print(f" Error during detection: {e}") + timed_out_urls.append({'name': name, 'url': url, 'reason': 'detection_error'}) continue # Prepare metadata metadata = { 'name': name, 'url': url, - 'description': dataset.get('Description', ''), - 'group': dataset.get('Group', ''), - 'creator': dataset.get('Creator', ''), - 'provider': dataset.get('Provider', ''), - 'publisher': dataset.get('Publisher', ''), - 'keywords': dataset.get('Keywords', ''), - 'spatial_coverage': f"{dataset.get('box_lon_min', '')},{dataset.get('box_lat_min', '')},{dataset.get('box_lon_max', '')},{dataset.get('box_lat_max', '')}" if dataset.get('box_lon_min') else '', + 'description': dataset.get(CSV_FIELDS['DESCRIPTION'], ''), + 'group': dataset.get(CSV_FIELDS['GROUP'], ''), + 'creator': dataset.get(CSV_FIELDS['CREATOR'], ''), + 'provider': dataset.get(CSV_FIELDS['PROVIDER'], ''), + 'publisher': dataset.get(CSV_FIELDS['PUBLISHER'], ''), + 'keywords': dataset.get(CSV_FIELDS['KEYWORDS'], ''), + 'spatial_coverage': ( + f"{dataset.get(CSV_FIELDS['BOX_LON_MIN'], '')}," + f"{dataset.get(CSV_FIELDS['BOX_LAT_MIN'], '')}," + f"{dataset.get(CSV_FIELDS['BOX_LON_MAX'], '')}," + f"{dataset.get(CSV_FIELDS['BOX_LAT_MAX'], '')}" + if dataset.get(CSV_FIELDS['BOX_LON_MIN']) else '' + ), 'extracted': detection_result } @@ -529,20 +545,38 @@ def main(): output_path = save_jsonld(jsonld, output_dir, name, url) print(f" Saved to: 
{output_path}") except TimeoutError: - print(f" Error: Timed out after 2 retries. Skipping this dataset.") - timed_out_urls.append({'name': name, 'url': url}) + print(f" Error: Timed out after {MAX_RETRIES} retries. Skipping this dataset.") + timed_out_urls.append({'name': name, 'url': url, 'reason': 'timeout'}) continue except Exception as e: - print(f" Error: {e}") + # Check if it's a server error + if any(code in str(e).lower() for code in SERVER_ERROR_CODES): + print(f" Error: API server error after {MAX_RETRIES} retries. Skipping this dataset.") + print(f" Details: {e}") + timed_out_urls.append({'name': name, 'url': url, 'reason': 'server_error'}) + else: + print(f" Error: {e}. Skipping this dataset.") + timed_out_urls.append({'name': name, 'url': url, 'reason': 'other_error'}) continue - # Print summary of timed out URLs + # Print summary of failed URLs if timed_out_urls: print(f"\n{'='*60}") - print(f"Summary: {len(timed_out_urls)} dataset(s) timed out:") + print(f"Summary: {len(timed_out_urls)} dataset(s) failed:") print(f"{'='*60}") + # Group by reason + by_reason = {} for item in timed_out_urls: - print(f" - {item['name']}: {item['url']}") + reason = item.get('reason', 'unknown') + if reason not in by_reason: + by_reason[reason] = [] + by_reason[reason].append(item) + + for reason, items in by_reason.items(): + reason_name = reason.replace('_', ' ').title() + print(f"\n{reason_name} ({len(items)}):") + for item in items: + print(f" - {item['name']}: {item['url']}") print(f"{'='*60}") else: print(f"\n{'='*60}") From 10b6ed4477562d4b4ba19f86be0810f02a27cb5e Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 15:33:13 -0600 Subject: [PATCH 08/58] updated error handling --- scripts/generate_jsonld.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 70f7a39..d1a7d79 100644 --- a/scripts/generate_jsonld.py +++ 
b/scripts/generate_jsonld.py @@ -55,6 +55,12 @@ # Server error codes to detect SERVER_ERROR_CODES = ['500', '502', '503', '504', 'internal server error'] +# Connection error patterns (should be retried) +CONNECTION_ERROR_PATTERNS = [ + 'connection refused', 'connection reset', 'connection error', + 'upstream connect error', 'disconnect/reset', 'delayed connect error' +] + # CSV field names CSV_FIELDS = { 'HAS_JSONLD': 'hasJSONLD?', @@ -169,6 +175,9 @@ def api_call(): # Check for timeout errors if "timeout" in error_msg or "timed out" in error_msg: raise TimeoutError("API request timed out") + # Check for connection errors (should be retried) + if any(err in error_msg for err in CONNECTION_ERROR_PATTERNS): + raise TimeoutError(f"Connection error (will retry): {exception[0]}") # Check for server errors if self._is_server_error(exception[0]): raise Exception(f"API server error: {exception[0]}") @@ -183,6 +192,8 @@ def api_call(): def _load_prompt_template(self, filename: str) -> str: """Load a prompt template from the prompts directory.""" prompt_path = PROMPTS_DIR / filename + if not prompt_path.exists(): + raise FileNotFoundError(f"Prompt template not found: {prompt_path}") with open(prompt_path, 'r', encoding='utf-8') as f: return f.read() @@ -201,7 +212,8 @@ def _format_detection_prompt(self, url: str, content: str, context: Dict, conten def _format_generation_prompt(self, metadata: Dict, example_jsonld: str) -> str: """Format the JSON-LD generation prompt.""" template = self._load_prompt_template("jsonld-generation-prompt.txt") - escaped_example = example_jsonld[:EXAMPLE_JSONLD_LIMIT].replace('{', '{{').replace('}', '}}') + # Handle empty example_jsonld + escaped_example = (example_jsonld[:EXAMPLE_JSONLD_LIMIT] if example_jsonld else '').replace('{', '{{').replace('}', '}}') escaped_metadata = json.dumps(metadata.get('extracted', {}), indent=2).replace('{', '{{').replace('}', '}}') return template.format( @@ -245,6 +257,8 @@ def api_call(): response = 
self._call_api_with_timeout(api_call) print(f" Received response") + if not response.choices or not response.choices[0].message.content: + raise ValueError("Empty response from API") return response.choices[0].message.content def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: @@ -304,6 +318,8 @@ def api_call(): response = self._call_api_with_timeout(api_call) print(f" Received response") + if not response.content or not response.content[0].text: + raise ValueError("Empty response from API") return response.content[0].text def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: @@ -368,6 +384,8 @@ def load_example_jsonld() -> str: def read_csv(csv_path: str) -> List[Dict]: """Read the datasets CSV file.""" + if not os.path.exists(csv_path): + raise FileNotFoundError(f"CSV file not found: {csv_path}") datasets = [] with open(csv_path, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) @@ -382,10 +400,14 @@ def save_jsonld(jsonld_str: str, output_dir: Path, dataset_name: str, url: str) safe_name = "".join(c for c in dataset_name if c.isalnum() or c in (' ', '-', '_')).rstrip() safe_name = safe_name.replace(' ', '_')[:FILENAME_MAX_LENGTH] - # Use URL hash as fallback + # Use URL hash as fallback (always include hash for uniqueness) url_hash = hashlib.sha1(url.encode()).hexdigest()[:URL_HASH_LENGTH] - filename = f"{safe_name}_{url_hash}.jsonld" + # If safe_name is empty, use just the hash + if not safe_name: + filename = f"dataset_{url_hash}.jsonld" + else: + filename = f"{safe_name}_{url_hash}.jsonld" output_path = output_dir / filename output_dir.mkdir(parents=True, exist_ok=True) @@ -451,7 +473,15 @@ def main(): return # Process CSV - datasets = read_csv(args.csv) + try: + datasets = read_csv(args.csv) + except FileNotFoundError as e: + print(f"Error: {e}") + sys.exit(1) + except Exception as e: + print(f"Error reading CSV file: {e}") + sys.exit(1) + print(f"Found {len(datasets)} datasets in CSV") # Filter 
datasets that need JSON-LD From 08b85ee16c5c22e764acdf0613a5cb82f0059438 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 15:43:01 -0600 Subject: [PATCH 09/58] Updated API for the different situation --- scripts/generate_jsonld.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index d1a7d79..5a6c1d5 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -238,14 +238,14 @@ def __init__(self, api_key: str, model: str = "gpt-4", base_url: str = None): self.client = openai.OpenAI(api_key=api_key, base_url=base_url) self.model = model - def _call_api(self, prompt: str, system_prompt: str = None) -> str: + def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "processing") -> str: """Make API call to OpenAI with timeout enforcement.""" messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) - print(f" Sending request to API (this may take 1-3 minutes)...") + print(f" Sending request to API for {operation} (this may take 1-3 minutes)...") def api_call(): return self.client.chat.completions.create( @@ -266,7 +266,7 @@ def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict prompt = self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_DETECTION) def call_detect(): - response = self._call_api(prompt) + response = self._call_api(prompt, operation="dataset detection") try: return json.loads(response) except json.JSONDecodeError: @@ -279,7 +279,7 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: prompt = self._format_generation_prompt(metadata, example_jsonld) def call_generate(): - response = self._call_api(prompt) + response = self._call_api(prompt, operation="JSON-LD generation") return self._extract_json_from_response(response) return 
self._retry_with_timeout(call_generate) @@ -303,9 +303,9 @@ def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"): self.client = anthropic.Anthropic(api_key=api_key) self.model = model - def _call_api(self, prompt: str, system_prompt: str = None) -> str: + def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "processing") -> str: """Make API call to Anthropic with timeout enforcement.""" - print(f" Sending request to API (this may take 1-3 minutes)...") + print(f" Sending request to API for {operation} (this may take 1-3 minutes)...") def api_call(): return self.client.messages.create( @@ -327,7 +327,7 @@ def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict prompt = self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_ANTHROPIC) def call_detect(): - response = self._call_api(prompt) + response = self._call_api(prompt, operation="dataset detection") try: return json.loads(response) except json.JSONDecodeError: @@ -340,7 +340,7 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: prompt = self._format_generation_prompt(metadata, example_jsonld) def call_generate(): - response = self._call_api(prompt) + response = self._call_api(prompt, operation="JSON-LD generation") return self._extract_json_from_response(response) return self._retry_with_timeout(call_generate) From 6fa777fe21621491cd1bec9aa4cfc8ebf0a499db Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 16 Dec 2025 16:36:17 -0600 Subject: [PATCH 10/58] Changed the time out to 6min --- scripts/generate_jsonld.py | 40 ++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 5a6c1d5..50ec9e1 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -42,9 +42,9 @@ from bs4 import BeautifulSoup # Constants -API_TIMEOUT_SECONDS = 180.0 # 3 minutes -MAX_RETRIES = 2 
-CONTENT_LIMIT_DETECTION = 5000 # Characters for detection prompt +API_TIMEOUT_SECONDS = 360.0 # 6 minutes +MAX_RETRIES = 1 +CONTENT_LIMIT_DETECTION = 3000 # Characters for detection prompt (reduced to avoid timeouts) CONTENT_LIMIT_ANTHROPIC = 10000 # Characters for Anthropic detection EXAMPLE_JSONLD_LIMIT = 2000 # Characters for example JSON-LD in prompt WEBPAGE_TIMEOUT = 30 # Seconds for webpage fetching @@ -175,9 +175,9 @@ def api_call(): # Check for timeout errors if "timeout" in error_msg or "timed out" in error_msg: raise TimeoutError("API request timed out") - # Check for connection errors (should be retried) + # Check for connection errors if any(err in error_msg for err in CONNECTION_ERROR_PATTERNS): - raise TimeoutError(f"Connection error (will retry): {exception[0]}") + raise TimeoutError(f"Connection error: {exception[0]}") # Check for server errors if self._is_server_error(exception[0]): raise Exception(f"API server error: {exception[0]}") @@ -245,7 +245,7 @@ def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "pr messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) - print(f" Sending request to API for {operation} (this may take 1-3 minutes)...") + print(f" Sending request to API for {operation} (this may take 1-6 minutes)...") def api_call(): return self.client.chat.completions.create( @@ -265,6 +265,11 @@ def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict """Detect datasets using OpenAI.""" prompt = self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_DETECTION) + # Debug: Log prompt size + prompt_size = len(prompt) + if prompt_size > 10000: + print(f" Warning: Large prompt size ({prompt_size} characters), this may cause timeouts") + def call_detect(): response = self._call_api(prompt, operation="dataset detection") try: @@ -278,6 +283,11 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: 
"""Generate JSON-LD using OpenAI.""" prompt = self._format_generation_prompt(metadata, example_jsonld) + # Debug: Log prompt size + prompt_size = len(prompt) + if prompt_size > 15000: + print(f" Warning: Large prompt size ({prompt_size} characters), this may cause timeouts") + def call_generate(): response = self._call_api(prompt, operation="JSON-LD generation") return self._extract_json_from_response(response) @@ -305,7 +315,7 @@ def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"): def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "processing") -> str: """Make API call to Anthropic with timeout enforcement.""" - print(f" Sending request to API for {operation} (this may take 1-3 minutes)...") + print(f" Sending request to API for {operation} (this may take 1-6 minutes)...") def api_call(): return self.client.messages.create( @@ -326,6 +336,11 @@ def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict """Detect datasets using Anthropic.""" prompt = self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_ANTHROPIC) + # Debug: Log prompt size + prompt_size = len(prompt) + if prompt_size > 20000: + print(f" Warning: Large prompt size ({prompt_size} characters), this may cause timeouts") + def call_detect(): response = self._call_api(prompt, operation="dataset detection") try: @@ -339,6 +354,11 @@ def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: """Generate JSON-LD using Anthropic.""" prompt = self._format_generation_prompt(metadata, example_jsonld) + # Debug: Log prompt size + prompt_size = len(prompt) + if prompt_size > 20000: + print(f" Warning: Large prompt size ({prompt_size} characters), this may cause timeouts") + def call_generate(): response = self._call_api(prompt, operation="JSON-LD generation") return self._extract_json_from_response(response) @@ -525,7 +545,7 @@ def main(): detection_result = client.detect_datasets(url, content, dataset) 
print(f" Detection complete") except TimeoutError: - print(f" Error: Timed out after {MAX_RETRIES} retries. Skipping this dataset.") + print(f" Error: Request timed out. Skipping this dataset.") timed_out_urls.append({'name': name, 'url': url, 'reason': 'timeout'}) continue except Exception as e: @@ -575,13 +595,13 @@ def main(): output_path = save_jsonld(jsonld, output_dir, name, url) print(f" Saved to: {output_path}") except TimeoutError: - print(f" Error: Timed out after {MAX_RETRIES} retries. Skipping this dataset.") + print(f" Error: Request timed out. Skipping this dataset.") timed_out_urls.append({'name': name, 'url': url, 'reason': 'timeout'}) continue except Exception as e: # Check if it's a server error if any(code in str(e).lower() for code in SERVER_ERROR_CODES): - print(f" Error: API server error after {MAX_RETRIES} retries. Skipping this dataset.") + print(f" Error: API server error. Skipping this dataset.") print(f" Details: {e}") timed_out_urls.append({'name': name, 'url': url, 'reason': 'server_error'}) else: From df0f822aea5298a471978e40fe5fde05b58fd6ae Mon Sep 17 00:00:00 2001 From: Yong Wook Kim Date: Tue, 16 Dec 2025 16:41:00 -0600 Subject: [PATCH 11/58] added the generated json-ld for MERIT_DEM --- .gitignore | 2 +- .../generated/MERIT_DEM_956de6b6.jsonld | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 data/objects/summoned/generated/MERIT_DEM_956de6b6.jsonld diff --git a/.gitignore b/.gitignore index c2add81..c569d3d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,4 @@ build/ datasets.csv # Generated JSON-LD files -data/objects/summoned/generated/ +#data/objects/summoned/generated/ diff --git a/data/objects/summoned/generated/MERIT_DEM_956de6b6.jsonld b/data/objects/summoned/generated/MERIT_DEM_956de6b6.jsonld new file mode 100644 index 0000000..d5b69f8 --- /dev/null +++ b/data/objects/summoned/generated/MERIT_DEM_956de6b6.jsonld @@ -0,0 +1,45 @@ +{ + "@context": { + "@vocab": 
"https://schema.org/" + }, + "@type": "Dataset", + "@id": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "name": "MERIT DEM", + "description": "Multi-Error-Removed Improved-Terrain Digital Elevation Model developed by removing multiple error components (absolute bias, stripe noise, speckle noise, and tree height bias) from existing spaceborne DEMs (SRTM3 v2.1 and AW3D-30m v1). Represents terrain elevations at 3 arcsecond resolution (~90m at the equator), covering land areas between 90N-60S, referenced to EGM96 geoid. After error removal, land areas mapped with 2 m or better vertical accuracy increased from 39% to 58%.", + "creator": [ + { + "@type": "Organization", + "name": "University of Tokyo (Institute of Industrial Science)" + } + ], + "publisher": { + "@type": "Organization", + "name": "University of Tokyo" + }, + "datePublished": "2018-10-15", + "keywords": [ + "topography", + "Digital Elevation Model", + "terrain elevation", + "geoscience", + "hydrology", + "error removal", + "SRTM", + "AW3D" + ], + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "license": "CC-BY-NC 4.0 or ODbL 1.0 (dual license - user may choose one)", + "distribution": [ + { + "@type": "DataDownload", + "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + } + ] +} \ No newline at end of file From 2cf0321a1bf74c4603feb0cfe16846568a007ee7 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Fri, 2 Jan 2026 13:01:28 -0600 Subject: [PATCH 12/58] Updated to use gemini --- prompts/dataset-detection-prompt.txt | 5 ++ scripts/generate_jsonld.py | 82 +++++++++++++++++++++++++++- scripts/requirements.txt | 1 + 3 files changed, 87 insertions(+), 1 deletion(-) diff --git a/prompts/dataset-detection-prompt.txt b/prompts/dataset-detection-prompt.txt index 90db45b..3d51a1e 100644 --- a/prompts/dataset-detection-prompt.txt +++ 
b/prompts/dataset-detection-prompt.txt @@ -31,3 +31,8 @@ You are analyzing a scientific dataset webpage to identify available datasets an **Output Format**: Provide a structured JSON response with the extracted information. + + + + + diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 50ec9e1..a5b03e2 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -37,6 +37,12 @@ except ImportError: ANTHROPIC_AVAILABLE = False +try: + import google.generativeai as genai + GEMINI_AVAILABLE = True +except ImportError: + GEMINI_AVAILABLE = False + # Standard libraries import requests from bs4 import BeautifulSoup @@ -366,6 +372,74 @@ def call_generate(): return self._retry_with_timeout(call_generate) +class GeminiClient(AIClient): + """Google Gemini API client.""" + + def __init__(self, api_key: str, model: str = "gemini-pro"): + genai.configure(api_key=api_key) + self.model = genai.GenerativeModel(model) + print(f"Using Google Gemini API") + print(f"Using model: {model}") + + def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "processing") -> str: + """Make API call to Gemini with timeout enforcement.""" + print(f" Sending request to API for {operation} (this may take 1-6 minutes)...") + + # Gemini uses generate_content, combine system and user prompts if needed + full_prompt = prompt + if system_prompt: + full_prompt = f"{system_prompt}\n\n{prompt}" + + def api_call(): + response = self.model.generate_content( + full_prompt, + generation_config={ + "temperature": 0.3, + "max_output_tokens": 4096, + } + ) + return response + + response = self._call_api_with_timeout(api_call) + print(f" Received response") + if not response or not response.text: + raise ValueError("Empty response from API") + return response.text + + def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: + """Detect datasets using Gemini.""" + prompt = self._format_detection_prompt(url, webpage_content, context, 
CONTENT_LIMIT_DETECTION) + + # Debug: Log prompt size + prompt_size = len(prompt) + if prompt_size > 10000: + print(f" Warning: Large prompt size ({prompt_size} characters), this may cause timeouts") + + def call_detect(): + response = self._call_api(prompt, operation="dataset detection") + try: + return json.loads(response) + except json.JSONDecodeError: + return {"raw_response": response, "error": "Failed to parse JSON"} + + return self._retry_with_timeout(call_detect) + + def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: + """Generate JSON-LD using Gemini.""" + prompt = self._format_generation_prompt(metadata, example_jsonld) + + # Debug: Log prompt size + prompt_size = len(prompt) + if prompt_size > 15000: + print(f" Warning: Large prompt size ({prompt_size} characters), this may cause timeouts") + + def call_generate(): + response = self._call_api(prompt, operation="JSON-LD generation") + return self._extract_json_from_response(response) + + return self._retry_with_timeout(call_generate) + + def fetch_webpage(url: str) -> Optional[str]: """Fetch webpage content.""" try: @@ -441,7 +515,7 @@ def main(): parser = argparse.ArgumentParser(description='Generate JSON-LD for datasets') parser.add_argument('--csv', default='datasets.csv', help='Path to CSV file') parser.add_argument('--output-dir', default='data/objects/summoned/generated', help='Output directory for JSON-LD files') - parser.add_argument('--ai-service', choices=['openai', 'anthropic', 'nrp'], default='nrp', help='AI service to use (default: nrp)') + parser.add_argument('--ai-service', choices=['openai', 'anthropic', 'nrp', 'gemini'], default='nrp', help='AI service to use (default: nrp)') parser.add_argument('--api-key', help='API key (or set environment variable)') parser.add_argument('--model', help='Model name (optional)') parser.add_argument('--limit', type=int, help='Limit number of datasets to process') @@ -472,6 +546,12 @@ def main(): print("Error: anthropic package not 
installed. Run: pip install anthropic") sys.exit(1) client = AnthropicClient(api_key, args.model or "claude-3-5-sonnet-20241022") + elif args.ai_service == 'gemini': + if not GEMINI_AVAILABLE: + print("Error: google-generativeai package not installed. Run: pip install google-generativeai") + sys.exit(1) + # Default Gemini models: gemini-pro, gemini-1.5-pro, gemini-1.5-flash + client = GeminiClient(api_key, args.model or "gemini-pro") output_dir = Path(args.output_dir) example_jsonld = load_example_jsonld() diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 925cf35..bb3088f 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -4,6 +4,7 @@ # AI API clients (install at least one) openai>=1.0.0 anthropic>=0.18.0 +google-generativeai>=0.3.0 # Web scraping and parsing requests>=2.31.0 From 6a3716ab49baca1622c3e6b364ba106f4a2ce53d Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 5 Jan 2026 11:37:06 -0600 Subject: [PATCH 13/58] updated the code for using gemini with 3 retries --- prompts/dataset-detection-prompt.txt | 1 + scripts/generate_jsonld.py | 93 +++++++++++++++++++++++----- 2 files changed, 79 insertions(+), 15 deletions(-) diff --git a/prompts/dataset-detection-prompt.txt b/prompts/dataset-detection-prompt.txt index 3d51a1e..2f26d5a 100644 --- a/prompts/dataset-detection-prompt.txt +++ b/prompts/dataset-detection-prompt.txt @@ -36,3 +36,4 @@ You are analyzing a scientific dataset webpage to identify available datasets an + diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index a5b03e2..e26d1c4 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -11,7 +11,9 @@ import hashlib import json import os +import re import sys +import time import argparse import threading from pathlib import Path @@ -20,7 +22,11 @@ # Try to load .env file if python-dotenv is available try: from dotenv import load_dotenv - load_dotenv() + # Load .env from project root (parent of scripts directory) + 
PROJECT_ROOT_FOR_ENV = Path(__file__).parent.parent + dotenv_path = PROJECT_ROOT_FOR_ENV / '.env' + # Use override=True to ensure environment variables are loaded + load_dotenv(dotenv_path, override=True) except ImportError: pass # dotenv is optional @@ -37,11 +43,14 @@ except ImportError: ANTHROPIC_AVAILABLE = False +# Try to import Gemini (using deprecated package for now - still works) try: import google.generativeai as genai + from google.api_core import exceptions as google_exceptions GEMINI_AVAILABLE = True except ImportError: GEMINI_AVAILABLE = False + google_exceptions = None # Standard libraries import requests @@ -375,17 +384,19 @@ def call_generate(): class GeminiClient(AIClient): """Google Gemini API client.""" - def __init__(self, api_key: str, model: str = "gemini-pro"): + def __init__(self, api_key: str, model: str = "gemini-2.0-flash"): genai.configure(api_key=api_key) - self.model = genai.GenerativeModel(model) + # Strip "models/" prefix if present (list_models returns full names) + model_name = model.replace("models/", "") if model.startswith("models/") else model + self.model = genai.GenerativeModel(model_name) print(f"Using Google Gemini API") - print(f"Using model: {model}") + print(f"Using model: {model_name}") def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "processing") -> str: - """Make API call to Gemini with timeout enforcement.""" + """Make API call to Gemini with timeout enforcement and quota error handling.""" print(f" Sending request to API for {operation} (this may take 1-6 minutes)...") - # Gemini uses generate_content, combine system and user prompts if needed + # Combine system and user prompts if needed full_prompt = prompt if system_prompt: full_prompt = f"{system_prompt}\n\n{prompt}" @@ -400,11 +411,38 @@ def api_call(): ) return response - response = self._call_api_with_timeout(api_call) - print(f" Received response") - if not response or not response.text: - raise ValueError("Empty response from 
API") - return response.text + # Retry up to 3 times for quota errors + max_quota_retries = 3 + for quota_attempt in range(max_quota_retries): + try: + response = self._call_api_with_timeout(api_call) + print(f" Received response") + if not response or not response.text: + raise ValueError("Empty response from API") + return response.text + except Exception as e: + # Check if it's a quota error + if google_exceptions and isinstance(e, google_exceptions.ResourceExhausted): + error_str = str(e) + # Extract retry delay from error message if available + retry_match = re.search(r'retry in ([\d.]+)s', error_str, re.IGNORECASE) + if retry_match: + retry_delay = float(retry_match.group(1)) + retry_delay = min(retry_delay + 5, 60) # Add 5s buffer, max 60s + else: + retry_delay = 30 # Default 30 seconds + + if quota_attempt < max_quota_retries - 1: + print(f" Quota limit reached. Waiting {retry_delay:.0f} seconds before retry ({quota_attempt + 1}/{max_quota_retries})...") + time.sleep(retry_delay) + continue + else: + print(f" Quota limit reached after {max_quota_retries} attempts.") + print(f" Please check your quota at: https://ai.dev/usage?tab=rate-limit") + raise Exception(f"Gemini API quota exceeded. Please wait and try again later, or check your quota limits.") + else: + # Not a quota error, re-raise + raise def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: """Detect datasets using Gemini.""" @@ -524,9 +562,34 @@ def main(): args = parser.parse_args() # Initialize AI client - api_key = args.api_key or os.getenv(f"{args.ai_service.upper()}_API_KEY") or os.getenv("NRP_API_KEY") + env_var_name = f"{args.ai_service.upper()}_API_KEY" + api_key = args.api_key or os.getenv(env_var_name) or os.getenv("NRP_API_KEY") + + # Debug: Check if API key was loaded (but don't print the actual key) if not api_key: - print(f"Error: API key required. 
Set {args.ai_service.upper()}_API_KEY environment variable or use --api-key") + print(f"Error: API key required.") + print(f" Looking for: {env_var_name} or NRP_API_KEY") + # Check if .env file exists and has the variable + env_file = PROJECT_ROOT / '.env' + if env_file.exists(): + print(f" Found .env file at: {env_file}") + with open(env_file, 'r') as f: + content = f.read() + if env_var_name in content: + if f"{env_var_name}=your-" in content or f"{env_var_name}=your_" in content: + print(f" Warning: .env file contains placeholder value. Please replace 'your-{args.ai_service.lower()}-api-key-here' with your actual API key.") + else: + print(f" Note: {env_var_name} found in .env but not loaded. Check file format (no spaces around =).") + else: + print(f" Note: {env_var_name} not found in .env file.") + else: + print(f" .env file not found at: {env_file}") + print(f" Set {env_var_name} environment variable or use --api-key") + sys.exit(1) + + # Check if API key looks like a placeholder + if api_key.startswith('your-') or 'your-' in api_key.lower(): + print(f"Warning: API key appears to be a placeholder. Please set a real API key.") sys.exit(1) if args.ai_service == 'openai': @@ -550,8 +613,8 @@ def main(): if not GEMINI_AVAILABLE: print("Error: google-generativeai package not installed. 
Run: pip install google-generativeai") sys.exit(1) - # Default Gemini models: gemini-pro, gemini-1.5-pro, gemini-1.5-flash - client = GeminiClient(api_key, args.model or "gemini-pro") + # Default Gemini models: gemini-2.0-flash (fast), gemini-2.5-flash, gemini-2.5-pro (more capable) + client = GeminiClient(api_key, args.model or "gemini-2.0-flash") output_dir = Path(args.output_dir) example_jsonld = load_example_jsonld() From 338a3f176d6ed16251d3b81e5a33ee48fb8868e2 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 5 Jan 2026 11:59:58 -0600 Subject: [PATCH 14/58] Update the code to directly fetch the url content --- prompts/dataset-detection-prompt.txt | 13 +- scripts/generate_jsonld.py | 202 ++++++++++++++++++--------- scripts/requirements.txt | 3 +- 3 files changed, 147 insertions(+), 71 deletions(-) diff --git a/prompts/dataset-detection-prompt.txt b/prompts/dataset-detection-prompt.txt index 2f26d5a..3dd02b4 100644 --- a/prompts/dataset-detection-prompt.txt +++ b/prompts/dataset-detection-prompt.txt @@ -1,7 +1,7 @@ You are analyzing a scientific dataset webpage to identify available datasets and their metadata. -**Task**: Examine the following webpage and identify: -1. What datasets are available on this page? +**Task**: Analyze the following URL and identify: +1. What datasets are available at this URL? 2. For each dataset found, extract: - Dataset name/title - Description @@ -15,7 +15,10 @@ You are analyzing a scientific dataset webpage to identify available datasets an - Keywords or topics **Webpage URL**: {URL} -**Webpage Content**: {CONTENT} + +**Important**: Please analyze the content at this URL: {URL} + +If you have URL Context Tool access, fetch and analyze the webpage content directly. Otherwise, use your knowledge of the domain and URL structure to infer what datasets might be available. 
**Context from Google Sheet**: - Expected Dataset Name: {DATASET_NAME} @@ -23,7 +26,9 @@ You are analyzing a scientific dataset webpage to identify available datasets an - Description: {DESCRIPTION} **Instructions**: -- If the page contains multiple datasets, list all of them +- Browse the URL and explore the webpage structure +- If the page contains multiple datasets or files, identify if this is a data catalog +- For data catalogs (like MERIT DEM with multiple spatial regions), note the file naming conventions and structure - Focus on structured data products (not just documentation) - Look for download links, API endpoints, or data access points - Extract any existing JSON-LD or structured metadata if present diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index e26d1c4..754e395 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -43,14 +43,28 @@ except ImportError: ANTHROPIC_AVAILABLE = False -# Try to import Gemini (using deprecated package for now - still works) +# Try to import Gemini (try new package first, fallback to deprecated) try: - import google.generativeai as genai - from google.api_core import exceptions as google_exceptions + # Try new google.genai package (supports URL Context Tool) + from google import genai as new_genai + from google.genai.types import Tool, UrlContext + from google.genai import errors as new_genai_errors + GEMINI_NEW_API = True GEMINI_AVAILABLE = True except ImportError: - GEMINI_AVAILABLE = False - google_exceptions = None + GEMINI_NEW_API = False + new_genai_errors = None + try: + # Fallback to deprecated google.generativeai package + import google.generativeai as genai + from google.api_core import exceptions as google_exceptions + GEMINI_AVAILABLE = True + except ImportError: + GEMINI_AVAILABLE = False + google_exceptions = None + new_genai = None + Tool = None + UrlContext = None # Standard libraries import requests @@ -102,8 +116,8 @@ class AIClient: """Abstract base class for 
AI clients.""" - def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: - """Detect datasets on a webpage and extract metadata.""" + def detect_datasets(self, url: str, context: Dict) -> Dict: + """Detect datasets by analyzing the URL directly (AI will browse/analyze the webpage).""" raise NotImplementedError def generate_jsonld(self, metadata: Dict, example_jsonld: str) -> str: @@ -212,13 +226,11 @@ def _load_prompt_template(self, filename: str) -> str: with open(prompt_path, 'r', encoding='utf-8') as f: return f.read() - def _format_detection_prompt(self, url: str, content: str, context: Dict, content_limit: int) -> str: - """Format the dataset detection prompt.""" + def _format_detection_prompt(self, url: str, context: Dict) -> str: + """Format the dataset detection prompt with URL only (no HTML content).""" template = self._load_prompt_template("dataset-detection-prompt.txt") - limited_content = content[:content_limit] return template.format( URL=url, - CONTENT=limited_content, DATASET_NAME=context.get(CSV_FIELDS['NAME'], ''), GROUP=context.get(CSV_FIELDS['GROUP'], ''), DESCRIPTION=context.get(CSV_FIELDS['DESCRIPTION'], '') @@ -276,9 +288,9 @@ def api_call(): raise ValueError("Empty response from API") return response.choices[0].message.content - def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: - """Detect datasets using OpenAI.""" - prompt = self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_DETECTION) + def detect_datasets(self, url: str, context: Dict) -> Dict: + """Detect datasets using OpenAI by analyzing the URL directly.""" + prompt = self._format_detection_prompt(url, context) # Debug: Log prompt size prompt_size = len(prompt) @@ -347,9 +359,9 @@ def api_call(): raise ValueError("Empty response from API") return response.content[0].text - def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: - """Detect datasets using Anthropic.""" - prompt = 
self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_ANTHROPIC) + def detect_datasets(self, url: str, context: Dict) -> Dict: + """Detect datasets using Anthropic by analyzing the URL directly.""" + prompt = self._format_detection_prompt(url, context) # Debug: Log prompt size prompt_size = len(prompt) @@ -382,18 +394,31 @@ def call_generate(): class GeminiClient(AIClient): - """Google Gemini API client.""" + """Google Gemini API client with URL Context Tool support.""" def __init__(self, api_key: str, model: str = "gemini-2.0-flash"): - genai.configure(api_key=api_key) - # Strip "models/" prefix if present (list_models returns full names) - model_name = model.replace("models/", "") if model.startswith("models/") else model - self.model = genai.GenerativeModel(model_name) - print(f"Using Google Gemini API") - print(f"Using model: {model_name}") + self.api_key = api_key + self.model_name = model.replace("models/", "") if model.startswith("models/") else model + + # Try to use new API with URL Context Tool support + if GEMINI_NEW_API: + self.client = new_genai.Client(api_key=api_key) + self.use_url_context = True + print(f"Using Google Gemini API (new package with URL Context Tool)") + else: + # Fallback to deprecated package + genai.configure(api_key=api_key) + self.model = genai.GenerativeModel(self.model_name) + self.use_url_context = False + print(f"Using Google Gemini API (deprecated package - URL Context Tool not available)") + + print(f"Using model: {self.model_name}") - def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "processing") -> str: - """Make API call to Gemini with timeout enforcement and quota error handling.""" + def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "processing", url: str = None) -> str: + """Make API call to Gemini with timeout enforcement and quota error handling. + + If url is provided and using new API, will use URL Context Tool to fetch webpage content. 
+ """ print(f" Sending request to API for {operation} (this may take 1-6 minutes)...") # Combine system and user prompts if needed @@ -402,14 +427,38 @@ def _call_api(self, prompt: str, system_prompt: str = None, operation: str = "pr full_prompt = f"{system_prompt}\n\n{prompt}" def api_call(): - response = self.model.generate_content( - full_prompt, - generation_config={ - "temperature": 0.3, - "max_output_tokens": 4096, - } - ) - return response + if self.use_url_context and url: + # Use new API with URL Context Tool + from google.genai.types import GenerateContentConfig + url_context_tool = Tool(url_context=UrlContext()) + response = self.client.models.generate_content( + model=self.model_name, + contents=full_prompt, + config=GenerateContentConfig( + tools=[url_context_tool], + temperature=0.3, + max_output_tokens=4096, + ) + ) + # Extract text from response + if response.candidates and response.candidates[0].content.parts: + text_parts = [part.text for part in response.candidates[0].content.parts if hasattr(part, 'text')] + class Response: + def __init__(self, text): + self.text = text + return Response('\n'.join(text_parts)) + else: + raise ValueError("Empty response from API") + else: + # Use deprecated API (no URL Context Tool) + response = self.model.generate_content( + full_prompt, + generation_config={ + "temperature": 0.3, + "max_output_tokens": 4096, + } + ) + return response # Retry up to 3 times for quota errors max_quota_retries = 3 @@ -422,7 +471,31 @@ def api_call(): return response.text except Exception as e: # Check if it's a quota error - if google_exceptions and isinstance(e, google_exceptions.ResourceExhausted): + is_quota_error = False + retry_delay = 30 # Default 30 seconds + + # Check for new API quota errors + if self.use_url_context and new_genai_errors: + if isinstance(e, new_genai_errors.ClientError): + error_str = str(e) + if '429' in error_str or 'RESOURCE_EXHAUSTED' in error_str or 'quota' in error_str.lower(): + is_quota_error = 
True + retry_match = re.search(r'retry in ([\d.]+)s', error_str, re.IGNORECASE) + if retry_match: + retry_delay = float(retry_match.group(1)) + retry_delay = min(retry_delay + 5, 60) + + # Check for deprecated API quota errors + if not is_quota_error and not self.use_url_context and google_exceptions: + if isinstance(e, google_exceptions.ResourceExhausted): + is_quota_error = True + error_str = str(e) + retry_match = re.search(r'retry in ([\d.]+)s', error_str, re.IGNORECASE) + if retry_match: + retry_delay = float(retry_match.group(1)) + retry_delay = min(retry_delay + 5, 60) + + if is_quota_error: error_str = str(e) # Extract retry delay from error message if available retry_match = re.search(r'retry in ([\d.]+)s', error_str, re.IGNORECASE) @@ -444,21 +517,30 @@ def api_call(): # Not a quota error, re-raise raise - def detect_datasets(self, url: str, webpage_content: str, context: Dict) -> Dict: - """Detect datasets using Gemini.""" - prompt = self._format_detection_prompt(url, webpage_content, context, CONTENT_LIMIT_DETECTION) + def detect_datasets(self, url: str, context: Dict) -> Dict: + """Detect datasets using Gemini by analyzing the URL directly. + + If using new API with URL Context Tool, Gemini will fetch and analyze the webpage. + Otherwise, it will analyze the URL string only. 
+ """ + prompt = self._format_detection_prompt(url, context) # Debug: Log prompt size prompt_size = len(prompt) if prompt_size > 10000: print(f" Warning: Large prompt size ({prompt_size} characters), this may cause timeouts") + if self.use_url_context: + print(f" Using URL Context Tool - Gemini will fetch and analyze: {url}") + def call_detect(): - response = self._call_api(prompt, operation="dataset detection") + # Pass URL to _call_api so it can use URL Context Tool if available + response = self._call_api(prompt, operation="dataset detection", url=url) + response_text = response.text if hasattr(response, 'text') else str(response) try: - return json.loads(response) + return json.loads(response_text) except json.JSONDecodeError: - return {"raw_response": response, "error": "Failed to parse JSON"} + return {"raw_response": response_text, "error": "Failed to parse JSON"} return self._retry_with_timeout(call_detect) @@ -553,7 +635,7 @@ def main(): parser = argparse.ArgumentParser(description='Generate JSON-LD for datasets') parser.add_argument('--csv', default='datasets.csv', help='Path to CSV file') parser.add_argument('--output-dir', default='data/objects/summoned/generated', help='Output directory for JSON-LD files') - parser.add_argument('--ai-service', choices=['openai', 'anthropic', 'nrp', 'gemini'], default='nrp', help='AI service to use (default: nrp)') + parser.add_argument('--ai-service', choices=['openai', 'anthropic', 'nrp', 'gemini'], default='gemini', help='AI service to use (default: gemini)') parser.add_argument('--api-key', help='API key (or set environment variable)') parser.add_argument('--model', help='Model name (optional)') parser.add_argument('--limit', type=int, help='Limit number of datasets to process') @@ -622,17 +704,15 @@ def main(): # Test mode with single URL if args.test_url: print(f"Testing with URL: {args.test_url}") - html = fetch_webpage(args.test_url) - if html: - content = extract_text_content(html) - context = { - 
CSV_FIELDS['NAME']: 'Test Dataset', - CSV_FIELDS['GROUP']: 'test', - CSV_FIELDS['DESCRIPTION']: '' - } - result = client.detect_datasets(args.test_url, content, context) - print("\n=== Detection Result ===") - print(json.dumps(result, indent=2)) + print(" Sending URL to AI for analysis (AI will browse/analyze the webpage)...") + context = { + CSV_FIELDS['NAME']: 'Test Dataset', + CSV_FIELDS['GROUP']: 'test', + CSV_FIELDS['DESCRIPTION']: '' + } + result = client.detect_datasets(args.test_url, context) + print("\n=== Detection Result ===") + print(json.dumps(result, indent=2)) return # Process CSV @@ -672,20 +752,10 @@ def main(): print(f"\n[{i}/{len(to_process)}] Processing: {name}") print(f" URL: {url}") - # Fetch webpage - print(" Fetching webpage...") - html = fetch_webpage(url) - if not html: - print(f" Warning: Failed to fetch webpage") - continue - - content = extract_text_content(html) - print(f" Fetched {len(content)} characters") - - # Detect datasets - print(" Detecting datasets with AI...") + # Detect datasets (AI will browse/analyze the URL directly) + print(" Analyzing URL with AI (AI will browse/analyze the webpage)...") try: - detection_result = client.detect_datasets(url, content, dataset) + detection_result = client.detect_datasets(url, dataset) print(f" Detection complete") except TimeoutError: print(f" Error: Request timed out. 
Skipping this dataset.") diff --git a/scripts/requirements.txt b/scripts/requirements.txt index bb3088f..f9f88a8 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -4,7 +4,8 @@ # AI API clients (install at least one) openai>=1.0.0 anthropic>=0.18.0 -google-generativeai>=0.3.0 +google-genai>=0.2.0 # New package with URL Context Tool support +google-generativeai>=0.3.0 # Deprecated, but kept for backward compatibility # Web scraping and parsing requests>=2.31.0 From bf21b7734d14108a948dd38741823cfa80bf1ec3 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 20 Jan 2026 16:54:56 -0600 Subject: [PATCH 15/58] Added first 7 sites --- .../generated/CHELSA/chelsa_bioclim.jsonld | 80 ++++++ .../chelsa_canaryclim_climatologies.jsonld | 71 ++++++ .../CHELSA/chelsa_cerra_daily.jsonld | 61 +++++ .../chelsa_ch_highres_climatologies.jsonld | 81 ++++++ .../CHELSA/chelsa_ch_highres_daily.jsonld | 81 ++++++ .../CHELSA/chelsa_climatologies.jsonld | 100 ++++++++ .../generated/CHELSA/chelsa_daily.jsonld | 133 ++++++++++ .../CHELSA/chelsa_drought_indices.jsonld | 80 ++++++ .../generated/CHELSA/chelsa_monthly.jsonld | 92 +++++++ .../CHELSA/chelsa_trace21k_centennial.jsonld | 82 ++++++ .../chelsa_trace21k_centennial_bioclim.jsonld | 71 ++++++ .../generated/CHELSA/chelsa_w5e5_daily.jsonld | 69 +++++ .../generated/CHELSA/datacatalog.jsonld | 40 +++ .../summoned/generated/CHELSA/prompt.txt | 75 ++++++ .../summoned/generated/CHELSA/webpage.jsonld | 99 ++++++++ .../consensus-land-cover.jsonld | 119 +++++++++ .../generated/Consensus_Land_Cover/prompt.txt | 117 +++++++++ .../Consensus_Land_Cover/webpage.jsonld | 70 +++++ .../objects/summoned/generated/GFC/gfc.jsonld | 231 +++++++++++++++++ .../objects/summoned/generated/GFC/prompt.txt | 80 ++++++ .../summoned/generated/GFC/webpage.jsonld | 50 ++++ .../global-tree-density.jsonld | 132 ++++++++++ .../generated/Global_Tree_Density/prompt.txt | 99 ++++++++ .../Global_Tree_Density/webpage.jsonld | 49 ++++ 
.../generated/MERIT_DEM/merit-dem.jsonld | 155 ++++++++++++ .../summoned/generated/MERIT_DEM/prompt.txt | 59 +++++ .../generated/MERIT_DEM/webpage.jsonld | 70 +++++ .../generated/MERIT_DEM_956de6b6.jsonld | 45 ---- .../generated/MRLC_NLCD/annual-nlcd.jsonld | 91 +++++++ .../generated/MRLC_NLCD/datacatalog.jsonld | 67 +++++ .../MRLC_NLCD/exotic-annual-grass.jsonld | 83 ++++++ .../generated/MRLC_NLCD/legacy-nlcd.jsonld | 82 ++++++ .../generated/MRLC_NLCD/nalcms.jsonld | 68 +++++ .../summoned/generated/MRLC_NLCD/prompt.txt | 97 +++++++ .../summoned/generated/MRLC_NLCD/rcmap.jsonld | 104 ++++++++ .../generated/MRLC_NLCD/webpage.jsonld | 55 ++++ .../generated/TerraClimate/prompt.txt | 98 +++++++ .../TerraClimate/terraclimate.jsonld | 239 ++++++++++++++++++ .../generated/TerraClimate/webpage.jsonld | 61 +++++ 39 files changed, 3491 insertions(+), 45 deletions(-) create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld create mode 100644 data/objects/summoned/generated/CHELSA/datacatalog.jsonld 
create mode 100644 data/objects/summoned/generated/CHELSA/prompt.txt create mode 100644 data/objects/summoned/generated/CHELSA/webpage.jsonld create mode 100644 data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld create mode 100644 data/objects/summoned/generated/Consensus_Land_Cover/prompt.txt create mode 100644 data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld create mode 100644 data/objects/summoned/generated/GFC/gfc.jsonld create mode 100644 data/objects/summoned/generated/GFC/prompt.txt create mode 100644 data/objects/summoned/generated/GFC/webpage.jsonld create mode 100644 data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld create mode 100644 data/objects/summoned/generated/Global_Tree_Density/prompt.txt create mode 100644 data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld create mode 100644 data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld create mode 100644 data/objects/summoned/generated/MERIT_DEM/prompt.txt create mode 100644 data/objects/summoned/generated/MERIT_DEM/webpage.jsonld delete mode 100644 data/objects/summoned/generated/MERIT_DEM_956de6b6.jsonld create mode 100644 data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld create mode 100644 data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld create mode 100644 data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld create mode 100644 data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld create mode 100644 data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld create mode 100644 data/objects/summoned/generated/MRLC_NLCD/prompt.txt create mode 100644 data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld create mode 100644 data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld create mode 100644 data/objects/summoned/generated/TerraClimate/prompt.txt create mode 100644 data/objects/summoned/generated/TerraClimate/terraclimate.jsonld create mode 100644 
data/objects/summoned/generated/TerraClimate/webpage.jsonld diff --git a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld new file mode 100644 index 0000000..398c123 --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld @@ -0,0 +1,80 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsa_bioclim#dataset", + "name": "CHELSA-bioclim (V2.1)", + "description": "CHELSA-bioclim is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It consists of bioclimatic variables and related predictors used in ecological and environmental analyses.", + "url": "https://www.chelsa-climate.org/datasets/chelsa_bioclim", + "version": "2.1", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "keywords": [ + "CHELSA", + "bioclim", + "bioclimatic variables", + "ecology", + "species distribution modeling", + "climate predictors" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "BIO1–BIO19", + "description": "Standard bioclimatic variables (temperature and precipitation derivatives)" + }, + { + "@type": "PropertyValue", + "name": "gdd", + "description": "Growing degree days (and related growing-season metrics where provided)" + }, + { + "@type": "PropertyValue", + "name": "koppen", + 
"description": "Köppen–Geiger climate classification (where provided in CHELSA bioclim products)" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "image/tiff; application=geotiff; profile=cloud-optimized", + "COG" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSA-bioclim downloads (COG)", + "description": "Download portal for CHELSA bioclimatic variables.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + } + ], + "citation": "Brun, P., Zimmermann, N. E., Hari, C., Pellissier, L., & Karger, D. N. (2022). Global climate-related predictors at kilometer resolution for the past and future. Earth System Science Data, 14(12), 5573–5603. https://doi.org/10.5194/essd-14-5573-2022", + "about": [ + { + "@type": "Thing", + "name": "Bioclimatic predictors" + }, + { + "@type": "Thing", + "name": "Biodiversity and ecology" + } + ] +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld new file mode 100644 index 0000000..e94c23a --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld @@ -0,0 +1,71 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/canary-clim-canaries#dataset", + "name": "CHELSACanaryClim-climatologies (Canary Islands) (V1.0)", + "description": "CHELSACanaryClim-climatologies is a very-high-resolution climate dataset generated with the CHELSACanaryClim downscaling model. 
It consists of monthly climatologies of air temperature and precipitation for the Canary Islands.", + "url": "https://www.chelsa-climate.org/datasets/canary-clim-canaries", + "version": "1.0", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1979-01-01/2013-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Canary Islands" + }, + "keywords": [ + "CHELSA", + "Canary Islands", + "very high resolution", + "climatologies", + "temperature", + "precipitation" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "tas", + "description": "Air temperature climatologies" + }, + { + "@type": "PropertyValue", + "name": "pr", + "description": "Precipitation climatologies" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "image/tiff; application=geotiff; profile=cloud-optimized", + "COG" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSACanaryClim-climatologies downloads (COG)", + "description": "Download portal for CanaryClim climatologies.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + } + ], + "citation": "Patiño, J., Collart, F., Vanderpoorten, A., Martin-Esquivel, J. L., Naranjo-Cigala, A., Mirolo, S., Karger, D. N. (2023). Spatial resolution impacts projected plant responses to climate change on topographically complex islands. 
Diversity and Distributions, 29(10), 1245–1262.", + "about": [ + { + "@type": "Thing", + "name": "Island climatology" + }, + { + "@type": "Thing", + "name": "Downscaled regional climate" + } + ] +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld new file mode 100644 index 0000000..d143b2e --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld @@ -0,0 +1,61 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets#chelsacerra-daily-dataset", + "name": "CHELSAcerra-daily (Europe) (V1.0)", + "description": "CHELSAcerra-daily is a high-resolution climate dataset for air temperatures generated with the CHELSA downscaling model using the Copernicus European Regional ReAnalysis (CERRA) for Europe.", + "url": "https://www.chelsa-climate.org/datasets", + "version": "1.0", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1985-01-01/2015-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Europe" + }, + "keywords": [ + "CHELSA", + "CERRA", + "Europe", + "downscaling", + "daily temperature", + "regional reanalysis" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "tas", + "description": "Daily mean near-surface air temperature (downscaled from CERRA)" + } + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSAcerra-daily downloads (portal)", + "description": "Download portal linked from the CHELSA catalog entry for CHELSAcerra-daily.", + 
"contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "text/html" + } + ], + "citation": "Karger, D. N. and Janzing, J. (2025). CHELSAcerra-daily. EnviDat. https://doi.org/10.16904/envidat.703", + "about": [ + { + "@type": "Thing", + "name": "Downscaled regional reanalysis (Europe)" + }, + { + "@type": "Thing", + "name": "Daily air temperature" + } + ] +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld new file mode 100644 index 0000000..9f9bfa6 --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld @@ -0,0 +1,81 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres-climatologies#dataset", + "name": "CHELSAch-highres-climatologies (Switzerland) (V1.0)", + "description": "CHELSAch-highres-climatologies is a high-resolution climate dataset generated with the CHELSA downscaling model for Switzerland. 
It consists of long-term 30-year mean aggregated surface variables.", + "url": "https://www.chelsa-climate.org/datasets/chelsach-highres-climatologies", + "version": "1.0", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1981-01-01/2010-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "keywords": [ + "CHELSA", + "Switzerland", + "climatologies", + "high resolution", + "temperature", + "precipitation" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "tas", + "description": "Near-surface air temperature (aggregated)" + }, + { + "@type": "PropertyValue", + "name": "tasmax", + "description": "Maximum near-surface air temperature (aggregated)" + }, + { + "@type": "PropertyValue", + "name": "tasmin", + "description": "Minimum near-surface air temperature (aggregated)" + }, + { + "@type": "PropertyValue", + "name": "pr", + "description": "Precipitation (aggregated)" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "application/x-netcdf", + "NetCDF" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSAch-highres-climatologies downloads (NetCDF)", + "description": "Download portal for CHELSAch-highres-climatologies.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "application/x-netcdf" + } + ], + "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. 
Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", + "about": [ + { + "@type": "Thing", + "name": "Regional climatology" + }, + { + "@type": "Thing", + "name": "Switzerland climate normals" + } + ] +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld new file mode 100644 index 0000000..19627d8 --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld @@ -0,0 +1,81 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres_daily#dataset", + "name": "CHELSAch-highres-daily (Switzerland) (V1.0)", + "description": "CHELSAch-highres-daily is a high-resolution climate dataset generated with the CHELSA downscaling model for Switzerland. It consists of daily surface variables.", + "url": "https://www.chelsa-climate.org/datasets/chelsach-highres_daily", + "version": "1.0", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1981-01-01/2022-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "keywords": [ + "CHELSA", + "Switzerland", + "high resolution", + "daily climate", + "temperature", + "precipitation" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "tas", + "description": "Daily mean near-surface air temperature" + }, + { + "@type": "PropertyValue", + "name": "tasmax", + "description": "Daily maximum near-surface air temperature" + }, + { + "@type": "PropertyValue", + "name": "tasmin", + 
"description": "Daily minimum near-surface air temperature" + }, + { + "@type": "PropertyValue", + "name": "pr", + "description": "Precipitation" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "application/x-netcdf", + "NetCDF" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSAch-highres-daily downloads (NetCDF)", + "description": "Download portal for CHELSAch-highres-daily.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "application/x-netcdf" + } + ], + "citation": "Zilker, F., Karger, D. N. (2025). CHELSAch-highres-daily climate data at high resolution. EnviDat. https://www.doi.org/10.16904/envidat.688", + "about": [ + { + "@type": "Thing", + "name": "Regional high-resolution climate" + }, + { + "@type": "Thing", + "name": "Mountain climatology" + } + ] +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld new file mode 100644 index 0000000..c53356e --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld @@ -0,0 +1,100 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsa_climatologies#dataset", + "name": "CHELSA-climatologies (V2.1)", + "description": "CHELSA-climatologies is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. 
It consists of long-term climatological means.", + "url": "https://www.chelsa-climate.org/datasets/chelsa_climatologies", + "version": "2.1", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "keywords": [ + "CHELSA", + "climatologies", + "climate normals", + "downscaling", + "temperature", + "precipitation" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "tas", + "description": "Near-surface air temperature (climatological means)" + }, + { + "@type": "PropertyValue", + "name": "tasmax", + "description": "Maximum near-surface air temperature (climatological means)" + }, + { + "@type": "PropertyValue", + "name": "tasmin", + "description": "Minimum near-surface air temperature (climatological means)" + }, + { + "@type": "PropertyValue", + "name": "pr", + "description": "Precipitation (climatological means/accumulations)" + }, + { + "@type": "PropertyValue", + "name": "hurs", + "description": "Near-surface relative humidity" + }, + { + "@type": "PropertyValue", + "name": "clt", + "description": "Total cloud cover percentage" + }, + { + "@type": "PropertyValue", + "name": "sfcWind", + "description": "Near-surface wind speed" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "image/tiff; application=geotiff; profile=cloud-optimized", + "COG" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": 
"CHELSA-climatologies downloads (COG)", + "description": "Download portal for CHELSA climatologies.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + } + ], + "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", + "about": [ + { + "@type": "Thing", + "name": "Climatology" + }, + { + "@type": "Thing", + "name": "Baseline climate" + } + ] +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld new file mode 100644 index 0000000..b1d7c63 --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld @@ -0,0 +1,133 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsa_daily#dataset", + "name": "CHELSA-daily (V2.1)", + "description": "CHELSA-daily is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. 
It consists of daily surface variables summarized as daily means, minima, maxima, or (in some cases) daily accumulations.", + "url": "https://www.chelsa-climate.org/datasets/chelsa_daily", + "version": "2.1", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "keywords": [ + "CHELSA", + "daily climate", + "downscaling", + "temperature", + "precipitation", + "humidity", + "wind speed", + "solar radiation" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "tas", + "description": "Daily mean near-surface air temperature" + }, + { + "@type": "PropertyValue", + "name": "tasmax", + "description": "Daily maximum near-surface air temperature" + }, + { + "@type": "PropertyValue", + "name": "tasmin", + "description": "Daily minimum near-surface air temperature" + }, + { + "@type": "PropertyValue", + "name": "pr", + "description": "Precipitation" + }, + { + "@type": "PropertyValue", + "name": "hurs", + "description": "Near-surface relative humidity" + }, + { + "@type": "PropertyValue", + "name": "sfcWind", + "description": "Near-surface wind speed" + }, + { + "@type": "PropertyValue", + "name": "rsds", + "description": "Surface downwelling shortwave flux in air (solar irradiance)" + }, + { + "@type": "PropertyValue", + "name": "clt", + "description": "Total cloud cover percentage" + }, + { + "@type": "PropertyValue", + "name": "vpd", + "description": "Vapor pressure deficit" + 
}, + { + "@type": "PropertyValue", + "name": "pet", + "description": "Potential evapotranspiration" + }, + { + "@type": "PropertyValue", + "name": "cmi", + "description": "Climate moisture index" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "image/tiff; application=geotiff; profile=cloud-optimized", + "COG" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSA-daily downloads (COG)", + "description": "Download portal for CHELSA-daily. Files are provided via the CHELSA download service; see dataset page for details and variable-specific subdirectories.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + }, + { + "@type": "DataDownload", + "name": "Catalog landing page (downloads)", + "description": "Alternative CHELSA downloads landing page.", + "contentUrl": "https://chelsa-climate.org/downloads/", + "encodingFormat": "text/html" + } + ], + "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. 
https://doi.org/10.1038/sdata.2017.122", + "about": [ + { + "@type": "Thing", + "name": "Climate" + }, + { + "@type": "Thing", + "name": "Downscaled meteorology" + }, + { + "@type": "Thing", + "name": "High-resolution gridded data" + } + ] +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld new file mode 100644 index 0000000..ca374c5 --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld @@ -0,0 +1,80 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsa_annual#dataset", + "name": "CHELSA-drought-indices (V2.1)", + "description": "CHELSA-drought-indices is a dataset that consists of the standardized precipitation index (SPI), the standardized precipitation evapotranspiration index (SPEI), and related drought metrics.", + "url": "https://www.chelsa-climate.org/datasets/chelsa_annual", + "version": "2.1", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1980-06-15/2018-07-15", + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "keywords": [ + "CHELSA", + "drought", + "SPI", + "SPEI", + "drought indices", + "climate extremes" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "spei12", + "description": "Standardized precipitation evapotranspiration index (12-month)" + }, + { + "@type": "PropertyValue", + "name": "spi", 
+ "description": "Standardized precipitation index (multiple timescales)" + }, + { + "@type": "PropertyValue", + "name": "mymd", + "description": "Identifier for multiyear meteorological drought events (and related fields)" + } + ], + "license": "https://creativecommons.org/licenses/by/4.0/", + "encodingFormat": [ + "image/tiff; application=geotiff; profile=cloud-optimized", + "COG" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSA-drought-indices downloads (COG)", + "description": "Download portal for CHELSA drought indices.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + } + ], + "citation": "Chen, L., Brun, P., Buri, P., Fatichi, S., Gessler, A., McCarthy, M. J., Pelicciotti, F., Stocker, B., Karger, D. N. (2024). High resolution global standardized drought indices. EnviDat. https://doi.org/10.16904/envidat.530", + "about": [ + { + "@type": "Thing", + "name": "Drought" + }, + { + "@type": "Thing", + "name": "Climate risk" + } + ] +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld new file mode 100644 index 0000000..10e511e --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld @@ -0,0 +1,92 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsa_monthly#dataset", + "name": "CHELSA-monthly (V2.1)", + "description": "CHELSA-monthly is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. 
It consists of monthly aggregated surface variables summarized as monthly means or (in some cases) monthly accumulations.", + "url": "https://www.chelsa-climate.org/datasets/chelsa_monthly", + "version": "2.1", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "keywords": [ + "CHELSA", + "monthly climate", + "climate normals", + "downscaling", + "temperature", + "precipitation" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "tas", + "description": "Near-surface air temperature (monthly aggregated)" + }, + { + "@type": "PropertyValue", + "name": "tasmax", + "description": "Maximum near-surface air temperature (monthly aggregated)" + }, + { + "@type": "PropertyValue", + "name": "tasmin", + "description": "Minimum near-surface air temperature (monthly aggregated)" + }, + { + "@type": "PropertyValue", + "name": "pr", + "description": "Precipitation (monthly accumulated/aggregated)" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "image/tiff; application=geotiff; profile=cloud-optimized", + "COG" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSA-monthly downloads (COG)", + "description": "Download portal for CHELSA-monthly.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + }, + { + "@type": "DataDownload", + "name": 
"Catalog landing page (downloads)", + "description": "Alternative CHELSA downloads landing page.", + "contentUrl": "https://chelsa-climate.org/downloads/", + "encodingFormat": "text/html" + } + ], + "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", + "about": [ + { + "@type": "Thing", + "name": "Climate" + }, + { + "@type": "Thing", + "name": "Monthly aggregates" + } + ] +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld new file mode 100644 index 0000000..0dc289d --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld @@ -0,0 +1,82 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial#dataset", + "name": "CHELSA-TraCE21k-centennial (V1.0)", + "description": "CHELSA-TraCE21k-centennial is a global, kilometer-scale climate dataset generated with the CHELSA-TraCE21k downscaling model. 
It consists of monthly climatologies summarized over centennial time steps from 21k BP to 0 BP.", + "url": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial", + "version": "1.0", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "keywords": [ + "CHELSA", + "TraCE21k", + "paleoclimate", + "Last Glacial Maximum", + "downscaling", + "temperature", + "precipitation" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "pr", + "description": "Precipitation" + }, + { + "@type": "PropertyValue", + "name": "tasmax", + "description": "Near-surface air temperature (maximum)" + }, + { + "@type": "PropertyValue", + "name": "tasmin", + "description": "Near-surface air temperature (minimum)" + }, + { + "@type": "PropertyValue", + "name": "tz", + "description": "Air temperature lapse rate" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "image/tiff; application=geotiff; profile=cloud-optimized", + "COG" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSA-TraCE21k-centennial downloads (COG)", + "description": "Download portal for TraCE21k centennial climatologies.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + } + ], + "citation": "Karger, D. N., Nobis, M. P., Normand, S., Graham, C. H., & Zimmermann, N. E. (2020). 
CHELSA-TraCE21k: Downscaled transient temperature and precipitation data since the last glacial maximum. EnviDat. https://doi.org/10.16904/envidat.211", + "about": [ + { + "@type": "Thing", + "name": "Paleoclimate" + }, + { + "@type": "Thing", + "name": "Glacial-interglacial climate variability" + } + ] +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld new file mode 100644 index 0000000..a92212a --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld @@ -0,0 +1,71 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial-bioclim#dataset", + "name": "CHELSA-TraCE21k-centennial-bioclim (V1.0)", + "description": "CHELSA-TraCE21k-centennial-bioclim is a global, kilometer-scale dataset generated with the CHELSA-TraCE21k downscaling model. 
It consists of bioclimatic and topographic variables summarized over centennial time steps from 21k BP to 0 BP.", + "url": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial-bioclim", + "version": "1.0", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "keywords": [ + "CHELSA", + "TraCE21k", + "paleoclimate", + "bioclim", + "bioclimatic variables", + "downscaling" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "BIO1–BIO19", + "description": "Bioclimatic variables derived from temperature and precipitation" + }, + { + "@type": "PropertyValue", + "name": "topographic predictors", + "description": "Topographic variables included with bioclim products (where provided)" + } + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "encodingFormat": [ + "image/tiff; application=geotiff; profile=cloud-optimized", + "COG" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSA-TraCE21k-centennial-bioclim downloads (COG)", + "description": "Download portal for TraCE21k centennial bioclim variables.", + "contentUrl": "https://envicloud.wsl.ch/", + "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + } + ], + "citation": "Karger, D. N., Nobis, M. P., Normand, S., Graham, C. H., & Zimmermann, N. E. (2020). CHELSA-TraCE21k: Downscaled transient temperature and precipitation data since the last glacial maximum. EnviDat. 
https://doi.org/10.16904/envidat.211", + "about": [ + { + "@type": "Thing", + "name": "Paleoclimate bioclimatic predictors" + }, + { + "@type": "Thing", + "name": "Species distribution modeling (paleo)" + } + ] +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld new file mode 100644 index 0000000..2d38027 --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld @@ -0,0 +1,69 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsaw5e5#dataset", + "name": "CHELSA-W5E5-daily (V2.0 / listed as V1.0 in catalog entry)", + "description": "CHELSA-W5E5-daily is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It consists of daily surface variables summarized as daily means, minima, maxima, or (in some cases) daily accumulations.", + "url": "https://www.chelsa-climate.org/datasets/chelsaw5e5", + "version": "1.0", + "creator": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "temporalCoverage": "1979-01-01/2016-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "keywords": [ + "CHELSA", + "W5E5", + "daily climate", + "downscaling", + "ISIMIP", + "temperature", + "precipitation" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "tas/tasmax/tasmin", + "description": "Near-surface air temperature summaries" + }, + { + "@type": "PropertyValue", + "name": "pr", + "description": "Precipitation" + } + ], 
+ "distribution": [ + { + "@type": "DataDownload", + "name": "CHELSA-W5E5-daily downloads (ISIMIP portal)", + "description": "The catalog links the W5E5-based CHELSA daily product to the ISIMIP data portal.", + "contentUrl": "https://data.isimip.org/", + "encodingFormat": "text/html" + } + ], + "encodingFormat": [ + "text/html" + ], + "about": [ + { + "@type": "Thing", + "name": "Bias-adjusted climate forcing" + }, + { + "@type": "Thing", + "name": "Impact modeling" + } + ] +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/datacatalog.jsonld b/data/objects/summoned/generated/CHELSA/datacatalog.jsonld new file mode 100644 index 0000000..45e1d95 --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/datacatalog.jsonld @@ -0,0 +1,40 @@ +{ + "@context": "https://schema.org/", + "@type": "DataCatalog", + "@id": "https://www.chelsa-climate.org/datasets#datacatalog", + "name": "CHELSA Climate Data Catalog", + "description": "CHELSA is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It provides high-resolution climatological data for the earth's land surface areas, including daily and monthly variables, long-term climatological means, bioclimatic variables, and drought indices. The catalog also includes regional high-resolution products (e.g., Switzerland, Canary Islands), paleoclimate reconstructions (TraCE21k), and other derived products. 
Coverage includes global land surface areas and selected regional domains; temporal scope spans past (paleoclimate), present (including 1979 onward for many products), and future scenarios (where applicable).", + "url": "https://www.chelsa-climate.org/datasets", + "inLanguage": "en", + "publisher": { + "@type": "Organization", + "@id": "https://www.chelsa-climate.org/#publisher", + "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", + "url": "https://www.wsl.ch/" + }, + "keywords": [ + "climate", + "climatology", + "CHELSA", + "downscaling", + "high resolution", + "temperature", + "precipitation", + "bioclimatic variables", + "drought indices" + ], + "dataset": [ + { "@id": "https://www.chelsa-climate.org/datasets/chelsa_daily#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsa_monthly#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsa_annual#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsa_climatologies#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsa_bioclim#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres_daily#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres-climatologies#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/canary-clim-canaries#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial-bioclim#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets/chelsaw5e5#dataset" }, + { "@id": "https://www.chelsa-climate.org/datasets#chelsacerra-daily-dataset" } + ] +} diff --git a/data/objects/summoned/generated/CHELSA/prompt.txt b/data/objects/summoned/generated/CHELSA/prompt.txt new file mode 100644 index 0000000..72795ed --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/prompt.txt @@ -0,0 +1,75 @@ +I need to create Schema.org 
JSON-LD descriptions for a scientific climate data catalog and its datasets. + +**Website URL**: https://www.chelsa-climate.org/datasets +**Alternative URL**: https://chelsa-climate.org/downloads/ + +**Catalog Information**: +- Name: CHELSA (Climatologies at high resolution for the earth's land surface areas) +- Group/Category: climate +- Description: CHELSA is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It provides high-resolution climatological data for the earth's land surface areas, including daily, monthly, and climatological means, as well as bioclimatic variables and drought indices. + +**Key Details**: +- Publisher: WSL (Swiss Federal Institute for Forest, Snow and Landscape Research) +- Website: https://www.chelsa-climate.org/ +- Coverage: Global, Europe, Switzerland, Canary Islands +- Temporal Coverage: Past (paleoclimate), Present (1979-present), Future (climate scenarios) +- Resolution: Kilometer-scale (~1km) +- Multiple datasets available with different temporal frequencies and variables + +**Available Datasets** (from the catalog): +1. CHELSA-daily (V2.1) - Daily surface variables, 1979-01-01 to 2025-08-29 +2. CHELSA-monthly (V2.1) - Monthly aggregated variables, 1979-01-15 to 2021-12-15 +3. CHELSA-drought-indices (V2.1) - SPI and SPEI indices, 1980-06-15 to 2018-07-15 +4. CHELSA-climatologies (V2.1) - Long-term climatological means, 1981-2010 to 2071-2100 +5. CHELSA-bioclim (V2.1) - Bioclimatic variables, 1981-2010 to 2071-2100 +6. CHELSAch-highres (Switzerland) - High resolution for Switzerland +7. CHELSACanaryClim (Canary Islands) - Very high resolution for Canary Islands +8. CHELSA-TraCE21k - Paleoclimate data, 21k BP to 0 BP +9. CHELSA-W5E5-daily (V2.0) - Daily data, 1979-2016 +10. 
CHELSAcerra-daily (Europe) - High resolution for Europe, 1985-2015 + +**Variables Available**: +- Temperature (mean, min, max, diurnal range, seasonality) +- Precipitation (annual, monthly, seasonal) +- Bioclimatic variables +- Drought indices (SPI, SPEI) +- Cloud cover, humidity, wind speed +- Growing degree days, growing season metrics +- Köppen-Geiger climate classification +- And many more... + +**Instructions for Step 1.0 (WebPage Description)**: +Please create a Schema.org WebPage JSON-LD that includes: +- @context: "https://schema.org/" +- @type: "WebPage" +- @id: The catalog webpage URL +- name: Full catalog name +- description: Comprehensive description of the CHELSA climate data catalog +- url: Main catalog webpage URL +- publisher: WSL (Swiss Federal Institute for Forest, Snow and Landscape Research) +- inLanguage: "en" +- isPartOf: WebSite information +- about: Reference to the catalog as a DataCatalog +- keywords: Relevant keywords (climate, climatology, CHELSA, high resolution, downscaling, etc.) +- mainEntity: Reference to the DataCatalog + +**Instructions for Dataset JSON-LD (to be created separately)**: +For each major dataset, create a Schema.org Dataset JSON-LD that includes: +- @context, @type, @id, name, description, url +- creator/publisher: WSL +- temporalCoverage: Time period covered +- spatialCoverage: Geographic coverage (global, regional, etc.) +- variableMeasured: List of climate variables +- distribution: Download links and formats +- encodingFormat: Data formats (likely GeoTIFF, NetCDF, etc.) +- version: Dataset version +- license: License information (if available) + +**Output Format**: +Provide the complete JSON-LD in a code block, properly formatted and valid JSON. 
+ +**Important Notes**: +- This is a data catalog with multiple datasets +- Each dataset should be described separately +- The catalog page describes the overall project and provides access to multiple datasets +- Include all relevant metadata from the website diff --git a/data/objects/summoned/generated/CHELSA/webpage.jsonld b/data/objects/summoned/generated/CHELSA/webpage.jsonld new file mode 100644 index 0000000..a4e1fc7 --- /dev/null +++ b/data/objects/summoned/generated/CHELSA/webpage.jsonld @@ -0,0 +1,99 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "@id": "https://www.chelsa-climate.org/datasets", + "name": "CHELSA Climate Datasets", + "alternateName": "CHELSA: Climatologies at high resolution for the earth's land surface areas", + "description": "Webpage for the CHELSA (Climatologies at high resolution for the earth's land surface areas) climate data catalog. CHELSA is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. The catalog provides access to multiple climate datasets including daily, monthly, and climatological means, bioclimatic variables, drought indices, and paleoclimate data. 
Datasets cover global, European, Swiss, and Canary Islands regions with temporal coverage from the Last Glacial Maximum (21k BP) through present observations to future climate scenarios (2071-2100).", + "url": "https://www.chelsa-climate.org/datasets", + "alternateUrl": "https://chelsa-climate.org/downloads/", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "CHELSA Climate", + "url": "https://www.chelsa-climate.org/" + }, + "about": { + "@type": "DataCatalog", + "name": "CHELSA Climate Data Catalog", + "description": "A comprehensive catalog of high-resolution climate datasets for the earth's land surface areas", + "url": "https://www.chelsa-climate.org/datasets" + }, + "publisher": { + "@type": "Organization", + "name": "WSL", + "alternateName": "Swiss Federal Institute for Forest, Snow and Landscape Research", + "url": "https://www.wsl.ch/en/index.html" + }, + "mainEntity": { + "@type": "DataCatalog", + "name": "CHELSA Climate Data Catalog", + "description": "Global, kilometer-scale climate datasets generated with the CHELSA downscaling model", + "url": "https://www.chelsa-climate.org/datasets", + "publisher": { + "@type": "Organization", + "name": "WSL", + "alternateName": "Swiss Federal Institute for Forest, Snow and Landscape Research" + }, + "hasPart": [ + { + "@type": "Dataset", + "name": "CHELSA-daily", + "version": "2.1" + }, + { + "@type": "Dataset", + "name": "CHELSA-monthly", + "version": "2.1" + }, + { + "@type": "Dataset", + "name": "CHELSA-climatologies", + "version": "2.1" + }, + { + "@type": "Dataset", + "name": "CHELSA-bioclim", + "version": "2.1" + }, + { + "@type": "Dataset", + "name": "CHELSA-drought-indices", + "version": "2.1" + } + ] + }, + "keywords": [ + "CHELSA", + "climate", + "climatology", + "high resolution", + "downscaling", + "climate data", + "temperature", + "precipitation", + "bioclimatic variables", + "drought indices", + "paleoclimate", + "climate scenarios", + "global climate", + "kilometer-scale" + ], + 
"breadcrumb": { + "@type": "BreadcrumbList", + "itemListElement": [ + { + "@type": "ListItem", + "position": 1, + "name": "CHELSA Climate", + "item": "https://www.chelsa-climate.org/" + }, + { + "@type": "ListItem", + "position": 2, + "name": "Datasets", + "item": "https://www.chelsa-climate.org/datasets" + } + ] + } +} diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld new file mode 100644 index 0000000..cf370d4 --- /dev/null +++ b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld @@ -0,0 +1,119 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.earthenv.org/landcover#dataset", + "name": "Consensus Land Cover (Global 1-km Consensus Land Cover), Version 1.0", + "description": "A global 1-km (30 arc-second) consensus land-cover product for biodiversity and ecosystem modelling. The dataset integrates multiple global remote sensing-derived land-cover products and provides consensus prevalence (0–100%) for 12 land-cover classes at each grid cell. Two Version 1.0 variants are distributed: (1) Full v1.0 (with DISCover/GLCC), integrating GlobCover (2005–2006; v2.2), MODIS land-cover (MCD12Q1; v051), GLC2000 (v1.1), and DISCover/GLCC (v2; based on older imagery 1992–1993); and (2) Reduced v1.0 (without DISCover), integrating GlobCover, MODIS, and GLC2000 only, offered as an alternative for areas with substantial land-cover change in recent decades. Each variant contains 12 GeoTIFF layers (one per class). 
Values are unsigned 8-bit integers representing percent prevalence (0–100).", + "url": "https://www.earthenv.org/landcover", + "creator": [ + { "@type": "Person", "name": "Mao-Ning Tuanmu" }, + { "@type": "Person", "name": "Walter Jetz" } + ], + "publisher": { + "@type": "Organization", + "@id": "https://www.earthenv.org/#org", + "name": "EarthEnv", + "url": "https://www.earthenv.org/", + "member": [ + { "@type": "Organization", "name": "Yale University" }, + { "@type": "Organization", "name": "University of Florida" }, + { "@type": "Organization", "name": "University at Buffalo" } + ] + }, + "keywords": [ + "land cover", + "consensus", + "remote sensing", + "biodiversity", + "ecosystem modelling", + "GlobCover", + "MODIS", + "MCD12Q1", + "GLC2000", + "DISCover", + "GLCC", + "prevalence", + "GeoTIFF" + ], + "license": "https://creativecommons.org/licenses/by-nc/4.0/", + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "spatialResolution": "30 arc-second (~1 km at the equator)", + "temporalCoverage": "1992-01-01/2006-12-31", + "encodingFormat": [ + "image/tiff; application=geotiff" + ], + "variableMeasured": [ + { "@type": "PropertyValue", "name": "Evergreen/Deciduous Needleleaf Trees", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Evergreen Broadleaf Trees", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Deciduous Broadleaf Trees", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Mixed/Other Trees", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Shrubs", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Herbaceous Vegetation", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Cultivated and Managed Vegetation", "description": "Consensus 
prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Regularly Flooded Vegetation", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Urban/Built-up", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Snow/Ice", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Barren", "description": "Consensus prevalence (0–100%)" }, + { "@type": "PropertyValue", "name": "Open Water", "description": "Consensus prevalence (0–100%)" } + ], + "measurementTechnique": "Integration of multiple global remote sensing-derived land-cover products to estimate per-class consensus prevalence (percent) at 1-km resolution.", + "distribution": [ + { + "@type": "DataDownload", + "name": "Full Version 1.0 downloads (with DISCover) – directory", + "description": "Directory containing GeoTIFF layers (one per class) for Full Version 1.0 (with DISCover/GLCC).", + "contentUrl": "https://data.earthenv.org/consensus_landcover/with_DISCover/", + "encodingFormat": "text/html" + }, + { + "@type": "DataDownload", + "name": "Reduced Version 1.0 downloads (without DISCover) – directory", + "description": "Directory containing GeoTIFF layers (one per class) for Reduced Version 1.0 (without DISCover/GLCC).", + "contentUrl": "https://data.earthenv.org/consensus_landcover/without_DISCover/", + "encodingFormat": "text/html" + }, + { + "@type": "DataDownload", + "name": "Example class GeoTIFF (Full v1.0): class 1", + "description": "Example direct file link for a single land-cover class GeoTIFF in the Full v1.0 distribution.", + "contentUrl": "https://data.earthenv.org/consensus_landcover/with_DISCover/consensus_full_class_1.tif", + "encodingFormat": "image/tiff; application=geotiff" + }, + { + "@type": "DataDownload", + "name": "Example class GeoTIFF (Reduced v1.0): class 1", + "description": "Example direct file link for a single land-cover class GeoTIFF in the Reduced v1.0 distribution.", 
+ "contentUrl": "https://data.earthenv.org/consensus_landcover/without_DISCover/Consensus_reduced_class_1.tif", + "encodingFormat": "image/tiff; application=geotiff" + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "A global 1-km consensus land-cover product for biodiversity and ecosystem modeling", + "author": [ + { "@type": "Person", "name": "Mao-Ning Tuanmu" }, + { "@type": "Person", "name": "Walter Jetz" } + ], + "datePublished": "2014", + "isPartOf": { "@type": "Periodical", "name": "Global Ecology and Biogeography" }, + "identifier": [ + { "@type": "PropertyValue", "propertyID": "doi", "value": "10.1111/geb.12182" } + ], + "sameAs": "https://doi.org/10.1111/geb.12182" + } + ], + "about": [ + { "@type": "Thing", "name": "Land cover" }, + { "@type": "Thing", "name": "Remote sensing" }, + { "@type": "Thing", "name": "Biodiversity modelling" }, + { "@type": "Thing", "name": "Ecosystem modelling" } + ] +} diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/prompt.txt b/data/objects/summoned/generated/Consensus_Land_Cover/prompt.txt new file mode 100644 index 0000000..bc0d88f --- /dev/null +++ b/data/objects/summoned/generated/Consensus_Land_Cover/prompt.txt @@ -0,0 +1,117 @@ +I need to create Schema.org JSON-LD descriptions for a scientific land cover dataset. + +**Website URL**: https://www.earthenv.org/landcover + +**Dataset Information**: +- Name: Consensus Land Cover (Global 1-km Consensus Land Cover) +- Group/Category: land_cover +- Description: A global 1-km consensus land-cover product for biodiversity and ecosystem modelling. The dataset integrates multiple global remote sensing-derived land-cover products and provides consensus information on the prevalence of 12 land-cover classes at 1-km resolution. 
+- Website: https://www.earthenv.org/landcover +- Coverage: Global (90°N to 56°S, 180°W to 180°E) +- Spatial Resolution: 30 arc-second (~1 km per pixel at the equator) +- License: Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) + +**Key Details**: +- Publisher: EarthEnv (Yale University, University of Florida, University at Buffalo) +- Creator: Mao-Ning Tuanmu, Walter Jetz +- Website: https://www.earthenv.org/landcover +- Coverage: Global (90°N to 56°S, 180°W to 180°E) +- Spatial Resolution: 30 arc-second (~1 km at equator) +- Temporal Coverage: Based on source products (2005-2006 for GlobCover, 1992-1993 for DISCover) +- License: CC BY-NC 4.0 + +**Dataset Versions**: +1. **Full Version 1.0** (with DISCover) + - Integrates: GlobCover (2005-06; v2.2), MODIS land-cover product (MCD12Q1; v051), GLC2000 (global product; v1.1), and DISCover (GLCC; v2) + - Recommended for most applications + +2. **Reduced Version 1.0** (without DISCover) + - Integrates: GlobCover, MODIS, GLC2000 (first three products only) + - Alternative for applications in regions with large land cover change in the past two decades + +**12 Land Cover Classes**: +1. Evergreen/Deciduous Needleleaf Trees +2. Evergreen Broadleaf Trees +3. Deciduous Broadleaf Trees +4. Mixed/Other Trees +5. Shrubs +6. Herbaceous Vegetation +7. Cultivated and Managed Vegetation +8. Regularly Flooded Vegetation +9. Urban/Built-up +10. Snow/Ice +11. Barren +12. 
Open Water + +**Data Format and Structure**: +- Each dataset version contains 12 data layers (one per land-cover class) +- All data layers contain unsigned 8-bit values (0-100, representing consensus prevalence in percentage) +- Format: GeoTIFF (20-100MB per class file) +- Spatial extent: 90°N to 56°S, 180°W to 180°E +- Spatial resolution: 30 arc-second per pixel (~1 km per pixel at the equator) + +**Methods**: +- Integrates multiple global remote sensing-derived land-cover products +- Provides consensus information on land-cover class prevalence +- Uses percentage values (0-100) to represent consensus prevalence + +**Data Access**: +- GeoTIFF format downloads for individual land-cover classes +- Full Version downloads: https://data.earthenv.org/consensus_landcover/with_DISCover/ +- Reduced Version downloads: https://data.earthenv.org/consensus_landcover/without_DISCover/ +- Each class available as separate GeoTIFF file (20-100MB each) + +**Citation**: +Tuanmu, M.-N. and W. Jetz. 2014. A global 1-km consensus land-cover product for biodiversity and ecosystem modeling. Global Ecology and Biogeography 23(9): 1031-1045. Data available on-line at http://www.earthenv.org/. + +**Instructions for Step 1.0 (WebPage Description)**: +Please create a Schema.org WebPage JSON-LD that describes the webpage itself. 
This should include: + +Required fields: +- @context: "https://schema.org/" +- @type: "WebPage" +- @id: The webpage URL with fragment identifier (e.g., "https://www.earthenv.org/landcover#webpage") +- name: "Global 1-km Consensus Land Cover" or similar webpage title +- description: Comprehensive description of what the webpage is about (the dataset and its purpose) +- url: Main webpage URL (https://www.earthenv.org/landcover) +- inLanguage: "en" + +Recommended fields: +- isPartOf: WebSite object with name "EarthEnv" and url "https://www.earthenv.org/" +- publisher: Organization object for EarthEnv, including member organizations (Yale University, University of Florida, University at Buffalo) +- about: Reference to the Dataset using @id (e.g., {"@type": "Dataset", "@id": "https://www.earthenv.org/landcover#dataset"}) +- mainEntity: Reference to the Dataset (same as about) +- keywords: Array of relevant keywords (land cover, consensus, remote sensing, biodiversity, ecosystem modeling, GeoTIFF, GlobCover, MODIS, GLC2000, DISCover, etc.) +- breadcrumb: BreadcrumbList with EarthEnv as parent and the landcover page as current + +The WebPage JSON-LD should describe the webpage that hosts information about the dataset, not the dataset itself. It should reference the dataset via @id links. 
+ +**Instructions for Dataset JSON-LD**: +Please create a Schema.org Dataset JSON-LD that includes: +- @context, @type, @id, name, description, url +- creator: Mao-Ning Tuanmu, Walter Jetz +- publisher: EarthEnv (member organizations: Yale University, University of Florida, University at Buffalo) +- temporalCoverage: Based on source products (give the overall range as an ISO 8601 interval, e.g. "1992-01-01/2006-12-31") +- spatialCoverage: Global (90°N to 56°S, 180°W to 180°E) - use bounding box format "west,south east,north" +- variableMeasured: List all 12 land cover classes +- distribution: Multiple DataDownload entries for: + - Full Version downloads (with DISCover) + - Reduced Version downloads (without DISCover) + - Individual class downloads +- encodingFormat: GeoTIFF +- version: Version 1.0 (mention both Full and Reduced versions) +- license: CC BY-NC 4.0 +- citation: Scientific publication citation +- measurementTechnique: Integration of multiple global remote sensing-derived land-cover products +- about: Land cover, biodiversity, ecosystem modeling +- spatialResolution: 30 arc-second (~1 km at equator) + +**Output Format**: +Provide the complete JSON-LD in a code block, properly formatted and valid JSON. 
+ +**Important Notes**: +- This is a single comprehensive dataset (not a catalog) +- Two versions (Full and Reduced) are variants of the same product, not separate datasets +- 12 land cover classes are components of the dataset, not separate datasets +- All classes are available as separate GeoTIFF files +- Bounding box format: "-180,-56 180,90" (west,south east,north) diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld new file mode 100644 index 0000000..19fdcc9 --- /dev/null +++ b/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld @@ -0,0 +1,70 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "@id": "https://www.earthenv.org/landcover#webpage", + "name": "Global 1-km Consensus Land Cover", + "description": "Webpage for the Global 1-km Consensus Land Cover dataset, a global land-cover product that integrates multiple remote sensing-derived land-cover products and provides consensus information on the prevalence of 12 land-cover classes at 1-km resolution for biodiversity and ecosystem modeling. 
The dataset is available in two versions: Full Version 1.0 (with DISCover) and Reduced Version 1.0 (without DISCover), each containing 12 GeoTIFF layers representing consensus prevalence (0-100%) for different land-cover classes.", + "url": "https://www.earthenv.org/landcover", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "EarthEnv", + "url": "https://www.earthenv.org/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.earthenv.org/#org", + "name": "EarthEnv", + "url": "https://www.earthenv.org/", + "member": [ + { "@type": "Organization", "name": "Yale University" }, + { "@type": "Organization", "name": "University of Florida" }, + { "@type": "Organization", "name": "University at Buffalo" } + ] + }, + "about": { + "@type": "Dataset", + "@id": "https://www.earthenv.org/landcover#dataset", + "name": "Consensus Land Cover (Global 1-km Consensus Land Cover)", + "description": "A global 1-km consensus land-cover product for biodiversity and ecosystem modelling", + "url": "https://www.earthenv.org/landcover" + }, + "mainEntity": { + "@type": "Dataset", + "@id": "https://www.earthenv.org/landcover#dataset", + "name": "Consensus Land Cover (Global 1-km Consensus Land Cover)", + "description": "A global 1-km consensus land-cover product for biodiversity and ecosystem modelling", + "url": "https://www.earthenv.org/landcover" + }, + "keywords": [ + "land cover", + "consensus", + "remote sensing", + "biodiversity", + "ecosystem modelling", + "global", + "1-km resolution", + "GeoTIFF", + "GlobCover", + "MODIS", + "GLC2000", + "DISCover" + ], + "breadcrumb": { + "@type": "BreadcrumbList", + "itemListElement": [ + { + "@type": "ListItem", + "position": 1, + "name": "EarthEnv", + "item": "https://www.earthenv.org/" + }, + { + "@type": "ListItem", + "position": 2, + "name": "Global 1-km Consensus Land Cover", + "item": "https://www.earthenv.org/landcover" + } + ] + } +} diff --git a/data/objects/summoned/generated/GFC/gfc.jsonld 
b/data/objects/summoned/generated/GFC/gfc.jsonld new file mode 100644 index 0000000..ff0430e --- /dev/null +++ b/data/objects/summoned/generated/GFC/gfc.jsonld @@ -0,0 +1,231 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#dataset", + "name": "Global Forest Change (GFC) 2000–2023 — GFC-2023-v1.11", + "description": "Global Forest Change (GFC) provides results from time-series analysis of Landsat imagery to characterize global forest extent and change from 2000 through 2023. The direct-download package is organized as 10×10 degree GeoTIFF tiles at ~30 m resolution (1 arc-second per pixel). Core layers include: tree canopy cover for year 2000 (percent canopy closure for vegetation >5 m), forest gain (2000–2012), year of gross forest cover loss (coded by loss year for 2001–2023), a data mask for land/water/no-data, and reference Landsat cloud-free composites for the first and last available years (typically ~2000 and ~2023) in red, NIR, SWIR1, and SWIR2 bands. Version 1.11 adds 2023 loss-year updates and updated multispectral imagery and includes methodological updates (including Landsat 8/9 use and reprocessing from 2011 onward).", + "url": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html", + "version": "GFC-2023-v1.11", + "creator": [ + { "@type": "Person", "name": "Matthew C. Hansen" }, + { "@type": "Person", "name": "Peter V. Potapov" }, + { "@type": "Person", "name": "Rebecca Moore" }, + { "@type": "Person", "name": "Matt Hancher" }, + { "@type": "Person", "name": "Svetlana A. Turubanova" }, + { "@type": "Person", "name": "Alexandra Tyukavina" }, + { "@type": "Person", "name": "David Thau" }, + { "@type": "Person", "name": "Stephen V. Stehman" }, + { "@type": "Person", "name": "Scott J. Goetz" }, + { "@type": "Person", "name": "Thomas R. 
Loveland" }, + { "@type": "Person", "name": "Arun Kommareddy" }, + { "@type": "Person", "name": "Andrey Egorov" }, + { "@type": "Person", "name": "Lydia Chini" }, + { "@type": "Person", "name": "Christopher O. Justice" }, + { "@type": "Person", "name": "John R. G. Townshend" } + ], + "publisher": [ + { + "@type": "Organization", + "name": "Hansen/UMD/Google/USGS/NASA", + "url": "https://glad.earthengine.app/view/global-forest-change" + }, + { + "@type": "Organization", + "name": "Global Land Analysis and Discovery (GLAD) Laboratory, University of Maryland", + "url": "https://glad.umd.edu/" + } + ], + "temporalCoverage": "2000-01-01/2023-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Global (granules provided for 180W–180E, 80N–60S)", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,80" + } + }, + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitCode": "MTR", + "description": "Approx. 30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." + }, + "keywords": [ + "forest monitoring", + "deforestation", + "forest loss", + "forest gain", + "tree canopy cover", + "Global Forest Change", + "GFC", + "Landsat", + "remote sensing", + "GLAD", + "University of Maryland", + "Google Earth Engine" + ], + "license": "https://creativecommons.org/licenses/by/4.0/", + "encodingFormat": [ + "image/tiff; application=geotiff", + "text/plain" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "treecover2000", + "unitText": "percent", + "description": "Tree canopy cover for year 2000 (0–100), defined as canopy closure for vegetation taller than 5 m." + }, + { + "@type": "PropertyValue", + "name": "gain", + "description": "Forest gain during 2000–2012 (binary: 1 gain, 0 no gain)." + }, + { + "@type": "PropertyValue", + "name": "lossyear", + "description": "Year of gross forest cover loss event (0 no loss; 1–23 correspond primarily to 2001–2023)." 
+ }, + { + "@type": "PropertyValue", + "name": "datamask", + "description": "Data mask: 0 no data, 1 mapped land surface, 2 persistent water bodies (based on 2000–2012)." + }, + { + "@type": "PropertyValue", + "name": "first", + "description": "Circa year-2000 Landsat cloud-free composite in red, NIR, SWIR1, SWIR2 bands (median, quality-assessed growing-season observations)." + }, + { + "@type": "PropertyValue", + "name": "last", + "description": "Circa year-2023 Landsat cloud-free composite in red, NIR, SWIR1, SWIR2 bands (median, quality-assessed growing-season observations)." + }, + { + "@type": "PropertyValue", + "name": "loss (derived)", + "description": "Not released as a separate download layer in newer versions; corresponds to pixels where lossyear > 0." + } + ], + "measurementTechnique": [ + "Time-series analysis of Landsat imagery to map forest extent and change", + "Change detection for stand-replacement disturbance (forest to non-forest)", + "Annual updating of loss-year and reference imagery layers" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "Direct download page (tile selection map and links)", + "description": "Interactive download instructions and example granule URLs for 10×10 degree GeoTIFF tiles.", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html", + "encodingFormat": "text/html" + }, + { + "@type": "DataDownload", + "name": "Example tile (treecover2000) — 40N 080W", + "description": "Example GeoTIFF granule for tree canopy cover in year 2000; filenames encode the tile top-left corner.", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_treecover2000_40N_080W.tif", + "encodingFormat": "image/tiff; application=geotiff" + }, + { + "@type": "DataDownload", + "name": "Example tile (gain) — 40N 080W", + "contentUrl": 
"https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_gain_40N_080W.tif", + "encodingFormat": "image/tiff; application=geotiff" + }, + { + "@type": "DataDownload", + "name": "Example tile (lossyear) — 40N 080W", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_lossyear_40N_080W.tif", + "encodingFormat": "image/tiff; application=geotiff" + }, + { + "@type": "DataDownload", + "name": "Example tile (datamask) — 40N 080W", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_datamask_40N_080W.tif", + "encodingFormat": "image/tiff; application=geotiff" + }, + { + "@type": "DataDownload", + "name": "Example tile (first composite) — 40N 080W", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_first_40N_080W.tif", + "encodingFormat": "image/tiff; application=geotiff" + }, + { + "@type": "DataDownload", + "name": "Example tile (last composite) — 40N 080W", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_last_40N_080W.tif", + "encodingFormat": "image/tiff; application=geotiff" + }, + { + "@type": "DataDownload", + "name": "Layer-wide URL list: treecover2000", + "description": "Text file listing URLs for all granules for the treecover2000 layer.", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/treecover2000.txt", + "encodingFormat": "text/plain" + }, + { + "@type": "DataDownload", + "name": "Layer-wide URL list: gain", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/gain.txt", + "encodingFormat": "text/plain" + }, + { + "@type": "DataDownload", + "name": "Layer-wide URL list: lossyear", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/lossyear.txt", + 
"encodingFormat": "text/plain" + }, + { + "@type": "DataDownload", + "name": "Layer-wide URL list: datamask", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/datamask.txt", + "encodingFormat": "text/plain" + }, + { + "@type": "DataDownload", + "name": "Layer-wide URL list: first", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/first.txt", + "encodingFormat": "text/plain" + }, + { + "@type": "DataDownload", + "name": "Layer-wide URL list: last", + "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/last.txt", + "encodingFormat": "text/plain" + }, + { + "@type": "DataDownload", + "name": "Google Earth Engine access (asset)", + "description": "Analyze the dataset directly in Google Earth Engine using asset ID UMD/hansen/global_forest_change_2023_v1_11.", + "contentUrl": "https://developers.google.com/earth-engine/datasets/catalog/UMD_hansen_global_forest_change_2023_v1_11", + "encodingFormat": "text/html" + }, + { + "@type": "DataDownload", + "name": "Web visualization (recommended linking URL)", + "description": "Interactive visualization site recommended by the dataset providers for linking/citation.", + "contentUrl": "https://glad.earthengine.app/view/global-forest-change", + "encodingFormat": "text/html" + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "High-Resolution Global Maps of 21st-Century Forest Cover Change", + "isPartOf": { "@type": "Periodical", "name": "Science" }, + "datePublished": "2013-11-15", + "identifier": [ + { "@type": "PropertyValue", "propertyID": "doi", "value": "10.1126/science.1244693" } + ], + "sameAs": "https://doi.org/10.1126/science.1244693" + } + ], + "about": [ + { "@type": "Thing", "name": "Forest monitoring" }, + { "@type": "Thing", "name": "Deforestation" }, + { "@type": "Thing", "name": "Land cover change" }, + { "@type": "Thing", "name": "Remote sensing" }, + { "@type": "Thing", "name": 
"Landsat" } + ] +} diff --git a/data/objects/summoned/generated/GFC/prompt.txt b/data/objects/summoned/generated/GFC/prompt.txt new file mode 100644 index 0000000..4c679e4 --- /dev/null +++ b/data/objects/summoned/generated/GFC/prompt.txt @@ -0,0 +1,80 @@ +I need to create Schema.org JSON-LD descriptions for a scientific forest monitoring dataset. + +**Website URL**: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html + +**Dataset Information**: +- Name: GFC (Global Forest Change) +- Group/Category: land_cover +- Description: Global forest extent and change +- Website: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html +- Version: GFC-2023-v1.11 (suggests 2023 data, version 1.11) +- Organization: Hansen Global Forest Change (Google Earth Engine Partners) + +**Expected Details** (Hansen Global Forest Change typically includes): +- Creator: Matthew C. Hansen and collaborators (University of Maryland, Google) +- Publisher: Google Earth Engine Partners / University of Maryland +- Coverage: Global +- Temporal Coverage: Typically 2000-2023 (or based on version number) +- Variables: + - Tree cover extent (baseline, typically year 2000) + - Forest loss (yearly from 2000 onward) + - Forest gain (2000-2012 or similar period) + - Tree cover loss year +- Format: GeoTIFF, typically available via Google Earth Engine or direct downloads +- Spatial Resolution: Typically 30m (Landsat-based) + +**Note**: The URL appears to be a Google Cloud Storage download page. Please browse/analyze the actual webpage to verify: +- Available data products/layers +- Temporal coverage +- Spatial coverage and resolution +- File formats +- License information +- Download methods +- Citation information + +**Instructions for Step 1.0 (WebPage Description)**: +Please create a Schema.org WebPage JSON-LD that describes the webpage itself. 
This should include: + +Required fields: +- @context: "https://schema.org/" +- @type: "WebPage" +- @id: The webpage URL with fragment identifier (e.g., "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#webpage") +- name: "Global Forest Change" or similar webpage title +- description: Comprehensive description of what the webpage is about (the dataset and its purpose) +- url: Main webpage URL +- inLanguage: "en" + +Recommended fields: +- isPartOf: WebSite object if applicable +- publisher: Organization object (Google Earth Engine Partners, University of Maryland, etc.) +- about: Reference to the Dataset using @id +- mainEntity: Reference to the Dataset (same as about) +- keywords: Array of relevant keywords (forest, deforestation, forest loss, tree cover, global, remote sensing, Landsat, etc.) + +The WebPage JSON-LD should describe the webpage that hosts information about the dataset, not the dataset itself. It should reference the dataset via @id links. + +**Instructions for Dataset JSON-LD**: +Please create a Schema.org Dataset JSON-LD that includes: +- @context, @type, @id, name, description, url +- creator: Matthew C. Hansen and collaborators (verify from website) +- publisher: Google Earth Engine Partners / University of Maryland (verify from website) +- temporalCoverage: Based on dataset version (likely 2000-2023 or similar) +- spatialCoverage: Global - use bounding box format "west,south east,north" (likely "-180,-90 180,90" for global) +- variableMeasured: List data layers/variables (tree cover, forest loss, forest gain, etc.) 
+- distribution: Multiple DataDownload entries for available download options +- encodingFormat: GeoTIFF or other formats available +- version: GFC-2023-v1.11 (or verify from website) +- license: Verify from website +- citation: Scientific publication citation (Hansen et al., verify from website) +- measurementTechnique: Remote sensing using Landsat imagery, change detection methods +- about: Forest monitoring, deforestation, forest change, global mapping +- spatialResolution: Typically 30m (Landsat-based, verify from website) + +**Output Format**: +Provide the complete JSON-LD in a code block, properly formatted and valid JSON. + +**Important Notes**: +- This appears to be a single comprehensive dataset (not a catalog) +- Multiple data layers (tree cover, loss, gain) are components of the same dataset +- Bounding box format: "-180,-90 180,90" (west,south east,north) for global coverage +- Verify all details from the actual website as this is a Google Cloud Storage page diff --git a/data/objects/summoned/generated/GFC/webpage.jsonld b/data/objects/summoned/generated/GFC/webpage.jsonld new file mode 100644 index 0000000..5b8d80f --- /dev/null +++ b/data/objects/summoned/generated/GFC/webpage.jsonld @@ -0,0 +1,50 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "@id": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#webpage", + "name": "Global Forest Change 2000–2023 (GFC-2023-v1.11) — Data Download", + "description": "Download page for the Hansen/GLAD Global Forest Change (GFC) dataset version GFC-2023-v1.11. 
The page provides direct-download links for 10×10 degree GeoTIFF tiles and layer-wide URL lists for global forest extent and change products derived from time-series analysis of Landsat imagery, including year-2000 tree canopy cover, forest gain (2000–2012), annual forest loss year (2001–2023), a data mask, and reference Landsat composites for the first and last available years (typically ~2000 and ~2023). It also includes version 1.11 user notes, license/attribution guidance, and pointers to web visualization and Google Earth Engine access.", + "url": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "@id": "https://storage.googleapis.com/#website", + "name": "Google Cloud Storage", + "url": "https://storage.googleapis.com/" + }, + "publisher": [ + { + "@type": "Organization", + "name": "Global Land Analysis and Discovery (GLAD) Laboratory, University of Maryland", + "url": "https://glad.umd.edu/" + }, + { + "@type": "Organization", + "name": "Google Earth Engine", + "url": "https://earthengine.google.com/" + } + ], + "keywords": [ + "Global Forest Change", + "GFC", + "forest", + "deforestation", + "forest loss", + "forest gain", + "tree cover", + "Landsat", + "remote sensing", + "global mapping", + "GeoTIFF", + "GLAD", + "University of Maryland" + ], + "about": { + "@type": "Dataset", + "@id": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#dataset" + }, + "mainEntity": { + "@type": "Dataset", + "@id": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#dataset" + } +} diff --git a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld new file mode 100644 index 0000000..388a0a6 --- /dev/null +++ b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld @@ -0,0 
+1,132 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#dataset", + "name": "Global Tree Density (Global tree density map)", + "description": "Tree density mapped at a global scale, provided as two spatially continuous raster maps. One map was generated using biome-level linear regression models and applied at the biome scale; the second map was generated using ecoregion-level linear regression models and applied at the ecoregion scale. The models were built using over 420,000 ground-sourced estimates of tree density and predictor variables spanning vegetative, climatic, topographic, and anthropogenic factors. The creators note that transitions at biome/ecoregion boundaries may appear abrupt and that estimates are generally more robust at country scale (or larger) than at individual pixel scale. The primary distribution is a zipped ArcGIS File Geodatabase package containing both raster models plus supporting ArcGIS layer and map-document files; additional files include a revision adding predictions for small islands and a WGS84 GeoTIFF derivative for the revision.", + "url": "https://elischolar.library.yale.edu/yale_fes_data/1/", + "creator": [ + { "@type": "Person", "name": "T. W. Crowther", "affiliation": { "@type": "Organization", "name": "Yale University" } }, + { "@type": "Person", "name": "H. B. Glick", "affiliation": { "@type": "Organization", "name": "Yale University" } }, + { "@type": "Person", "name": "K. R. Covey", "affiliation": { "@type": "Organization", "name": "Yale University" } }, + { "@type": "Person", "name": "G. Amatulli", "affiliation": { "@type": "Organization", "name": "Yale University" } }, + { "@type": "Person", "name": "M.-N. Tuanmu", "affiliation": { "@type": "Organization", "name": "Yale University" } }, + { "@type": "Person", "name": "W. 
Jetz", "affiliation": { "@type": "Organization", "name": "Yale University" } } + ], + "publisher": { + "@type": "Organization", + "name": "EliScholar (Yale School of the Environment)", + "url": "https://elischolar.library.yale.edu/yale_fes_data/", + "parentOrganization": { + "@type": "Organization", + "name": "Yale University", + "url": "https://www.yale.edu/" + } + }, + "datePublished": "2015-09-02", + "temporalCoverage": "2015-09-02", + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "spatialResolution": "Best suited for country-scale (or larger) summaries; pixel-level precision is less reliable per creators' guidance.", + "keywords": [ + "tree density", + "forest", + "global map", + "biome", + "ecoregion", + "biodiversity", + "conservation", + "GIS", + "ArcGIS", + "GeoTIFF", + "spatial modeling" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Tree density (biome-level model)", + "description": "Biome-level regression model predictions applied at biome scale." + }, + { + "@type": "PropertyValue", + "name": "Tree density (ecoregion-level model)", + "description": "Ecoregion-level regression model predictions applied at ecoregion scale." 
+ } + ], + "measurementTechnique": [ + "Compilation of >420,000 ground-sourced tree density estimates", + "Linear regression modeling using vegetative, climatic, topographic, and anthropogenic predictors", + "Modeling in R; mapping in R and ArcGIS 10.1" + ], + "funding": [ + { + "@type": "Organization", + "name": "Yale Climate and Energy Institute" + }, + { + "@type": "Organization", + "name": "British Ecological Society" + } + ], + "license": "https://creativecommons.org/licenses/by-nd/4.0/", + "encodingFormat": [ + "application/zip", + "application/x-esri-geodatabase", + "image/tiff; application=geotiff" + ], + "version": "2015-09-02 release; includes biome-level and ecoregion-level models; additional files provide Revision_01 (small islands) and a WGS84 GeoTIFF derivative for Revision_01.", + "distribution": [ + { + "@type": "DataDownload", + "name": "Primary download (ZIP; ArcGIS File Geodatabase + supporting ArcGIS files)", + "description": "Primary dataset package (zipped) containing an ArcGIS File Geodatabase (.gdb) with two rasters (biome-level and ecoregion-level), plus supporting ArcGIS layer files (.lyr) and a map document (.mxd).", + "contentUrl": "https://elischolar.library.yale.edu/context/yale_fes_data/article/1000/type/native/viewcontent", + "encodingFormat": "application/zip" + }, + { + "@type": "DataDownload", + "name": "Revision 01 (small islands) ZIP", + "description": "Revision_01 adds tree density predictions for small islands not included in the primary download; follows the original file structure.", + "contentUrl": "https://elischolar.library.yale.edu/cgi/viewcontent.cgi?article=1000&context=yale_fes_data&filename=0&type=additional", + "encodingFormat": "application/zip" + }, + { + "@type": "DataDownload", + "name": "Revision 01 WGS84 GeoTIFF ZIP", + "description": "Revision_01 biome-level model stored in WGS84 GeoTIFF format (reprojected from Goode Homolosine using nearest-neighbor resampling); includes the primary .tif and visualization 
support files.", + "contentUrl": "https://elischolar.library.yale.edu/cgi/viewcontent.cgi?article=1000&context=yale_fes_data&filename=1&type=additional", + "encodingFormat": "application/zip" + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Mapping tree density at a global scale", + "author": [ + { "@type": "Person", "name": "T. W. Crowther" }, + { "@type": "Person", "name": "H. B. Glick" }, + { "@type": "Person", "name": "K. R. Covey" }, + { "@type": "Person", "name": "et al." } + ], + "isPartOf": { "@type": "Periodical", "name": "Nature" }, + "datePublished": "2015-09-10", + "identifier": [ + { "@type": "PropertyValue", "propertyID": "doi", "value": "10.1038/nature14967" } + ], + "sameAs": "https://doi.org/10.1038/nature14967" + } + ], + "about": [ + { "@type": "Thing", "name": "Tree density" }, + { "@type": "Thing", "name": "Forests" }, + { "@type": "Thing", "name": "Land cover" }, + { "@type": "Thing", "name": "Biodiversity" }, + { "@type": "Thing", "name": "Global environmental mapping" } + ] +} diff --git a/data/objects/summoned/generated/Global_Tree_Density/prompt.txt b/data/objects/summoned/generated/Global_Tree_Density/prompt.txt new file mode 100644 index 0000000..afb1ffa --- /dev/null +++ b/data/objects/summoned/generated/Global_Tree_Density/prompt.txt @@ -0,0 +1,99 @@ +I need to create Schema.org JSON-LD descriptions for a scientific tree density dataset. + +**Website URL**: https://elischolar.library.yale.edu/yale_fes_data/1/ + +**Dataset Information**: +- Name: Global Tree Density (Global tree density map) +- Group/Category: land_cover +- Description: Tree density at a global scale. Two global maps (raster files) of tree density highlighting how the number of trees varies across the world. One map generated using biome-level models and applied at the biome scale. The other map generated using ecoregion-level models and applied at the ecoregion scale. 
+- Website: https://elischolar.library.yale.edu/yale_fes_data/1/ +- Coverage: Global +- License: Creative Commons Attribution-No Derivative Works 4.0 International (CC BY-ND 4.0) + +**Key Details**: +- Publisher: EliScholar / Yale School of the Environment +- Creator: T. W. Crowther (lead author) and many co-authors from Yale University and other institutions +- Publication Date: 2015-09-02 +- Coverage: Global dataset +- License: CC BY-ND 4.0 + +**Two Models**: +1. **Biome-level model** - tree density estimates applied at the biome scale (featured more prominently in publication) +2. **Ecoregion-level model** - tree density estimates applied at the ecoregion scale + +**Methodology**: +- Collected over 420,000 ground-source estimates of tree density from around the world +- Constructed linear regression models using vegetative, climatic, topographic, and anthropogenic variables +- Modeling done in R, mapping done in R and ArcGIS 10.1 +- Estimates are more robust at country-scale (or larger) than individual pixel-level +- Transitions between biomes/ecoregions may be unrealistically harsh, but large-scale estimates are robust + +**File Formats**: +- Primary: ArcGIS File Geodatabase (.gdb) containing both models +- Additional: Revision 01 (small islands), GeoTIFF versions in WGS84 +- Coordinate system: Goode Homolosine interrupted projected coordinate system (original) +- For visualization: Needs reprojection to Eckert III projected coordinate system + +**Distribution**: +- Primary download: ZIP file containing ArcGIS .gdb files, layer files (.lyr), and map document (.mxd) +- Revision 01: Contains tree density predictions for small islands not in primary download +- GeoTIFF version: WGS84 GeoTIFF format (reprojected from Goode Homolosine) + +**Citation**: +Crowther, T. W., Glick, H. B., Covey, K. R., et al. (2015). Mapping tree density at a global scale. Nature, 525(7568), 201-205. 
DOI: 10.1038/nature14967 + +**Funding**: +Yale Climate and Energy Institute; British Ecological Society + +**Instructions for Step 1.0 (WebPage Description)**: +Please create a Schema.org WebPage JSON-LD that describes the webpage itself. This should include: + +Required fields: +- @context: "https://schema.org/" +- @type: "WebPage" +- @id: The webpage URL with fragment identifier (e.g., "https://elischolar.library.yale.edu/yale_fes_data/1/#webpage") +- name: "Global tree density map" or similar webpage title +- description: Comprehensive description of what the webpage is about (the dataset and its purpose) +- url: Main webpage URL (https://elischolar.library.yale.edu/yale_fes_data/1/) +- inLanguage: "en" + +Recommended fields: +- isPartOf: WebSite object with name "EliScholar" and information about Yale digital repository +- publisher: Organization object for Yale School of the Environment / Yale University +- about: Reference to the Dataset using @id (e.g., {"@type": "Dataset", "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#dataset"}) +- mainEntity: Reference to the Dataset (same as about) +- keywords: Array of relevant keywords (tree density, forest, global, remote sensing, biodiversity, GIS, ArcGIS, GeoTIFF, etc.) + +The WebPage JSON-LD should describe the webpage that hosts information about the dataset, not the dataset itself. It should reference the dataset via @id links. + +**Instructions for Dataset JSON-LD**: +Please create a Schema.org Dataset JSON-LD that includes: +- @context, @type, @id, name, description, url +- creator: T. W. 
Crowther (lead) and list of co-authors (can include major contributors or reference the full list) +- publisher: EliScholar / Yale School of the Environment / Yale University +- datePublished: "2015-09-02" +- temporalCoverage: Single snapshot (2015 or based on data collection period) +- spatialCoverage: Global - use bounding box format "west,south east,north" (likely "-180,-90 180,90" for global) +- variableMeasured: Tree density (biome-level model), Tree density (ecoregion-level model) +- distribution: Multiple DataDownload entries for: + - Primary download (ArcGIS .gdb files) + - Revision 01 (small islands) + - GeoTIFF versions (WGS84) +- encodingFormat: ArcGIS File Geodatabase, GeoTIFF +- version: Mention both models (biome-level and ecoregion-level) and Revision 01 +- license: CC BY-ND 4.0 +- citation: Scientific publication citation (Nature paper) +- measurementTechnique: Linear regression modeling using ground-source estimates and environmental variables +- about: Tree density, forest, global mapping, biodiversity +- spatialResolution: Mention that estimates are more reliable at country-scale or larger +- funding: Yale Climate and Energy Institute, British Ecological Society + +**Output Format**: +Provide the complete JSON-LD in a code block, properly formatted and valid JSON. 
+ +**Important Notes**: +- This is a single comprehensive dataset (not a catalog) +- Two models (biome-level and ecoregion-level) are variants of the same product, not separate datasets +- Additional files (Revision 01, GeoTIFF) are alternative formats/revisions, not separate datasets +- Bounding box format: "-180,-90 180,90" (west,south east,north) for global coverage +- Multiple creators - can list key authors or reference the full list from the publication diff --git a/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld b/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld new file mode 100644 index 0000000..f28fa06 --- /dev/null +++ b/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld @@ -0,0 +1,49 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#webpage", + "name": "Global tree density map", + "description": "This webpage in EliScholar (Yale's institutional repository) hosts the Global tree density map dataset associated with Crowther et al. (2015). It describes two global raster maps of tree density (biome-level and ecoregion-level models), documents the underlying methodology (ground-based plot data and regression modeling), provides viewing/format notes for GIS use (ArcGIS File Geodatabase and GeoTIFF derivatives), and offers download access to the primary dataset package and additional revised/alternative-format files.", + "url": "https://elischolar.library.yale.edu/yale_fes_data/1/", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "@id": "https://elischolar.library.yale.edu/#website", + "name": "EliScholar", + "url": "https://elischolar.library.yale.edu/", + "description": "Yale's digital repository for scholarly works and research outputs." 
+ }, + "publisher": { + "@type": "Organization", + "name": "Yale School of the Environment", + "url": "https://elischolar.library.yale.edu/yale_fes_data/", + "parentOrganization": { + "@type": "Organization", + "name": "Yale University", + "url": "https://www.yale.edu/" + } + }, + "keywords": [ + "tree density", + "forest", + "global", + "biodiversity", + "GIS", + "ArcGIS", + "File Geodatabase", + "GeoTIFF", + "ecoregion", + "biome", + "spatial model", + "Crowther", + "Jetz" + ], + "about": { + "@type": "Dataset", + "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#dataset" + }, + "mainEntity": { + "@type": "Dataset", + "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#dataset" + } +} diff --git a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld new file mode 100644 index 0000000..07e4627 --- /dev/null +++ b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld @@ -0,0 +1,155 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "name": "MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model)", + "description": "MERIT DEM is a high-accuracy global digital elevation model (DEM) at 3 arc-second resolution (~90 m at the equator), developed by removing multiple error components from existing spaceborne DEMs. The developers separated and eliminated absolute bias, stripe noise, speckle noise, and tree height bias using multiple satellite datasets and filtering techniques. MERIT DEM was created by processing baseline DEMs including NASA SRTM3 DEM v2.1, JAXA AW3D-30m DEM v1, and Viewfinder Panoramas' DEM. After error removal, land areas mapped with 2 m or better vertical accuracy increased from 39% to 58%. The dataset represents terrain elevations in meters referenced to WGS84 horizontal datum and the EGM96 geoid, covering global land areas between 90°N and 60°S. 
Data are organized as 5°×5° tiles (6000×6000 pixels) and distributed in 30°×30° packages; filenames encode the center of the lower-left pixel (e.g., \"n30w120_dem.tif\" covers N30–N35 and W120–W115) and package names encode the lower-left corner of the 30°×30° domain (e.g., \"dem_tif_n30w120.tar\" contains tiles for N30–N60 and W120–W090).", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "keywords": [ + "topography", + "Digital Elevation Model", + "DEM", + "terrain elevation", + "geoscience", + "hydrology", + "SRTM", + "AW3D", + "global elevation", + "EGM96", + "WGS84", + "MERIT" + ], + "creator": [ + { + "@type": "Person", + "name": "Dai Yamazaki", + "email": "mailto:yamadai@iis.u-tokyo.ac.jp", + "affiliation": { + "@type": "Organization", + "name": "Institute of Industrial Science, The University of Tokyo", + "url": "https://www.iis.u-tokyo.ac.jp/en/" + } + }, + { + "@type": "Organization", + "name": "Institute of Industrial Science, The University of Tokyo", + "url": "https://www.iis.u-tokyo.ac.jp/en/" + } + ], + "publisher": { + "@type": "Organization", + "name": "The University of Tokyo", + "url": "https://www.u-tokyo.ac.jp/en/" + }, + "datePublished": "2018-10-15", + "version": "v1.0.3", + "license": [ + { + "@type": "CreativeWork", + "name": "Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)", + "url": "https://creativecommons.org/licenses/by-nc/4.0/" + }, + { + "@type": "CreativeWork", + "name": "Open Data Commons Open Database License 1.0 (ODbL 1.0)", + "url": "https://opendatacommons.org/licenses/odbl/1-0/" + } + ], + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas between 90°N and 60°S", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Elevation", + "unitText": "meter", + "description": "Terrain elevation in meters referenced to WGS84 and the EGM96 geoid." 
+ } + ], + "measurementTechnique": [ + "Error removal from baseline spaceborne DEMs (absolute bias, stripe noise, speckle noise, tree height bias)", + "Filtering and correction using multiple satellite and ancillary datasets" + ], + "encodingFormat": [ + "application/octet-stream (ESRI EHdr/FLT raster with HDR; 4-byte float, little endian)", + "image/tiff; application=geotiff", + "application/octet-stream (MRR, MapInfo Pro raster)" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "MERIT DEM – ESRI EHdr (FLT) packages (30°×30° tar.gz)", + "description": "Elevation tiles distributed as 30°×30° packages containing 5°×5° tiles (6000×6000 pixels). ESRI FLT rasters with HDR (Fortran Direct Access / ESRI FLT style; 4-byte float, little endian). Package names encode the lower-left corner (e.g., dem_flt_n30w120.tar.gz contains tiles for N30–N60, W120–W090). Filenames encode the center of the lower-left pixel (e.g., n30w120_dem.* covers N30–N35, W120–W115). Access may require registration/password per the provider instructions.", + "encodingFormat": "application/octet-stream", + "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/dem_flt_n30w120.tar.gz", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + }, + { + "@type": "DataDownload", + "name": "MERIT DEM – GeoTIFF packages (30°×30° tar)", + "description": "Elevation tiles distributed as 30°×30° packages containing 5°×5° GeoTIFF tiles (6000×6000 pixels). Package names encode the lower-left corner (e.g., dem_tif_n30w120.tar contains tiles for N30–N60, W120–W090). Individual tile filenames encode the center of the lower-left pixel (e.g., n30w120_dem.tif covers N30–N35, W120–W115). 
Access may require registration/password per the provider instructions.", + "encodingFormat": "image/tiff; application=geotiff", + "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/dem_tif_n30w120.tar", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + }, + { + "@type": "DataDownload", + "name": "MERIT DEM – MRR (single merged raster)", + "description": "Single merged MRR raster created by merging all MERIT DEM source tiles; vertical resolution converted to 0.01 meters by rounding to the nearest centimeter. MRR rasters can be displayed in the MapInfo Pro GIS platform. Prepared by Sam Roberts (Roberts Geospatial).", + "encodingFormat": "application/octet-stream", + "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/MERIT_DEM.mrr", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + }, + { + "@type": "DataDownload", + "name": "MERIT DEM – 5°×5° tile download page (GeoTIFF tiles)", + "description": "Alternative download page for individual 5°×5° tiles (useful when large package downloads are difficult). Tile filenames encode the center of the lower-left pixel (e.g., n30w120_dem.tif covers N30–N35, W120–W115). Access may require registration/password per the provider instructions.", + "encodingFormat": "text/html", + "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/list_5deg.html", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/list_5deg.html" + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "A high accuracy map of global terrain elevations", + "author": [ + { "@type": "Person", "name": "D. Yamazaki" }, + { "@type": "Person", "name": "D. Ikeshima" }, + { "@type": "Person", "name": "R. Tawatari" }, + { "@type": "Person", "name": "T. Yamaguchi" }, + { "@type": "Person", "name": "F. O'Loughlin" }, + { "@type": "Person", "name": "J. C. Neal" }, + { "@type": "Person", "name": "C. C. Sampson" }, + { "@type": "Person", "name": "S. 
Kanae" }, + { "@type": "Person", "name": "P. D. Bates" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Geophysical Research Letters" + }, + "datePublished": "2017", + "pagination": "5844-5853", + "volumeNumber": "44", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1002/2017GL072874" + } + ], + "sameAs": "https://doi.org/10.1002/2017GL072874" + } + ], + "about": [ + { "@type": "Thing", "name": "Terrain elevation" }, + { "@type": "Thing", "name": "Topography" }, + { "@type": "Thing", "name": "Hydrology" }, + { "@type": "Thing", "name": "Geoscience applications" } + ] +} diff --git a/data/objects/summoned/generated/MERIT_DEM/prompt.txt b/data/objects/summoned/generated/MERIT_DEM/prompt.txt new file mode 100644 index 0000000..0a180f7 --- /dev/null +++ b/data/objects/summoned/generated/MERIT_DEM/prompt.txt @@ -0,0 +1,59 @@ +I need to create a Schema.org Dataset JSON-LD description for a scientific dataset. + +**Website URL**: http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/ + +**Dataset Information**: +- Name: MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model) +- Group/Category: topography +- Description: A high accuracy global DEM at 3 arcsecond resolution (~90 m at the equator) developed by removing multiple error components (absolute bias, stripe noise, speckle noise, and tree height bias) from existing spaceborne DEMs (NASA SRTM3 DEM v2.1, JAXA AW3D-30m DEM v1, Viewfinder Panoramas' DEM). After error removal, land areas mapped with 2 m or better vertical accuracy were increased from 39% to 58%. 
+ +**Key Details**: +- Publisher: University of Tokyo, Institute of Industrial Science +- Creator: Dai Yamazaki (yamadai@iis.u-tokyo.ac.jp) +- Date Published: October 15, 2018 (v1.0.3) +- License: Creative Commons CC-BY-NC 4.0 or Open Database License (ODbL 1.0) - dual license +- Spatial Coverage: Land areas between 90°N-60°S +- Resolution: 3 arcsecond (~90m at the equator) +- Data Format: Elevation in meters, referenced to WGS84 and EGM96 geoid +- Data Organization: 5 degree × 5 degree tiles, packaged into 30 degree × 30 degree packages +- Available Formats: ESRI EHdr (FLT), GeoTIFF, MRR + +**File Naming Convention**: +- Individual tiles: Filename represents the center of the lower left pixel (e.g., "n30w120_dem.tif" covers N30-N35, W120-W115) +- Packages: Package name represents the lower left corner (e.g., "dem_tif_n30w120.tar" contains files in domain N30-N60, W120-W090) + +**Citation**: +Yamazaki D., D. Ikeshima, R. Tawatari, T. Yamaguchi, F. O'Loughlin, J.C. Neal, C.C. Sampson, S. Kanae & P.D. Bates (2017). A high accuracy map of global terrain elevations. 
Geophysical Research Letters, vol.44, pp.5844-5853, doi: 10.1002/2017GL072874 + +**Instructions**: +Please create a complete Schema.org Dataset JSON-LD that includes: +- @context: "https://schema.org/" +- @type: "Dataset" +- @id: The dataset URL +- name: Full dataset name +- description: Comprehensive description of the dataset +- url: Main dataset webpage URL +- creator: Organization and/or person (University of Tokyo, Institute of Industrial Science; Dai Yamazaki) +- publisher: Organization (University of Tokyo) +- datePublished: Publication date +- version: Current version (v1.0.3) +- license: Include both license options (CC-BY-NC 4.0 and ODbL 1.0) +- keywords: Relevant keywords (e.g., topography, Digital Elevation Model, terrain elevation, geoscience, hydrology, SRTM, AW3D) +- spatialCoverage: Geographic coverage (90°N to 60°S) +- distribution: Include multiple DataDownload entries for: + - Different formats (ESRI EHdr, GeoTIFF, MRR) + - Note the file naming convention and spatial organization + - Include the main download page URL +- citation: Scientific publication citation +- encodingFormat: List available formats (ESRI FLT, GeoTIFF, MRR) +- temporalCoverage: If applicable +- about: What the dataset is about (terrain elevation, hydrology, geoscience applications) + +**Output Format**: +Provide the complete JSON-LD in a code block, properly formatted and valid JSON. 
+ +**Important Notes**: +- This is a single dataset with multiple distribution options (formats and spatial regions) +- The dataset is organized as spatial tiles, but it represents one cohesive dataset product +- Include all relevant metadata from the website +- Ensure the JSON-LD is valid and follows Schema.org Dataset schema diff --git a/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld b/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld new file mode 100644 index 0000000..c56d33e --- /dev/null +++ b/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld @@ -0,0 +1,70 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "@id": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "name": "MERIT DEM: Multi-Error-Removed Improved-Terrain DEM", + "description": "Webpage for the MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model) dataset. MERIT DEM is a high-accuracy global digital elevation model at 3 arc-second resolution (~90 m at the equator), developed by removing multiple error components from existing spaceborne DEMs. 
The site provides information about the dataset, download instructions, licensing, and citation information.", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "University of Tokyo, Institute of Industrial Science", + "url": "https://www.iis.u-tokyo.ac.jp/en/" + }, + "about": { + "@type": "Dataset", + "name": "MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model)", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + }, + "publisher": { + "@type": "Organization", + "name": "The University of Tokyo", + "subOrganization": { + "@type": "Organization", + "name": "Institute of Industrial Science" + }, + "url": "https://www.u-tokyo.ac.jp/en/" + }, + "datePublished": "2018-10-15", + "dateModified": "2018-10-15", + "lastReviewed": "2018-10-15", + "mainEntity": { + "@type": "Dataset", + "name": "MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model)", + "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + }, + "breadcrumb": { + "@type": "BreadcrumbList", + "itemListElement": [ + { + "@type": "ListItem", + "position": 1, + "name": "University of Tokyo", + "item": "https://www.u-tokyo.ac.jp/en/" + }, + { + "@type": "ListItem", + "position": 2, + "name": "Institute of Industrial Science", + "item": "https://www.iis.u-tokyo.ac.jp/en/" + }, + { + "@type": "ListItem", + "position": 3, + "name": "MERIT DEM", + "item": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + } + ] + }, + "keywords": [ + "MERIT DEM", + "Digital Elevation Model", + "topography", + "terrain elevation", + "geoscience", + "hydrology", + "global DEM", + "SRTM", + "AW3D" + ] +} diff --git a/data/objects/summoned/generated/MERIT_DEM_956de6b6.jsonld b/data/objects/summoned/generated/MERIT_DEM_956de6b6.jsonld deleted file mode 100644 index d5b69f8..0000000 --- a/data/objects/summoned/generated/MERIT_DEM_956de6b6.jsonld +++ /dev/null @@ -1,45 +0,0 @@ -{ - "@context": { - 
"@vocab": "https://schema.org/" - }, - "@type": "Dataset", - "@id": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", - "name": "MERIT DEM", - "description": "Multi-Error-Removed Improved-Terrain Digital Elevation Model developed by removing multiple error components (absolute bias, stripe noise, speckle noise, and tree height bias) from existing spaceborne DEMs (SRTM3 v2.1 and AW3D-30m v1). Represents terrain elevations at 3 arcsecond resolution (~90m at the equator), covering land areas between 90N-60S, referenced to EGM96 geoid. After error removal, land areas mapped with 2 m or better vertical accuracy increased from 39% to 58%.", - "creator": [ - { - "@type": "Organization", - "name": "University of Tokyo (Institute of Industrial Science)" - } - ], - "publisher": { - "@type": "Organization", - "name": "University of Tokyo" - }, - "datePublished": "2018-10-15", - "keywords": [ - "topography", - "Digital Elevation Model", - "terrain elevation", - "geoscience", - "hydrology", - "error removal", - "SRTM", - "AW3D" - ], - "spatialCoverage": { - "@type": "Place", - "geo": { - "@type": "GeoShape", - "box": "20,-40 50,10" - } - }, - "license": "CC-BY-NC 4.0 or ODbL 1.0 (dual license - user may choose one)", - "distribution": [ - { - "@type": "DataDownload", - "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" - } - ] -} \ No newline at end of file diff --git a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld new file mode 100644 index 0000000..e6772a5 --- /dev/null +++ b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld @@ -0,0 +1,91 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.mrlc.gov/data/project/annual-nlcd#dataset", + "name": "Annual National Land Cover Database (Annual NLCD) CONUS Collection 1.1", + "description": "Annual NLCD provides an annual record of 
land cover and related land surface change products for the conterminous United States derived from the Landsat satellite record using an integrated framework of modern modeling approaches. Collection 1.1 extends coverage through 2024 and includes six raster science products: Land Cover, Land Cover Change, Land Cover Confidence, Fractional Impervious Surface, Impervious Descriptor, and Spectral Change Day of Year.", + "url": "https://www.mrlc.gov/data/project/annual-nlcd", + "creator": { + "@type": "Organization", + "name": "U.S. Geological Survey (USGS) / MRLC Consortium", + "url": "https://www.mrlc.gov/partners" + }, + "publisher": { + "@type": "Organization", + "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", + "url": "https://www.mrlc.gov/" + }, + "version": "Collection 1.1 (ver. 1.1, June 2025)", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Conterminous United States (CONUS)", + "geo": { + "@type": "GeoShape", + "box": "-125,24 -66,50" + } + }, + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + }, + "keywords": [ + "Annual NLCD", + "NLCD", + "land cover", + "land change", + "impervious surface", + "tree canopy", + "Landsat", + "remote sensing" + ], + "variableMeasured": [ + { "@type": "PropertyValue", "name": "Land Cover" }, + { "@type": "PropertyValue", "name": "Land Cover Change" }, + { "@type": "PropertyValue", "name": "Land Cover Confidence" }, + { "@type": "PropertyValue", "name": "Fractional Impervious Surface" }, + { "@type": "PropertyValue", "name": "Impervious Descriptor" }, + { "@type": "PropertyValue", "name": "Spectral Change Day of Year" } + ], + "measurementTechnique": [ + "Landsat remote sensing", + "Land cover classification and change detection using modern modeling approaches (including deep learning and time-series analysis)" + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "citation": "U.S. 
Geological Survey (USGS), 2024, Annual NLCD Collection 1 Science Products (ver. 1.1, June 2025): U.S. Geological Survey data release, https://doi.org/10.5066/P94UXNTS.", + "distribution": [ + { + "@type": "DataDownload", + "name": "Annual NLCD Collection 1.1 (ScienceBase data release)", + "contentUrl": "https://www.sciencebase.gov/catalog/item/655ceb8ad34ee4b6e05cc51a", + "description": "Authoritative distribution for Annual NLCD Collection 1.1, including metadata and product package structure." + }, + { + "@type": "DataDownload", + "name": "MRLC OGC Services (WMS/WCS) for Annual NLCD", + "contentUrl": "https://www.mrlc.gov/data-services-page", + "description": "OGC WMS/WCS endpoints for Annual NLCD products (access via GIS clients)." + }, + { + "@type": "DataDownload", + "name": "MRLC Viewer (interactive subsetting and downloads)", + "contentUrl": "https://www.mrlc.gov/viewer/", + "description": "Interactive tool for visualizing and downloading NLCD products for custom extents." + } + ], + "encodingFormat": [ + "application/zip", + "image/tiff; application=geotiff", + "application/octet-stream" + ], + "isPartOf": { + "@type": "DataCatalog", + "@id": "https://www.mrlc.gov/data#catalog" + }, + "about": [ + { "@type": "Thing", "name": "Land cover" }, + { "@type": "Thing", "name": "Land use" }, + { "@type": "Thing", "name": "Land change" } + ] +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld b/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld new file mode 100644 index 0000000..996f1a1 --- /dev/null +++ b/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld @@ -0,0 +1,67 @@ +{ + "@context": "https://schema.org/", + "@type": "DataCatalog", + "@id": "https://www.mrlc.gov/data#catalog", + "name": "MRLC Data Catalog", + "description": "Catalog of land cover, land change, rangeland vegetation component, and related remote-sensing products produced and distributed by the Multi-Resolution Land Characteristics (MRLC) Consortium 
for the United States and North America. Includes Annual NLCD, RCMAP, Exotic Annual Grass products, NALCMS, and legacy NLCD products, with access via direct downloads, interactive tools, and OGC web services.", + "url": "https://www.mrlc.gov/data", + "publisher": { + "@type": "Organization", + "@id": "https://www.mrlc.gov/#org", + "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", + "url": "https://www.mrlc.gov/" + }, + "creator": { + "@type": "Organization", + "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", + "url": "https://www.mrlc.gov/partners" + }, + "keywords": [ + "land cover", + "land use", + "land change", + "NLCD", + "Annual NLCD", + "tree canopy", + "impervious surface", + "rangeland", + "RCMAP", + "Exotic Annual Grass", + "NALCMS", + "remote sensing", + "Landsat" + ], + "about": [ + { "@type": "Thing", "name": "Land cover" }, + { "@type": "Thing", "name": "Land use" }, + { "@type": "Thing", "name": "Land change" }, + { "@type": "Thing", "name": "Remote sensing" } + ], + "dataset": [ + { "@id": "https://www.mrlc.gov/data/project/annual-nlcd#dataset" }, + { "@id": "https://www.mrlc.gov/data/project/rcmap#dataset" }, + { "@id": "https://www.mrlc.gov/data/type/exotic-annual-grass#dataset" }, + { "@id": "https://data.usgs.gov/datacatalog/data/USGS:74edd739-1584-41c3-bf08-0274681a779b#dataset" }, + { "@id": "https://www.mrlc.gov/data/type/legacy-nlcd#dataset" } + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "MRLC Data Download Interface", + "contentUrl": "https://www.mrlc.gov/data", + "description": "Primary MRLC landing page for browsing and downloading MRLC datasets and products." + }, + { + "@type": "DataDownload", + "name": "MRLC OGC Web Services (WMS/WCS)", + "contentUrl": "https://www.mrlc.gov/data-services-page", + "description": "Access MRLC datasets via OGC Web Map Service (WMS) and Web Coverage Service (WCS) endpoints." 
+ }, + { + "@type": "DataDownload", + "name": "MRLC Tools", + "contentUrl": "https://www.mrlc.gov/tools", + "description": "Interactive tools for visualization, subsetting, and analysis (MRLC Viewer, Rangeland Viewer, EVA Tool)." + } + ] +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld new file mode 100644 index 0000000..b78b992 --- /dev/null +++ b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld @@ -0,0 +1,83 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.mrlc.gov/data/type/exotic-annual-grass#dataset", + "name": "Exotic Annual Grass (EAG) Fractional Cover (MRLC)", + "description": "The Exotic Annual Grass (EAG) dataset provides frequent (weekly, during the core growing season) early estimates of fractional cover for multiple exotic annual grass targets and one native perennial grass species in arid and semi-arid rangelands of the western United States. Each release includes fractional cover maps and corresponding confidence maps for multiple species/groups (including cheatgrass and other Bromus species) generated using field observations, HLS NDVI, environmental drivers, and machine learning methods.", + "url": "https://www.mrlc.gov/data/type/exotic-annual-grass", + "creator": { + "@type": "Organization", + "name": "U.S. 
Geological Survey (USGS) / MRLC Consortium", + "url": "https://www.mrlc.gov/partners" + }, + "publisher": { + "@type": "Organization", + "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", + "url": "https://www.mrlc.gov/" + }, + "temporalCoverage": "2016-01-01/2025-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Western United States (arid and semi-arid rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,30 -95,50" + } + }, + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + }, + "keywords": [ + "Exotic Annual Grass", + "EAG", + "cheatgrass", + "invasive species", + "fractional cover", + "rangelands", + "remote sensing", + "machine learning" + ], + "variableMeasured": [ + { "@type": "PropertyValue", "name": "EAG fractional cover (multiple species group)" }, + { "@type": "PropertyValue", "name": "Cheatgrass (Bromus tectorum) fractional cover" }, + { "@type": "PropertyValue", "name": "Other Bromus spp. fractional cover (Field brome + Japanese brome)" }, + { "@type": "PropertyValue", "name": "Medusahead (Taeniatherum caput-medusae) fractional cover" }, + { "@type": "PropertyValue", "name": "Sandberg bluegrass (Poa secunda) fractional cover" }, + { "@type": "PropertyValue", "name": "Confidence maps (per target)" } + ], + "measurementTechnique": [ + "Remote sensing (HLS NDVI and related drivers)", + "Field observations (BLM AIM plots) and machine learning regression modeling" + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "citation": "Dahal, D., Boyte, S., Megard, L., Postma, K., and Pastick, N., 2025, Early Estimates of Exotic Annual Grass (EAG) in the Sagebrush Biome, USA, 2025: U.S. 
Geological Survey data release, https://doi.org/10.5066/P14VQEGO.", + "distribution": [ + { + "@type": "DataDownload", + "name": "MRLC Exotic Annual Grass data landing page", + "contentUrl": "https://www.mrlc.gov/data/type/exotic-annual-grass", + "description": "MRLC landing page describing the EAG products and linking to individual releases." + }, + { + "@type": "DataDownload", + "name": "USGS data release (DOI: 10.5066/P14VQEGO)", + "contentUrl": "https://www.usgs.gov/data/early-estimates-exotic-annual-grass-eag-sagebrush-biome-usa-2025", + "description": "USGS authoritative data release record for EAG estimates (includes DOI and rights statement)." + } + ], + "encodingFormat": [ + "image/tiff; application=geotiff", + "application/zip" + ], + "isPartOf": { + "@type": "DataCatalog", + "@id": "https://www.mrlc.gov/data#catalog" + }, + "about": [ + { "@type": "Thing", "name": "Invasive species monitoring" }, + { "@type": "Thing", "name": "Rangeland ecology" }, + { "@type": "Thing", "name": "Remote sensing" } + ] +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld new file mode 100644 index 0000000..b78fbaa --- /dev/null +++ b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld @@ -0,0 +1,82 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.mrlc.gov/data/type/legacy-nlcd#dataset", + "name": "Legacy National Land Cover Database (NLCD) Products", + "description": "Legacy NLCD products provide nationwide (United States) land cover and land cover change datasets at 30-meter resolution using a 16-class legend based on a modified Anderson Level II classification system. The legacy product suite supports cyclical updates of U.S. 
land cover and change, enabling monitoring and long-term trend assessments across many application areas (e.g., hydrology, environmental planning, risk analysis, education, and land management).", + "url": "https://www.mrlc.gov/data/type/legacy-nlcd", + "creator": { + "@type": "Organization", + "name": "MRLC Consortium / U.S. Geological Survey (USGS)", + "url": "https://www.mrlc.gov/partners" + }, + "publisher": { + "@type": "Organization", + "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", + "url": "https://www.mrlc.gov/" + }, + "spatialCoverage": { + "@type": "Place", + "name": "United States (nationwide coverage; includes Alaska, Hawaii, and other U.S. territories depending on product)", + "geo": { + "@type": "GeoShape", + "box": "-180,15 -50,72" + } + }, + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + }, + "keywords": [ + "NLCD", + "legacy NLCD", + "land cover", + "land change", + "tree canopy", + "impervious surface", + "remote sensing", + "Landsat" + ], + "variableMeasured": [ + { "@type": "PropertyValue", "name": "Land cover class (16-class legend)" }, + { "@type": "PropertyValue", "name": "Land cover change (varies by product)" }, + { "@type": "PropertyValue", "name": "Percent impervious surface (selected releases)" }, + { "@type": "PropertyValue", "name": "Tree canopy cover (selected releases)" } + ], + "measurementTechnique": [ + "Landsat remote sensing", + "Land cover classification and change mapping (varies by release year/version)" + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "citation": [ + "Dewitz, J., and U.S. Geological Survey, 2021, National Land Cover Database (NLCD) 2019 Products (ver. 2.0, June 2021): U.S. Geological Survey data release, https://doi.org/10.5066/P9KZCM54." 
+ ], + "distribution": [ + { + "@type": "DataDownload", + "name": "MRLC Legacy NLCD landing page", + "contentUrl": "https://www.mrlc.gov/data/type/legacy-nlcd", + "description": "MRLC landing page for browsing and accessing legacy NLCD products." + }, + { + "@type": "DataDownload", + "name": "MRLC Viewer (custom downloads for NLCD)", + "contentUrl": "https://www.mrlc.gov/viewer/", + "description": "Interactive visualization and custom extent downloads for NLCD-related products." + } + ], + "encodingFormat": [ + "image/tiff; application=geotiff", + "application/zip" + ], + "isPartOf": { + "@type": "DataCatalog", + "@id": "https://www.mrlc.gov/data#catalog" + }, + "about": [ + { "@type": "Thing", "name": "Land cover" }, + { "@type": "Thing", "name": "Land change" }, + { "@type": "Thing", "name": "Remote sensing" } + ] +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld new file mode 100644 index 0000000..1edc52d --- /dev/null +++ b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld @@ -0,0 +1,68 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://data.usgs.gov/datacatalog/data/USGS:74edd739-1584-41c3-bf08-0274681a779b#dataset", + "name": "North American Land Change Monitoring System (NALCMS) – North American Land Cover (30 m)", + "description": "NALCMS provides harmonized, trinational land cover maps for North America by combining national land cover products from Canada, the United States, and Mexico into a consistent continental mosaic. 
A recent 30-meter North American land cover dataset reflects land cover information for 2020 from Mexico and Canada, 2019 over the conterminous United States, and 2021 over Alaska, using Landsat 8 imagery inputs and country-specific classification methods integrated into a seamless product.", + "url": "https://data.usgs.gov/datacatalog/data/USGS%3A74edd739-1584-41c3-bf08-0274681a779b", + "creator": [ + { "@type": "Organization", "name": "U.S. Geological Survey (USGS)" }, + { "@type": "Organization", "name": "Natural Resources Canada" }, + { "@type": "Organization", "name": "Instituto Nacional de Estadística y Geografía (INEGI)" }, + { "@type": "Organization", "name": "Comisión Nacional Para el Conocimiento y Uso de la Biodiversidad (CONABIO)" }, + { "@type": "Organization", "name": "Comisión Nacional Forestal (CONAFOR)" } + ], + "publisher": { + "@type": "Organization", + "name": "U.S. Geological Survey (USGS)", + "url": "https://www.usgs.gov/" + }, + "temporalCoverage": "2019-01-01/2021-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "North America (Canada, United States, Mexico)", + "geo": { + "@type": "GeoShape", + "box": "-180,15 -50,85" + } + }, + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + }, + "keywords": [ + "NALCMS", + "North America", + "land cover", + "land change", + "Landsat 8", + "continental mosaic" + ], + "variableMeasured": [{ "@type": "PropertyValue", "name": "Land cover class" }], + "measurementTechnique": [ + "Landsat 8 remote sensing", + "National land cover classification and continental integration/mosaicking" + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "distribution": [ + { + "@type": "DataDownload", + "name": "USGS Science Data Catalog landing page", + "contentUrl": "https://data.usgs.gov/datacatalog/data/USGS%3A74edd739-1584-41c3-bf08-0274681a779b", + "description": "Metadata landing page with access information for NALCMS products." 
+ } + ], + "encodingFormat": [ + "image/tiff; application=geotiff", + "application/zip" + ], + "isPartOf": { + "@type": "DataCatalog", + "@id": "https://www.mrlc.gov/data#catalog" + }, + "about": [ + { "@type": "Thing", "name": "Land cover" }, + { "@type": "Thing", "name": "Continental land monitoring" } + ] +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/prompt.txt b/data/objects/summoned/generated/MRLC_NLCD/prompt.txt new file mode 100644 index 0000000..11495fa --- /dev/null +++ b/data/objects/summoned/generated/MRLC_NLCD/prompt.txt @@ -0,0 +1,97 @@ +I need to create Schema.org JSON-LD descriptions for a scientific land cover data catalog and its datasets. + +**Website URL**: https://www.mrlc.gov/data + +**Catalog Information**: +- Name: Multi-Resolution Land Characteristics (MRLC) Consortium +- Group/Category: land_cover +- Description: Nationwide (US) data on land cover and tree canopy cover at a 30m resolution. The MRLC Consortium is a partnership of federal agencies that produces land cover and land change data products. +- Website: https://www.mrlc.gov/data +- Publisher: MRLC Consortium (USGS, EPA, USDA, NOAA, USFS, and others) +- Coverage: United States (CONUS, Alaska, Hawaii) and North America + +**Available Products/Datasets** (based on website structure): +1. **Annual NLCD** - Annual National Land Cover Database (Conterminous U.S.) + - Collection 1.1 (current version) + - Temporal coverage: 1985-2024 + - Products: Land Cover, Land Cover Change, Land Cover Confidence, Fractional Impervious Surface, Impervious Descriptor, Spectral Change Day of Year + - Spatial resolution: 30m + - Coverage: Conterminous United States (CONUS) + +2. **RCMAP** - Rangeland Condition Monitoring Assessment and Projection + - Includes Ecological Potential (EP) data + - Components: bare ground, herbaceous, litter, shrub, sagebrush, tree + - Temporal coverage: 1985-present + - Coverage: Western North America + +3. 
**Exotic Annual Grass** - Exotic annual grass data products + +4. **NALCMS** - North American Land Change Monitoring System + - Coverage: North America (US, Canada, Mexico) + +5. **Legacy NLCD** - Legacy National Land Cover Database products + - Older versions of NLCD data + +**Key Details**: +- Organization: Multi-Resolution Land Characteristics (MRLC) Consortium +- Consortium Members: USGS, EPA, USDA, NOAA, USFS, and other federal agencies +- Website: https://www.mrlc.gov/data +- Tools: MRLC NLCD Viewer, MRLC NLCD EVA Tool, MRLC Rangeland Viewer +- Services: Download interface, web services +- License: Public domain (USGS data) + +**Instructions for Step 1.0 (WebPage Description)**: +Please create a Schema.org WebPage JSON-LD that includes: +- @context: "https://schema.org/" +- @type: "WebPage" +- @id: The webpage URL +- name: "MRLC Data" or "Multi-Resolution Land Characteristics Consortium Data" +- description: Comprehensive description of the MRLC data catalog +- url: Main webpage URL +- publisher: MRLC Consortium / USGS (if identifiable) +- inLanguage: "en" +- isPartOf: WebSite information +- about: Reference to the DataCatalog +- keywords: Relevant keywords (land cover, NLCD, MRLC, land use, tree canopy, etc.) +- mainEntity: Reference to the DataCatalog + +**Instructions for Step 2.2 (DataCatalog JSON-LD)**: +Please create a Schema.org DataCatalog JSON-LD that includes: +- @context, @type, @id, name, description, url +- publisher: MRLC Consortium / USGS +- creator: MRLC Consortium members +- dataset: List of datasets in the catalog (use @id references) +- keywords: land cover, NLCD, MRLC, land use, tree canopy, rangeland, etc. 
+- about: Land cover, land use, land change, remote sensing +- distribution: Access methods (download interface, viewer tools, web services) + +**Instructions for Step 2.3 (Individual Dataset JSON-LD)**: +For each dataset (Annual NLCD, RCMAP, etc.), create a Schema.org Dataset JSON-LD that includes: +- @context, @type, @id, name, description, url +- creator: MRLC Consortium / specific agencies +- publisher: MRLC Consortium / USGS +- temporalCoverage: Specific date ranges (e.g., 1985-2024 for Annual NLCD) +- spatialCoverage: CONUS, Alaska, Hawaii, North America, etc. +- variableMeasured: Land cover classes, tree canopy, impervious surface, etc. +- distribution: Multiple DataDownload entries for: + - Direct download links + - Viewer tools + - Web services +- encodingFormat: GeoTIFF, raster formats +- spatialResolution: 30m, 250m, etc. +- version: Collection version (e.g., "Collection 1.1") +- license: Public domain +- citation: Relevant publications +- measurementTechnique: Remote sensing, image classification +- about: Land cover, land use, land change, remote sensing + +**Output Format**: +Provide the complete JSON-LD in a code block, properly formatted and valid JSON. 
+ +**Important Notes**: +- This is a DATA CATALOG with multiple distinct datasets (similar to CHELSA) +- Each dataset should have its own JSON-LD file +- The catalog should reference all datasets using @id references +- Spatial coverage is primarily United States (CONUS, Alaska, Hawaii) and North America +- Most products use 30m spatial resolution +- Data is in the public domain (USGS data) diff --git a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld new file mode 100644 index 0000000..4e99862 --- /dev/null +++ b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld @@ -0,0 +1,104 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.mrlc.gov/data/project/rcmap#dataset", + "name": "Rangeland Condition Monitoring Assessment and Projection (RCMAP)", + "description": "RCMAP provides annual rangeland vegetation component fractional cover and related products across western North America derived from Landsat imagery. Components include bare ground, herbaceous, litter, shrub, sagebrush, tree (and related component/trend products), supporting rangeland monitoring and assessment from 1985 to present.", + "url": "https://www.mrlc.gov/data/project/rcmap", + "creator": { + "@type": "Organization", + "name": "U.S. 
Geological Survey (USGS) and Bureau of Land Management (BLM)", + "url": "https://www.mrlc.gov/data/project/rcmap" + }, + "publisher": { + "@type": "Organization", + "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", + "url": "https://www.mrlc.gov/" + }, + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Western North America (rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,25 -95,60" + } + }, + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + }, + "keywords": [ + "RCMAP", + "rangeland", + "fractional cover", + "sagebrush", + "shrub", + "bare ground", + "Landsat", + "remote sensing" + ], + "variableMeasured": [ + { "@type": "PropertyValue", "name": "Bare ground cover (percent)" }, + { "@type": "PropertyValue", "name": "Herbaceous cover (percent)" }, + { "@type": "PropertyValue", "name": "Litter cover (percent)" }, + { "@type": "PropertyValue", "name": "Shrub cover (percent)" }, + { "@type": "PropertyValue", "name": "Sagebrush cover (percent)" }, + { "@type": "PropertyValue", "name": "Tree cover (percent)" } + ], + "measurementTechnique": [ + "Landsat remote sensing", + "Fractional cover modeling and time-series analysis for rangeland components" + ], + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "distribution": [ + { + "@type": "DataDownload", + "name": "RCMAP downloads (MRLC data pages)", + "contentUrl": "https://www.mrlc.gov/data/project/rcmap", + "description": "Landing page for RCMAP products, including time-series cover, trends, and ecological potential." + }, + { + "@type": "DataDownload", + "name": "Example component download: Bare Ground (2011–2024 ZIP)", + "contentUrl": "https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/data-bundles/Bare_Ground_2011_2024.zip", + "encodingFormat": "application/zip", + "description": "Example direct ZIP download link for an RCMAP component time-series." 
+ }, + { + "@type": "DataDownload", + "name": "Example component download: Herbaceous (2011–2024 ZIP)", + "contentUrl": "https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/data-bundles/Herbaceous_2011_2024.zip", + "encodingFormat": "application/zip", + "description": "Example direct ZIP download link for an RCMAP component time-series." + }, + { + "@type": "DataDownload", + "name": "RCMAP FGDC metadata (XML)", + "contentUrl": "https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/metadata/RCMAP_V7_FGDC_Metadata.xml", + "encodingFormat": "application/xml", + "description": "FGDC metadata for RCMAP products." + }, + { + "@type": "DataDownload", + "name": "MRLC Rangeland Viewer", + "contentUrl": "https://www.mrlc.gov/rangeland-viewer/", + "description": "Interactive visualization, comparison, and subsetting/downloading for RCMAP data." + } + ], + "encodingFormat": [ + "image/tiff; application=geotiff", + "application/zip", + "application/xml" + ], + "isPartOf": { + "@type": "DataCatalog", + "@id": "https://www.mrlc.gov/data#catalog" + }, + "about": [ + { "@type": "Thing", "name": "Rangeland monitoring" }, + { "@type": "Thing", "name": "Vegetation cover" }, + { "@type": "Thing", "name": "Remote sensing" } + ] +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld b/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld new file mode 100644 index 0000000..4755f59 --- /dev/null +++ b/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld @@ -0,0 +1,55 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "@id": "https://www.mrlc.gov/data#webpage", + "name": "MRLC Data", + "description": "The Multi-Resolution Land Characteristics (MRLC) Consortium data portal provides nationwide (United States) and North America land cover and related remote-sensing products, including Annual NLCD (annual land cover and land change science products), RCMAP (rangeland fractional cover and trends), Exotic Annual Grass fractional cover products, NALCMS (North 
American land cover), and legacy NLCD products. The portal also provides tools (e.g., MRLC Viewer, EVA Tool, Rangeland Viewer) and web services (OGC WMS/WCS) for interactive access and subsetting.", + "url": "https://www.mrlc.gov/data", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "@id": "https://www.mrlc.gov/#website", + "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", + "url": "https://www.mrlc.gov/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.mrlc.gov/#org", + "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", + "url": "https://www.mrlc.gov/", + "description": "A consortium of U.S. federal agencies producing land cover and land change data products.", + "member": [ + { "@type": "Organization", "name": "U.S. Geological Survey (USGS)" }, + { "@type": "Organization", "name": "National Oceanic and Atmospheric Administration (NOAA)" }, + { "@type": "Organization", "name": "U.S. Forest Service (USFS)" }, + { "@type": "Organization", "name": "Bureau of Land Management (BLM)" }, + { "@type": "Organization", "name": "National Park Service (NPS)" }, + { "@type": "Organization", "name": "U.S. Department of Agriculture (USDA)" }, + { "@type": "Organization", "name": "U.S. Environmental Protection Agency (EPA)" }, + { "@type": "Organization", "name": "U.S. 
Fish and Wildlife Service (USFWS)" }, + { "@type": "Organization", "name": "National Agricultural Statistics Service (NASS)" }, + { "@type": "Organization", "name": "LANDFIRE" } + ] + }, + "keywords": [ + "MRLC", + "land cover", + "land use", + "NLCD", + "Annual NLCD", + "RCMAP", + "rangeland", + "tree canopy", + "impervious surface", + "remote sensing", + "Landsat" + ], + "about": { + "@type": "DataCatalog", + "@id": "https://www.mrlc.gov/data#catalog" + }, + "mainEntity": { + "@type": "DataCatalog", + "@id": "https://www.mrlc.gov/data#catalog" + } +} diff --git a/data/objects/summoned/generated/TerraClimate/prompt.txt b/data/objects/summoned/generated/TerraClimate/prompt.txt new file mode 100644 index 0000000..edb3de7 --- /dev/null +++ b/data/objects/summoned/generated/TerraClimate/prompt.txt @@ -0,0 +1,98 @@ +I need to create Schema.org JSON-LD descriptions for a scientific climate dataset. + +**Website URL**: https://www.climatologylab.org/terraclimate.html + +**Dataset Information**: +- Name: TerraClimate +- Group/Category: climate +- Description: Monthly climate and climatic water balance for global terrestrial surfaces from 1958-2019 (extended to 2020). High spatial resolution (~4-km, 1/24th degree) monthly climate data with temporal coverage from 1958-2020, with plans for periodic annual updates. 
+ +**Key Details**: +- Publisher: Climatology Lab (University of Idaho, based on context) +- Creator: John Abatzoglou (lead author) +- Website: https://www.climatologylab.org/terraclimate.html +- Coverage: Global terrestrial surfaces +- Temporal Coverage: 1958-2020 (historical), plus future projections (+2C and +4C scenarios) +- Spatial Resolution: ~4-km (1/24th degree) +- Temporal Resolution: Monthly +- License: CC0 (Public Domain Dedication) + +**Primary Climate Variables**: +- Maximum temperature +- Minimum temperature +- Vapor pressure +- Precipitation accumulation +- Downward surface shortwave radiation +- Wind-speed + +**Derived Variables**: +- Reference evapotranspiration (ASCE Penman-Monteith) +- Runoff +- Actual Evapotranspiration +- Climate Water Deficit +- Soil Moisture +- Snow Water Equivalent +- Palmer Drought Severity Index (PDSI) +- Vapor pressure deficit (VPD) + +**Methods**: +- Uses climatically aided interpolation +- Combines high-spatial resolution climatological normals from WorldClim dataset +- With coarser spatial resolution, but time-varying data from CRU Ts4.0 and Japanese 55-year Reanalysis (JRA55) +- Applies interpolated time-varying anomalies from CRU Ts4.0/JRA55 to high-spatial resolution climatology of WorldClim +- Uses modified Thornthwaite-Mather climatic water-balance model + +**Data Access**: +- NetCDF files from THREDDS web server +- Individual years (1958-present) +- Aggregated years (1958-present) +- Future climate projections (+2C and +4C scenarios) +- Climatologies (1961-1990, 1981-2010, and future scenarios) +- Google Earth Engine: IDAHO_EPSCOR/TERRACLIMATE +- Download via THREDDS OPeNDAP and NCSS services + +**Citation**: +Abatzoglou, J.T., S.Z. Dobrowski, S.A. Parks, K.C. 
Hegewisch, 2018, Terraclimate, a high-resolution global dataset of monthly climate and climatic water balance from 1958-2015, Scientific Data + +**Instructions for Step 1.0 (WebPage Description)**: +Please create a Schema.org WebPage JSON-LD that includes: +- @context: "https://schema.org/" +- @type: "WebPage" +- @id: The webpage URL +- name: Full dataset name +- description: Comprehensive description of TerraClimate +- url: Main webpage URL +- publisher: Climatology Lab / University of Idaho (if identifiable) +- inLanguage: "en" +- isPartOf: WebSite information +- about: Reference to the dataset +- keywords: Relevant keywords (climate, TerraClimate, monthly climate, water balance, global, high resolution, etc.) +- mainEntity: Reference to the Dataset + +**Instructions for Dataset JSON-LD**: +Please create a Schema.org Dataset JSON-LD that includes: +- @context, @type, @id, name, description, url +- creator: John Abatzoglou and co-authors +- publisher: Climatology Lab / University of Idaho +- temporalCoverage: 1958-2020 (and future scenarios) +- spatialCoverage: Global terrestrial surfaces +- variableMeasured: List all primary and derived climate variables +- distribution: Multiple DataDownload entries for: + - THREDDS web server access + - Google Earth Engine + - Direct download options +- encodingFormat: NetCDF4 +- version: Current version information +- license: CC0 (Public Domain) +- citation: Scientific publication citation +- measurementTechnique: Climatically aided interpolation method +- about: Climate, water balance, ecological and hydrological studies + +**Output Format**: +Provide the complete JSON-LD in a code block, properly formatted and valid JSON. 
+ +**Important Notes**: +- This is a single comprehensive dataset (not a catalog with multiple datasets) +- Includes both historical data (1958-2020) and future climate projections +- Multiple access methods available (THREDDS, Google Earth Engine, direct download) +- All data in NetCDF4 format diff --git a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld new file mode 100644 index 0000000..b2945c7 --- /dev/null +++ b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld @@ -0,0 +1,239 @@ +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "@id": "https://www.climatologylab.org/terraclimate.html#dataset", + "name": "TerraClimate: Monthly Climate and Climatic Water Balance for Global Terrestrial Surfaces", + "description": "TerraClimate is a dataset of monthly climate and climatic water balance for global terrestrial land surfaces at ~4-km (1/24°) spatial resolution. It provides time-varying monthly climate fields derived via climatically aided interpolation: high-resolution climatological normals from WorldClim are combined with interpolated monthly anomalies from CRU TS (for most temperature, precipitation, and vapor pressure) and JRA-55 (used where CRU station influence is absent and used for solar radiation and wind speed). TerraClimate also produces monthly surface water balance variables using a modified Thornthwaite–Mather climatic water-balance model incorporating precipitation, temperature, reference evapotranspiration (ASCE Penman–Monteith), and soil water capacity. The core historical record covers 1958–2020 with planned periodic updates, and additional future layers are provided for +2°C and +4°C global mean temperature futures for pseudo-years 1985–2015 plus climatological summaries. 
Data are distributed primarily as compressed NetCDF (NetCDF4) via THREDDS/OPeNDAP and related web services and are also available as a Google Earth Engine image collection.", + "url": "https://www.climatologylab.org/terraclimate.html", + "isAccessibleForFree": true, + "keywords": [ + "TerraClimate", + "monthly climate", + "climatic water balance", + "temperature", + "precipitation", + "vapor pressure", + "solar radiation", + "wind speed", + "evapotranspiration", + "runoff", + "soil moisture", + "snow water equivalent", + "PDSI", + "drought", + "WorldClim", + "CRU TS", + "JRA-55", + "NetCDF4", + "THREDDS", + "OPeNDAP", + "NCSS", + "WMS", + "WCS", + "Google Earth Engine" + ], + "creator": [ + { + "@type": "Person", + "name": "John T. Abatzoglou", + "affiliation": { + "@type": "Organization", + "name": "University of Idaho", + "url": "https://www.uidaho.edu/" + } + }, + { + "@type": "Person", + "name": "S. Z. Dobrowski" + }, + { + "@type": "Person", + "name": "S. A. Parks" + }, + { + "@type": "Person", + "name": "K. C. Hegewisch" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "Climatology Lab", + "url": "https://www.climatologylab.org/", + "parentOrganization": { + "@type": "Organization", + "name": "University of Idaho", + "url": "https://www.uidaho.edu/" + } + }, + { + "@type": "Organization", + "name": "Northwest Knowledge Network (NKN)", + "url": "https://www.northwestknowledge.net/", + "description": "Primary THREDDS hosting and web services for TerraClimate NetCDF distributions." 
+ } + ], + "license": { + "@type": "CreativeWork", + "name": "CC0 1.0 Universal (Public Domain Dedication)", + "url": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "temporalCoverage": [ + "1958-01-01/2020-12-31", + "1985-01-01/2015-12-31" + ], + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "spatialResolution": "1/24 degree (~4 km)", + "temporalResolution": "P1M", + "encodingFormat": [ + "application/x-netcdf", + "application/x-netcdf;version=4" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Maximum temperature", + "unitText": "C" + }, + { + "@type": "PropertyValue", + "name": "Minimum temperature", + "unitText": "C" + }, + { + "@type": "PropertyValue", + "name": "Vapor pressure", + "unitText": "kPa" + }, + { + "@type": "PropertyValue", + "name": "Precipitation accumulation", + "unitText": "mm" + }, + { + "@type": "PropertyValue", + "name": "Downward surface shortwave radiation", + "unitText": "W/m2" + }, + { + "@type": "PropertyValue", + "name": "Wind speed", + "unitText": "m/s" + }, + { + "@type": "PropertyValue", + "name": "Reference evapotranspiration (ASCE Penman–Monteith)", + "unitText": "mm" + }, + { + "@type": "PropertyValue", + "name": "Runoff", + "unitText": "mm" + }, + { + "@type": "PropertyValue", + "name": "Actual evapotranspiration", + "unitText": "mm" + }, + { + "@type": "PropertyValue", + "name": "Climate water deficit", + "unitText": "mm" + }, + { + "@type": "PropertyValue", + "name": "Soil moisture (total column, end of month)", + "unitText": "mm" + }, + { + "@type": "PropertyValue", + "name": "Snow water equivalent (end of month)", + "unitText": "mm" + }, + { + "@type": "PropertyValue", + "name": "Palmer Drought Severity Index (PDSI)", + "unitText": "unitless" + }, + { + "@type": "PropertyValue", + "name": "Vapor pressure deficit (VPD)", + "unitText": "kPa" + } + ], + "measurementTechnique": [ + 
"Climatically aided interpolation using high-resolution climatological normals (WorldClim) combined with time-varying anomalies from CRU TS and JRA-55", + "Modified Thornthwaite–Mather climatic water-balance model for derived water-balance variables" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "TerraClimate THREDDS catalog (all data, summaries, climatologies, and +2°C/+4°C scenarios)", + "description": "Browsable THREDDS catalog containing annual/monthly NetCDF files and aggregated products, including summaries, climatologies, and climate futures (+2°C and +4°C).", + "encodingFormat": "text/html", + "contentUrl": "https://tds-proxy.nkn.uidaho.edu/thredds/catalog/TERRACLIMATE_ALL/catalog.html" + }, + { + "@type": "DataDownload", + "name": "TerraClimate THREDDS aggregated catalog (monthly aggregations by variable)", + "description": "THREDDS aggregated catalogs providing service endpoints (OPeNDAP/NetCDF Subset/NCSS/WMS/WCS) for 1958–current-year monthly aggregations by variable.", + "encodingFormat": "text/html", + "contentUrl": "https://tds-proxy.nkn.uidaho.edu/thredds/terraclimate_aggregated.html" + }, + { + "@type": "DataDownload", + "name": "Example OP(e)NDAP access endpoint (aggregated monthly variable file)", + "description": "Example OPeNDAP endpoint for an aggregated monthly variable NetCDF. 
Replace the variable/file identifier as needed for other variables and products available in the THREDDS aggregated catalog.", + "encodingFormat": "application/x-netcdf", + "contentUrl": "https://tds-proxy.nkn.uidaho.edu/thredds/dodsC/agg_terraclimate_tmax_1958_CurrentYear_GLOBE.nc" + }, + { + "@type": "DataDownload", + "name": "Google Earth Engine ImageCollection: IDAHO_EPSCOR/TERRACLIMATE", + "description": "TerraClimate is also available as a Google Earth Engine ImageCollection for cloud-based analysis and visualization.", + "encodingFormat": "text/html", + "contentUrl": "https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_TERRACLIMATE" + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "TerraClimate, a high-resolution global dataset of monthly climate and climatic water balance from 1958–2015", + "author": [ + { "@type": "Person", "name": "J. T. Abatzoglou" }, + { "@type": "Person", "name": "S. Z. Dobrowski" }, + { "@type": "Person", "name": "S. A. Parks" }, + { "@type": "Person", "name": "K. C. 
Hegewisch" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Scientific Data" + }, + "datePublished": "2018", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/sdata.2017.191" + } + ], + "sameAs": "https://doi.org/10.1038/sdata.2017.191" + } + ], + "about": [ + { "@type": "Thing", "name": "Climate" }, + { "@type": "Thing", "name": "Climatology" }, + { "@type": "Thing", "name": "Hydrology" }, + { "@type": "Thing", "name": "Drought" }, + { "@type": "Thing", "name": "Evapotranspiration" }, + { "@type": "Thing", "name": "Water balance" }, + { "@type": "Thing", "name": "Ecological and hydrological studies" } + ] +} diff --git a/data/objects/summoned/generated/TerraClimate/webpage.jsonld b/data/objects/summoned/generated/TerraClimate/webpage.jsonld new file mode 100644 index 0000000..684a99c --- /dev/null +++ b/data/objects/summoned/generated/TerraClimate/webpage.jsonld @@ -0,0 +1,61 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "@id": "https://www.climatologylab.org/terraclimate.html", + "name": "TerraClimate", + "description": "Webpage for TerraClimate, a high-resolution global dataset of monthly climate and climatic water balance for global terrestrial surfaces from 1958-2020. TerraClimate provides monthly climate data at ~4-km (1/24th degree) spatial resolution, including primary climate variables (temperature, precipitation, vapor pressure, radiation, wind-speed) and derived water balance variables (evapotranspiration, runoff, soil moisture, drought indices). The dataset uses climatically aided interpolation combining WorldClim climatological normals with time-varying data from CRU Ts4.0 and JRA55. 
Future climate projections are also available for +2C and +4C scenarios.", + "url": "https://www.climatologylab.org/terraclimate.html", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Climatology Lab", + "url": "https://www.climatologylab.org/" + }, + "about": { + "@type": "Dataset", + "name": "TerraClimate", + "description": "Monthly climate and climatic water balance for global terrestrial surfaces", + "url": "https://www.climatologylab.org/terraclimate.html" + }, + "publisher": { + "@type": "Organization", + "name": "Climatology Lab", + "url": "https://www.climatologylab.org/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "TerraClimate", + "description": "High-resolution global dataset of monthly climate and climatic water balance", + "url": "https://www.climatologylab.org/terraclimate.html" + }, + "keywords": [ + "TerraClimate", + "climate", + "monthly climate", + "water balance", + "global climate", + "high resolution", + "climatology", + "evapotranspiration", + "drought indices", + "NetCDF", + "THREDDS" + ], + "breadcrumb": { + "@type": "BreadcrumbList", + "itemListElement": [ + { + "@type": "ListItem", + "position": 1, + "name": "Climatology Lab", + "item": "https://www.climatologylab.org/" + }, + { + "@type": "ListItem", + "position": 2, + "name": "TerraClimate", + "item": "https://www.climatologylab.org/terraclimate.html" + } + ] + } +} From 037dbfe18b533b9355aa63c241af43eba22d6ef4 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 26 Jan 2026 13:33:56 -0600 Subject: [PATCH 16/58] added sitemaps --- .env.example | 24 ++- collection/sitemap.xml | 35 ++++ data/objects/summoned/generated/sitemap.xml | 127 +++++++++++++ data/objects/summoned/sitemap.xml | 67 +++++++ data/sitemap.xml | 195 ++++++++++++++++++++ 5 files changed, 443 insertions(+), 5 deletions(-) create mode 100644 collection/sitemap.xml create mode 100644 data/objects/summoned/generated/sitemap.xml create mode 100644 data/objects/summoned/sitemap.xml create 
mode 100644 data/sitemap.xml diff --git a/.env.example b/.env.example index 3085b03..daa0a98 100644 --- a/.env.example +++ b/.env.example @@ -1,14 +1,28 @@ -# API Keys for JSON-LD Generation +# API Keys for JSON-LD Generation Script # Copy this file to .env and fill in your actual API keys # The .env file is gitignored and will not be committed +# Google Gemini API Key (default service - uses URL Context Tool to browse URLs directly) +# Get your key from: https://aistudio.google.com/apikey +# Free tier available with .edu email +# Default model: gemini-2.0-flash +GEMINI_API_KEY=your-gemini-api-key-here + # NRP (National Research Platform) API Key # Get your key from: https://nrp.ai/documentation/userdocs/ai/llm-managed/ # Available NRP models: qwen3, llama3-sdsc, gpt-oss, gorilla, olmo, gemma3, kimi, etc. +# Default model: qwen3 +# Note: NRP fetches HTML and extracts text before sending to AI NRP_API_KEY=your-nrp-api-key-here -# OpenAI API Key (optional) -# OPENAI_API_KEY=your-openai-api-key-here +# OpenAI/ChatGPT API Key +# Get your key from: https://platform.openai.com/api-keys +# Default model: gpt-4o +# Note: OpenAI fetches HTML and extracts text before sending to AI +OPENAI_API_KEY=your-openai-api-key-here -# Anthropic API Key (optional) -# ANTHROPIC_API_KEY=your-anthropic-api-key-here +# Anthropic (Claude) API Key +# Get your key from: https://console.anthropic.com/ +# Default model: claude-3-5-sonnet-20241022 +# Note: Anthropic fetches HTML and extracts text before sending to AI +ANTHROPIC_API_KEY=your-anthropic-api-key-here diff --git a/collection/sitemap.xml b/collection/sitemap.xml new file mode 100644 index 0000000..34bf31d --- /dev/null +++ b/collection/sitemap.xml @@ -0,0 +1,35 @@ + + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/sitemap.xml + 2026-01-26T13:00:44 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/stac-broswer/sitemap.xml + 2023-11-16T12:01:07 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/hydrography90m/sitemaps/hydrography90m.xml + 2024-04-16T14:37:28 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/glim.xml + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/gpp.xml + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/nitrogen.xml + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/nitrogen2.xml + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/siteindex.xml + 2023-11-16T12:01:07 + + \ No newline at end of file diff --git a/data/objects/summoned/generated/sitemap.xml b/data/objects/summoned/generated/sitemap.xml new file mode 100644 index 0000000..044015b --- /dev/null +++ b/data/objects/summoned/generated/sitemap.xml @@ -0,0 +1,127 @@ + + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld + 2026-01-20T16:54:56 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/datacatalog.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/GFC/gfc.jsonld + 2026-01-20T16:54:56 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/GFC/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld + 2026-01-20T16:54:56 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/TerraClimate/webpage.jsonld + 2026-01-20T16:54:56 + + \ No newline at end of file diff --git a/data/objects/summoned/sitemap.xml b/data/objects/summoned/sitemap.xml new file mode 100644 index 0000000..002b94e --- /dev/null +++ b/data/objects/summoned/sitemap.xml @@ -0,0 +1,67 @@ + + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/42cc768d19310666e11275e0ef420914c5d10868.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/5cbde8720e62b2e74ea38d7cdfdafd747405093f.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/b828dd7f22d629058b801d915af937420b4de1ab.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/fc3fbb167c441f59021612f8261b51694b49904b.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/73b23fc49e03311c4a4abf85208ae8a0fd114aa7.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld + 2023-11-09T11:27:19 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/bc4d190917216d7d0db70cd10d64f3e7a1d54d03.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/ef65272b4c4da100943c1142f85457f41f9f03c5.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/5588a6fb892f4d5b0a21b503b63dc0586c256de8.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/70eeed8a265bb642c401e0e51c98103a875b6fb1.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/eba7bc7d2f6dc818108833457f776e065ae9521c.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen/4ef2e6a6cdec4cb04e059b588a790f2817aa9959.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/stac/62431785c83ca0c826bdb5cca1dd9eeb3cc0655b.jsonld + 2023-11-09T11:27:19 + + \ No newline at end of file diff --git a/data/sitemap.xml b/data/sitemap.xml new file mode 100644 index 0000000..16a6191 --- /dev/null +++ b/data/sitemap.xml @@ -0,0 +1,195 @@ + + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/catalogJSON.json + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld + 2023-11-09T11:27:19 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/42cc768d19310666e11275e0ef420914c5d10868.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/5cbde8720e62b2e74ea38d7cdfdafd747405093f.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/b828dd7f22d629058b801d915af937420b4de1ab.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/fc3fbb167c441f59021612f8261b51694b49904b.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/73b23fc49e03311c4a4abf85208ae8a0fd114aa7.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/bc4d190917216d7d0db70cd10d64f3e7a1d54d03.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/ef65272b4c4da100943c1142f85457f41f9f03c5.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/5588a6fb892f4d5b0a21b503b63dc0586c256de8.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/70eeed8a265bb642c401e0e51c98103a875b6fb1.jsonld + 2023-11-09T11:27:19 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/eba7bc7d2f6dc818108833457f776e065ae9521c.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen/4ef2e6a6cdec4cb04e059b588a790f2817aa9959.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/stac/62431785c83ca0c826bdb5cca1dd9eeb3cc0655b.jsonld + 2023-11-09T11:27:19 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld + 2026-01-20T16:54:56 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/datacatalog.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/GFC/gfc.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/GFC/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld + 2026-01-20T16:54:56 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld + 2026-01-20T16:54:56 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/TerraClimate/webpage.jsonld + 2026-01-20T16:54:56 + + \ No newline at end of file From 8751bfd10065335b29c575125176dcb2defbc381 Mon Sep 17 00:00:00 
2001 From: YONG WOOK KIM Date: Mon, 26 Jan 2026 13:34:21 -0600 Subject: [PATCH 17/58] Added github action for generating sitemaps and validation --- .github/workflows/sitemap_resources.yaml | 33 +++++++ .../validate_with_dataset_schema.yaml | 98 +++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 .github/workflows/sitemap_resources.yaml create mode 100644 .github/workflows/validate_with_dataset_schema.yaml diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml new file mode 100644 index 0000000..9202ed2 --- /dev/null +++ b/.github/workflows/sitemap_resources.yaml @@ -0,0 +1,33 @@ +name: Generate XML sitemap for JSON-LD resources + +on: + push: + branches: + - main + +jobs: + sitemap_job: + runs-on: ubuntu-latest + name: Generate sitemap for JSON-LD files + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # Generate single sitemap for all JSON-LD files in data and collection directories + - name: Generate sitemap for all JSON-LD resources + id: sitemap_all + uses: cicirello/generate-sitemap@v1 + with: + base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/main + path-to-root: . 
+ include-html: false + include-pdf: false + additional-extensions: jsonld json xml + exclude-paths: .git .github docs scripts crawler prompts .vscode + - name: Output sitemap stats + run: | + echo "sitemap-path = ${{ steps.sitemap_all.outputs.sitemap-path }}" + echo "url-count = ${{ steps.sitemap_all.outputs.url-count }}" + echo "excluded-count = ${{ steps.sitemap_all.outputs.excluded-count }}" diff --git a/.github/workflows/validate_with_dataset_schema.yaml b/.github/workflows/validate_with_dataset_schema.yaml new file mode 100644 index 0000000..f5298a1 --- /dev/null +++ b/.github/workflows/validate_with_dataset_schema.yaml @@ -0,0 +1,98 @@ +name: Validate JSON-LD files with Schema.org Dataset schema + +on: + push: + branches-ignore: [ 'gh-pages' ] + pull_request: + branches-ignore: [ 'gh-pages' ] + +jobs: + validate-jsonld-generated: + runs-on: ubuntu-latest + name: Validate generated JSON-LD files + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Find and validate generated JSON-LD files + run: | + echo "Validating JSON-LD files in data/objects/summoned/generated/" + failed=0 + while IFS= read -r -d '' file; do + echo "Validating: $file" + if ! 
python scripts/validate_jsonld.py "$file"; then + failed=1 + fi + done < <(find data/objects/summoned/generated -name "*.jsonld" -type f -print0) + if [ $failed -eq 1 ]; then + echo "Some files failed validation" + exit 1 + fi + echo "All generated JSON-LD files validated successfully" + + validate-jsonld-summoned: + runs-on: ubuntu-latest + name: Validate summoned JSON-LD files + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Find and validate summoned JSON-LD files + run: | + echo "Validating JSON-LD files in data/objects/summoned/ (excluding generated/)" + failed=0 + while IFS= read -r -d '' file; do + echo "Validating: $file" + if ! python scripts/validate_jsonld.py "$file"; then + failed=1 + fi + done < <(find data/objects/summoned -name "*.jsonld" -type f ! -path "*/generated/*" -print0) + if [ $failed -eq 1 ]; then + echo "Some files failed validation" + exit 1 + fi + echo "All summoned JSON-LD files validated successfully" + + validate-jsonld-all: + runs-on: ubuntu-latest + name: Validate all JSON-LD files + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Find and validate all JSON-LD files + run: | + echo "Validating all JSON-LD files in data/ directory" + failed=0 + while IFS= read -r -d '' file; do + echo "Validating: $file" + if ! 
python scripts/validate_jsonld.py "$file"; then + failed=1 + fi + done < <(find data -name "*.jsonld" -type f -print0) + if [ $failed -eq 1 ]; then + echo "Some files failed validation" + exit 1 + fi + echo "All JSON-LD files validated successfully" From 2ab87eab95bb5f306c2de41a7f8bd2f37ca2a5f2 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 26 Jan 2026 13:53:17 -0600 Subject: [PATCH 18/58] fixed validation github action --- .../validate_with_dataset_schema.yaml | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/.github/workflows/validate_with_dataset_schema.yaml b/.github/workflows/validate_with_dataset_schema.yaml index f5298a1..5e6c2d3 100644 --- a/.github/workflows/validate_with_dataset_schema.yaml +++ b/.github/workflows/validate_with_dataset_schema.yaml @@ -19,18 +19,28 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.x' + python-version: '3.11' - name: Find and validate generated JSON-LD files run: | echo "Validating JSON-LD files in data/objects/summoned/generated/" + if [ ! -d "data/objects/summoned/generated" ]; then + echo "Directory data/objects/summoned/generated/ does not exist. Skipping validation." + exit 0 + fi + file_count=0 failed=0 while IFS= read -r -d '' file; do + file_count=$((file_count + 1)) echo "Validating: $file" if ! 
python scripts/validate_jsonld.py "$file"; then failed=1 fi - done < <(find data/objects/summoned/generated -name "*.jsonld" -type f -print0) + done < <(find data/objects/summoned/generated -name "*.jsonld" -type f -print0 2>/dev/null) + if [ $file_count -eq 0 ]; then + echo "No JSON-LD files found in data/objects/summoned/generated/" + exit 0 + fi if [ $failed -eq 1 ]; then echo "Some files failed validation" exit 1 @@ -49,18 +59,28 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.x' + python-version: '3.11' - name: Find and validate summoned JSON-LD files run: | echo "Validating JSON-LD files in data/objects/summoned/ (excluding generated/)" + if [ ! -d "data/objects/summoned" ]; then + echo "Directory data/objects/summoned/ does not exist. Skipping validation." + exit 0 + fi + file_count=0 failed=0 while IFS= read -r -d '' file; do + file_count=$((file_count + 1)) echo "Validating: $file" if ! python scripts/validate_jsonld.py "$file"; then failed=1 fi - done < <(find data/objects/summoned -name "*.jsonld" -type f ! -path "*/generated/*" -print0) + done < <(find data/objects/summoned -name "*.jsonld" -type f ! -path "*/generated/*" -print0 2>/dev/null) + if [ $file_count -eq 0 ]; then + echo "No JSON-LD files found in data/objects/summoned/ (excluding generated/)" + exit 0 + fi if [ $failed -eq 1 ]; then echo "Some files failed validation" exit 1 @@ -79,18 +99,28 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.x' + python-version: '3.11' - name: Find and validate all JSON-LD files run: | echo "Validating all JSON-LD files in data/ directory" + if [ ! -d "data" ]; then + echo "Directory data/ does not exist. Skipping validation." + exit 0 + fi + file_count=0 failed=0 while IFS= read -r -d '' file; do + file_count=$((file_count + 1)) echo "Validating: $file" if ! 
python scripts/validate_jsonld.py "$file"; then failed=1 fi - done < <(find data -name "*.jsonld" -type f -print0) + done < <(find data -name "*.jsonld" -type f -print0 2>/dev/null) + if [ $file_count -eq 0 ]; then + echo "No JSON-LD files found in data/ directory" + exit 0 + fi if [ $failed -eq 1 ]; then echo "Some files failed validation" exit 1 From 3919f2e3b3d0e528f1c89acddc1a2640d3f0aeb7 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 9 Feb 2026 12:29:18 -0600 Subject: [PATCH 19/58] make a single site map that covers all --- collection/sitemap.xml | 35 ------ collection/stac-broswer/sitemap.xml | 9 -- data/objects/summoned/generated/sitemap.xml | 127 -------------------- data/objects/summoned/sitemap.xml | 67 ----------- data/sitemap.xml => sitemap.xml | 24 ++++ 5 files changed, 24 insertions(+), 238 deletions(-) delete mode 100644 collection/sitemap.xml delete mode 100644 collection/stac-broswer/sitemap.xml delete mode 100644 data/objects/summoned/generated/sitemap.xml delete mode 100644 data/objects/summoned/sitemap.xml rename data/sitemap.xml => sitemap.xml (90%) diff --git a/collection/sitemap.xml b/collection/sitemap.xml deleted file mode 100644 index 34bf31d..0000000 --- a/collection/sitemap.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/sitemap.xml - 2026-01-26T13:00:44 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/stac-broswer/sitemap.xml - 2023-11-16T12:01:07 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/hydrography90m/sitemaps/hydrography90m.xml - 2024-04-16T14:37:28 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/glim.xml - 2023-11-09T10:06:30 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/gpp.xml - 2023-11-09T10:06:30 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/nitrogen.xml - 2023-11-09T10:06:30 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/nitrogen2.xml - 2023-11-09T10:06:30 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/siteindex.xml - 2023-11-16T12:01:07 - - \ No newline at end of file diff --git a/collection/stac-broswer/sitemap.xml b/collection/stac-broswer/sitemap.xml deleted file mode 100644 index 3d82292..0000000 --- a/collection/stac-broswer/sitemap.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - https://radiantearth.github.io/stac-browser/#/external/raw.githubusercontent.com/addelany/neon4cast-catalog/main/stac/phenology/collection.json?.language=en&.asset=asset-thumbnail - 2005-01-01 - monthly - 0.8 - - \ No newline at end of file diff --git a/data/objects/summoned/generated/sitemap.xml b/data/objects/summoned/generated/sitemap.xml deleted file mode 100644 index 044015b..0000000 --- a/data/objects/summoned/generated/sitemap.xml +++ /dev/null @@ -1,127 +0,0 @@ - - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld - 2026-01-20T16:54:56 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/datacatalog.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/GFC/gfc.jsonld - 2026-01-20T16:54:56 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/GFC/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld - 2026-01-20T16:54:56 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/TerraClimate/webpage.jsonld - 2026-01-20T16:54:56 - - \ No newline at end of file diff --git a/data/objects/summoned/sitemap.xml b/data/objects/summoned/sitemap.xml deleted file mode 100644 index 002b94e..0000000 --- a/data/objects/summoned/sitemap.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/42cc768d19310666e11275e0ef420914c5d10868.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/5cbde8720e62b2e74ea38d7cdfdafd747405093f.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/b828dd7f22d629058b801d915af937420b4de1ab.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/fc3fbb167c441f59021612f8261b51694b49904b.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/73b23fc49e03311c4a4abf85208ae8a0fd114aa7.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld - 2023-11-09T11:27:19 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/bc4d190917216d7d0db70cd10d64f3e7a1d54d03.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/ef65272b4c4da100943c1142f85457f41f9f03c5.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/5588a6fb892f4d5b0a21b503b63dc0586c256de8.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/70eeed8a265bb642c401e0e51c98103a875b6fb1.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/eba7bc7d2f6dc818108833457f776e065ae9521c.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen/4ef2e6a6cdec4cb04e059b588a790f2817aa9959.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/stac/62431785c83ca0c826bdb5cca1dd9eeb3cc0655b.jsonld - 2023-11-09T11:27:19 - - \ No newline at end of file diff --git a/data/sitemap.xml b/sitemap.xml similarity index 90% rename from data/sitemap.xml rename to sitemap.xml index 16a6191..b27104f 100644 --- a/data/sitemap.xml +++ b/sitemap.xml @@ -4,6 +4,30 @@ https://raw.githubusercontent.com/earthcube/communityCollections/main/data/catalogJSON.json 2023-11-09T10:06:30 + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/hydrography90m/sitemaps/hydrography90m.xml + 2024-04-16T14:37:28 + + + 
https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/glim.xml + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/gpp.xml + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/nitrogen.xml + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/nitrogen2.xml + 2023-11-09T10:06:30 + + + https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/siteindex.xml + 2023-11-16T12:01:07 + https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld 2023-11-09T11:27:19 From 84ca70a9059a4ecda1535627137f80df7f2f5c73 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 9 Feb 2026 14:20:14 -0600 Subject: [PATCH 20/58] changed main to master in the sitemap's url --- sitemap.xml | 108 ++++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/sitemap.xml b/sitemap.xml index b27104f..89661cc 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -1,219 +1,219 @@ - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/catalogJSON.json + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/catalogJSON.json 2023-11-09T10:06:30 - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/hydrography90m/sitemaps/hydrography90m.xml + https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/hydrography90m/sitemaps/hydrography90m.xml 2024-04-16T14:37:28 - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/glim.xml + https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/glim.xml 
2023-11-09T10:06:30 - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/gpp.xml + https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/gpp.xml 2023-11-09T10:06:30 - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/nitrogen.xml + https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/nitrogen.xml 2023-11-09T10:06:30 - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/nitrogen2.xml + https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/nitrogen2.xml 2023-11-09T10:06:30 - https://raw.githubusercontent.com/earthcube/communityCollections/main/collection/pangaea/sitemaps/siteindex.xml + https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/siteindex.xml 2023-11-16T12:01:07 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/42cc768d19310666e11275e0ef420914c5d10868.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/42cc768d19310666e11275e0ef420914c5d10868.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/5cbde8720e62b2e74ea38d7cdfdafd747405093f.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/5cbde8720e62b2e74ea38d7cdfdafd747405093f.jsonld 2023-11-09T11:27:19 - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/b828dd7f22d629058b801d915af937420b4de1ab.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/b828dd7f22d629058b801d915af937420b4de1ab.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/glim/fc3fbb167c441f59021612f8261b51694b49904b.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/fc3fbb167c441f59021612f8261b51694b49904b.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/73b23fc49e03311c4a4abf85208ae8a0fd114aa7.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/73b23fc49e03311c4a4abf85208ae8a0fd114aa7.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/bc4d190917216d7d0db70cd10d64f3e7a1d54d03.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/bc4d190917216d7d0db70cd10d64f3e7a1d54d03.jsonld 2023-11-09T11:27:19 - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/gpp/ef65272b4c4da100943c1142f85457f41f9f03c5.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/ef65272b4c4da100943c1142f85457f41f9f03c5.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/5588a6fb892f4d5b0a21b503b63dc0586c256de8.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen2/5588a6fb892f4d5b0a21b503b63dc0586c256de8.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/70eeed8a265bb642c401e0e51c98103a875b6fb1.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen2/70eeed8a265bb642c401e0e51c98103a875b6fb1.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen2/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen2/eba7bc7d2f6dc818108833457f776e065ae9521c.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen2/eba7bc7d2f6dc818108833457f776e065ae9521c.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/nitrogen/4ef2e6a6cdec4cb04e059b588a790f2817aa9959.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen/4ef2e6a6cdec4cb04e059b588a790f2817aa9959.jsonld 2023-11-09T11:27:19 - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/stac/62431785c83ca0c826bdb5cca1dd9eeb3cc0655b.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/stac/62431785c83ca0c826bdb5cca1dd9eeb3cc0655b.jsonld 2023-11-09T11:27:19 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld + 
https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld 2026-01-20T16:54:56 - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/datacatalog.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/datacatalog.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/CHELSA/webpage.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/webpage.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/GFC/gfc.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/GFC/gfc.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/GFC/webpage.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/GFC/webpage.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld + 
https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld 2026-01-20T16:54:56 - 
https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld 2026-01-20T16:54:56 - https://raw.githubusercontent.com/earthcube/communityCollections/main/data/objects/summoned/generated/TerraClimate/webpage.jsonld + https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/TerraClimate/webpage.jsonld 2026-01-20T16:54:56 \ No newline at end of file From 39df567457df38e4703461ef2aa8db2f0761647d Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:04:30 -0800 Subject: [PATCH 21/58] Update sitemap --- .github/workflows/sitemap_resources.yaml | 19 ++++++++++++++++++- mkdocs.yaml | 5 +++++ 2 files 
changed, 23 insertions(+), 1 deletion(-) create mode 100644 mkdocs.yaml diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index 9202ed2..9f448d1 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -4,12 +4,22 @@ on: push: branches: - main + - 3-generate-jsonld-datasets-from-websites jobs: sitemap_job: runs-on: ubuntu-latest name: Generate sitemap for JSON-LD files steps: + - uses: actions/setup-python@v2 + with: + python-version: 3.x + - run: pip install mkdocs + - run: pip install mkdocs-schema-reader + - name: Checkout the repo + uses: actions/checkout@v3 + with: + fetch-depth: 1 - name: Checkout the repo uses: actions/checkout@v4 with: @@ -20,7 +30,7 @@ jobs: id: sitemap_all uses: cicirello/generate-sitemap@v1 with: - base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/main + base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} path-to-root: . include-html: false include-pdf: false @@ -31,3 +41,10 @@ jobs: echo "sitemap-path = ${{ steps.sitemap_all.outputs.sitemap-path }}" echo "url-count = ${{ steps.sitemap_all.outputs.url-count }}" echo "excluded-count = ${{ steps.sitemap_all.outputs.excluded-count }}" + - run: mkdocs build --config-file mkdocs_geocodesmetadata.yml + - name: push to gh pages + uses: JamesIves/github-pages-deploy-action@4.1.6 + with: + branch: gh-pages + folder: . 
+ clean: false diff --git a/mkdocs.yaml b/mkdocs.yaml new file mode 100644 index 0000000..7c4eec1 --- /dev/null +++ b/mkdocs.yaml @@ -0,0 +1,5 @@ +site_name: Geocodes Metadata +site_url: https://earthcube.github.io/communityCollections/site/ +#theme: 'material' +#theme: 'mkdocs' + From 75b9551c7cdfc391e3445575defd364149b0075f Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:06:32 -0800 Subject: [PATCH 22/58] Update sitemap --- .github/workflows/sitemap_resources.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index 9f448d1..c2415ac 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -41,7 +41,7 @@ jobs: echo "sitemap-path = ${{ steps.sitemap_all.outputs.sitemap-path }}" echo "url-count = ${{ steps.sitemap_all.outputs.url-count }}" echo "excluded-count = ${{ steps.sitemap_all.outputs.excluded-count }}" - - run: mkdocs build --config-file mkdocs_geocodesmetadata.yml + - run: mkdocs build --config-file mkdocs.yml - name: push to gh pages uses: JamesIves/github-pages-deploy-action@4.1.6 with: From 9f5a179343ce5eae80fc80925d0048cdde88ee21 Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:07:39 -0800 Subject: [PATCH 23/58] Update sitemap --- .github/workflows/sitemap_resources.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index c2415ac..af626e6 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -41,7 +41,7 @@ jobs: echo "sitemap-path = ${{ steps.sitemap_all.outputs.sitemap-path }}" echo "url-count = ${{ steps.sitemap_all.outputs.url-count }}" echo "excluded-count = ${{ steps.sitemap_all.outputs.excluded-count }}" - - run: mkdocs build --config-file mkdocs.yml + - run: mkdocs build --config-file mkdocs.yaml - 
name: push to gh pages uses: JamesIves/github-pages-deploy-action@4.1.6 with: From 88cb57997ad4268a8289c415bfe6b2d85b818eda Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:13:25 -0800 Subject: [PATCH 24/58] Update path to generated --- .github/workflows/sitemap_resources.yaml | 27 +++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index af626e6..c2e1971 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -31,7 +31,7 @@ jobs: uses: cicirello/generate-sitemap@v1 with: base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} - path-to-root: . + path-to-root: data/objects/summoned include-html: false include-pdf: false additional-extensions: jsonld json xml @@ -41,6 +41,31 @@ jobs: echo "sitemap-path = ${{ steps.sitemap_all.outputs.sitemap-path }}" echo "url-count = ${{ steps.sitemap_all.outputs.url-count }}" echo "excluded-count = ${{ steps.sitemap_all.outputs.excluded-count }}" +### WE MIGHT WANT TO DO INDIVIDUAL SITEMAPS +# - name: Generate sitemap for all JSON-LD resources +# id: sitemap_all +# uses: cicirello/generate-sitemap@v1 +# with: +# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} +# path-to-root: data/objects/summoned/generated +# include-html: false +# include-pdf: false +# additional-extensions: jsonld json xml +# exclude-paths: +# .git .github docs scripts crawler prompts .vscode +# - name: Generate sitemap for all JSON-LD resources +# id: sitemap_all +# uses: cicirello/generate-sitemap@v1 +# with: +# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} +# path-to-root: data/objects/summoned/glim +# include-html: false +# include-pdf: false +# additional-extensions: jsonld json xml +# exclude-paths: +# .git 
.github docs scripts crawler prompts .vscode + +####### MKDOCS - run: mkdocs build --config-file mkdocs.yaml - name: push to gh pages uses: JamesIves/github-pages-deploy-action@4.1.6 From b2bb0ad77507f16e93dd78511dba3dfbf20c8df6 Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:21:50 -0800 Subject: [PATCH 25/58] Update path to generated --- .github/workflows/sitemap_resources.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index c2e1971..28b1ea6 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -30,7 +30,8 @@ jobs: id: sitemap_all uses: cicirello/generate-sitemap@v1 with: - base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} + base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/gh-pages + # https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} path-to-root: data/objects/summoned include-html: false include-pdf: false @@ -46,7 +47,7 @@ jobs: # id: sitemap_all # uses: cicirello/generate-sitemap@v1 # with: -# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} +# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/gh-pages # path-to-root: data/objects/summoned/generated # include-html: false # include-pdf: false @@ -68,7 +69,7 @@ jobs: ####### MKDOCS - run: mkdocs build --config-file mkdocs.yaml - name: push to gh pages - uses: JamesIves/github-pages-deploy-action@4.1.6 + uses: JamesIves/github-pages-deploy-action@4 with: branch: gh-pages folder: . 
From 0f4da621d9476d5a5062a7f02b36ec0ef2c5fbf9 Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:24:43 -0800 Subject: [PATCH 26/58] Update path to generated --- .github/workflows/sitemap_resources.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index 28b1ea6..5ad3ae6 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -69,7 +69,7 @@ jobs: ####### MKDOCS - run: mkdocs build --config-file mkdocs.yaml - name: push to gh pages - uses: JamesIves/github-pages-deploy-action@4 + uses: JamesIves/github-pages-deploy-action@v4 with: branch: gh-pages folder: . From 60fef1e444cb7c610a70c13ab8e3c97027be7de1 Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:29:52 -0800 Subject: [PATCH 27/58] Update path to generated. fix base url --- .github/workflows/sitemap_resources.yaml | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index 5ad3ae6..6152cba 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -17,21 +17,17 @@ jobs: - run: pip install mkdocs - run: pip install mkdocs-schema-reader - name: Checkout the repo - uses: actions/checkout@v3 + uses: actions/checkout@v6 with: fetch-depth: 1 - - name: Checkout the repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Generate single sitemap for all JSON-LD files in data and collection directories - name: Generate sitemap for all JSON-LD resources id: sitemap_all uses: cicirello/generate-sitemap@v1 with: - base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/gh-pages - # https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} + base-url-path: 
https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/${{ github.ref_name }}/data/objects/summoned + # https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/gh-pages path-to-root: data/objects/summoned include-html: false include-pdf: false @@ -44,10 +40,10 @@ jobs: echo "excluded-count = ${{ steps.sitemap_all.outputs.excluded-count }}" ### WE MIGHT WANT TO DO INDIVIDUAL SITEMAPS # - name: Generate sitemap for all JSON-LD resources -# id: sitemap_all +# id: sitemap_generated # uses: cicirello/generate-sitemap@v1 # with: -# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/gh-pages +# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/${{ github.ref_name }}/data/objects/summoned/generated # path-to-root: data/objects/summoned/generated # include-html: false # include-pdf: false @@ -55,10 +51,10 @@ jobs: # exclude-paths: # .git .github docs scripts crawler prompts .vscode # - name: Generate sitemap for all JSON-LD resources -# id: sitemap_all +# id: sitemap_glim # uses: cicirello/generate-sitemap@v1 # with: -# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/${{ github.ref_name }} +# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/${{ github.ref_name }}/data/objects/summoned/glim # path-to-root: data/objects/summoned/glim # include-html: false # include-pdf: false From 977e2a1dffc92ccde79117e7e01eb89aa343b42e Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:33:03 -0800 Subject: [PATCH 28/58] Update path to generated. fix base url add link to github.io pages --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 44c6c6b..b101a51 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,7 @@ Documentation, files and code related to the exposure of resource on the web for indexing. 
+Sitemaps: +All in data/object/summoned : https://earthcube.github.io/community-collection/data/object/summoned/sitemap.xml + + From 1b06956babed811236ed6c65ee36ce6f80f54b7b Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:40:10 -0800 Subject: [PATCH 29/58] Update path to generated. fix base url add link to github.io pages --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b101a51..c17e9f9 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,6 @@ Documentation, files and code related to the exposure of resource on the web for indexing. Sitemaps: -All in data/object/summoned : https://earthcube.github.io/community-collection/data/object/summoned/sitemap.xml +All in data/object/summoned : https://earthcube.github.io/communityCollections/data/objects/summoned/sitemap.xml From 540d9a95fa3b568defd001c15c947cf1877ff7e3 Mon Sep 17 00:00:00 2001 From: David Valentine Date: Mon, 9 Feb 2026 13:43:49 -0800 Subject: [PATCH 30/58] Update path to generated. 
fix base url add link to github.io pages --- .github/workflows/sitemap_resources.yaml | 23 ++++++++++++----------- README.md | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index 6152cba..ab57532 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -38,19 +38,20 @@ jobs: echo "sitemap-path = ${{ steps.sitemap_all.outputs.sitemap-path }}" echo "url-count = ${{ steps.sitemap_all.outputs.url-count }}" echo "excluded-count = ${{ steps.sitemap_all.outputs.excluded-count }}" + + - name: Generate sitemap for just AI Generated JSON-LD resources + id: sitemap_generated + uses: cicirello/generate-sitemap@v1 + with: + base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/${{ github.ref_name }}/data/objects/summoned/generated + path-to-root: data/objects/summoned/generated + include-html: false + include-pdf: false + additional-extensions: jsonld json xml + exclude-paths: + .git .github docs scripts crawler prompts .vscode ### WE MIGHT WANT TO DO INDIVIDUAL SITEMAPS # - name: Generate sitemap for all JSON-LD resources -# id: sitemap_generated -# uses: cicirello/generate-sitemap@v1 -# with: -# base-url-path: https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/${{ github.ref_name }}/data/objects/summoned/generated -# path-to-root: data/objects/summoned/generated -# include-html: false -# include-pdf: false -# additional-extensions: jsonld json xml -# exclude-paths: -# .git .github docs scripts crawler prompts .vscode -# - name: Generate sitemap for all JSON-LD resources # id: sitemap_glim # uses: cicirello/generate-sitemap@v1 # with: diff --git a/README.md b/README.md index c17e9f9..fe7c0a3 100644 --- a/README.md +++ b/README.md @@ -8,4 +8,4 @@ Documentation, files and code related to the exposure of resource on the web for Sitemaps: All in data/object/summoned : 
https://earthcube.github.io/communityCollections/data/objects/summoned/sitemap.xml - +The AI Generated JSON-LD sitemap: https://earthcube.github.io/communityCollections/data/objects/summoned/generated/sitemap.xml From 7072ec53f59b10c19da006f36ff83de2c89cc331 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 10 Feb 2026 16:20:56 -0600 Subject: [PATCH 31/58] fix validation github action --- .github/workflows/sitemap_resources.yaml | 2 +- .../validate_with_dataset_schema.yaml | 72 +----- scripts/validate_jsonld.py | 218 ++++++++++++++++++ scripts/validate_jsonld_batch.py | 71 ++++++ 4 files changed, 293 insertions(+), 70 deletions(-) create mode 100644 scripts/validate_jsonld.py create mode 100644 scripts/validate_jsonld_batch.py diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index ab57532..6033238 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -70,4 +70,4 @@ jobs: with: branch: gh-pages folder: . - clean: false + clean: false \ No newline at end of file diff --git a/.github/workflows/validate_with_dataset_schema.yaml b/.github/workflows/validate_with_dataset_schema.yaml index 5e6c2d3..b8cbdb6 100644 --- a/.github/workflows/validate_with_dataset_schema.yaml +++ b/.github/workflows/validate_with_dataset_schema.yaml @@ -23,29 +23,7 @@ jobs: - name: Find and validate generated JSON-LD files run: | - echo "Validating JSON-LD files in data/objects/summoned/generated/" - if [ ! -d "data/objects/summoned/generated" ]; then - echo "Directory data/objects/summoned/generated/ does not exist. Skipping validation." - exit 0 - fi - file_count=0 - failed=0 - while IFS= read -r -d '' file; do - file_count=$((file_count + 1)) - echo "Validating: $file" - if ! 
python scripts/validate_jsonld.py "$file"; then - failed=1 - fi - done < <(find data/objects/summoned/generated -name "*.jsonld" -type f -print0 2>/dev/null) - if [ $file_count -eq 0 ]; then - echo "No JSON-LD files found in data/objects/summoned/generated/" - exit 0 - fi - if [ $failed -eq 1 ]; then - echo "Some files failed validation" - exit 1 - fi - echo "All generated JSON-LD files validated successfully" + python scripts/validate_jsonld_batch.py data/objects/summoned/generated validate-jsonld-summoned: runs-on: ubuntu-latest @@ -63,29 +41,7 @@ jobs: - name: Find and validate summoned JSON-LD files run: | - echo "Validating JSON-LD files in data/objects/summoned/ (excluding generated/)" - if [ ! -d "data/objects/summoned" ]; then - echo "Directory data/objects/summoned/ does not exist. Skipping validation." - exit 0 - fi - file_count=0 - failed=0 - while IFS= read -r -d '' file; do - file_count=$((file_count + 1)) - echo "Validating: $file" - if ! python scripts/validate_jsonld.py "$file"; then - failed=1 - fi - done < <(find data/objects/summoned -name "*.jsonld" -type f ! -path "*/generated/*" -print0 2>/dev/null) - if [ $file_count -eq 0 ]; then - echo "No JSON-LD files found in data/objects/summoned/ (excluding generated/)" - exit 0 - fi - if [ $failed -eq 1 ]; then - echo "Some files failed validation" - exit 1 - fi - echo "All summoned JSON-LD files validated successfully" + python scripts/validate_jsonld_batch.py data/objects/summoned --exclude generated validate-jsonld-all: runs-on: ubuntu-latest @@ -103,26 +59,4 @@ jobs: - name: Find and validate all JSON-LD files run: | - echo "Validating all JSON-LD files in data/ directory" - if [ ! -d "data" ]; then - echo "Directory data/ does not exist. Skipping validation." - exit 0 - fi - file_count=0 - failed=0 - while IFS= read -r -d '' file; do - file_count=$((file_count + 1)) - echo "Validating: $file" - if ! 
python scripts/validate_jsonld.py "$file"; then - failed=1 - fi - done < <(find data -name "*.jsonld" -type f -print0 2>/dev/null) - if [ $file_count -eq 0 ]; then - echo "No JSON-LD files found in data/ directory" - exit 0 - fi - if [ $failed -eq 1 ]; then - echo "Some files failed validation" - exit 1 - fi - echo "All JSON-LD files validated successfully" + python scripts/validate_jsonld_batch.py data diff --git a/scripts/validate_jsonld.py b/scripts/validate_jsonld.py new file mode 100644 index 0000000..0d4fbe3 --- /dev/null +++ b/scripts/validate_jsonld.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +""" +Validate JSON-LD files for Schema.org Dataset compliance. + +This script validates: +1. JSON syntax +2. Schema.org structure +3. Required fields +4. Bounding box format +5. Data types +""" + +import json +import sys +from pathlib import Path +from typing import Dict, List, Optional + + +def validate_json_syntax(file_path: Path) -> tuple[bool, Optional[str]]: + """Validate JSON syntax.""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + return True, None + except json.JSONDecodeError as e: + return False, f"Invalid JSON: {e}" + except Exception as e: + return False, f"Error reading file: {e}" + + +def validate_schema_structure(data: Dict) -> List[str]: + """Validate Schema.org Dataset structure.""" + errors = [] + warnings = [] + + # Check required fields + required_fields = ['@context', '@type', '@id', 'name'] + for field in required_fields: + if field not in data: + errors.append(f"Missing required field: {field}") + + # Check @type + if '@type' in data and data['@type'] != 'Dataset': + warnings.append(f"@type is '{data['@type']}', expected 'Dataset'") + + # Check @context + if '@context' in data: + context = data['@context'] + if isinstance(context, str): + if not context.startswith('https://schema.org'): + warnings.append(f"@context should point to schema.org: {context}") + elif isinstance(context, dict): + if '@vocab' in 
context: + vocab = context['@vocab'] + if not vocab.startswith('https://schema.org'): + warnings.append(f"@vocab should point to schema.org: {vocab}") + + # Check spatialCoverage format + if 'spatialCoverage' in data: + spatial = data['spatialCoverage'] + if isinstance(spatial, dict): + if 'geo' in spatial: + geo = spatial['geo'] + if isinstance(geo, dict) and 'box' in geo: + box = geo['box'] + if isinstance(box, str): + # Validate box format: "west,south east,north" + parts = box.split() + if len(parts) != 2: + errors.append(f"Invalid box format: '{box}'. Expected 'west,south east,north'") + else: + try: + west_south = parts[0].split(',') + east_north = parts[1].split(',') + if len(west_south) != 2 or len(east_north) != 2: + errors.append(f"Invalid box format: '{box}'. Coordinates must be comma-separated pairs") + else: + west, south = float(west_south[0]), float(west_south[1]) + east, north = float(east_north[0]), float(east_north[1]) + + # Validate ranges + if not (-180 <= west <= 180): + errors.append(f"Invalid west longitude: {west} (must be -180 to 180)") + if not (-180 <= east <= 180): + errors.append(f"Invalid east longitude: {east} (must be -180 to 180)") + if not (-90 <= south <= 90): + errors.append(f"Invalid south latitude: {south} (must be -90 to 90)") + if not (-90 <= north <= 90): + errors.append(f"Invalid north latitude: {north} (must be -90 to 90)") + if west >= east: + errors.append(f"West ({west}) must be less than East ({east})") + if south >= north: + errors.append(f"South ({south}) must be less than North ({north})") + except ValueError as e: + errors.append(f"Invalid box format: '{box}'. 
{e}") + + # Check distribution format + if 'distribution' in data: + dist = data['distribution'] + if isinstance(dist, list): + for i, item in enumerate(dist): + if not isinstance(item, dict): + errors.append(f"Distribution[{i}] must be an object") + elif '@type' not in item: + warnings.append(f"Distribution[{i}] missing @type (should be 'DataDownload')") + elif isinstance(dist, dict): + if '@type' not in dist: + warnings.append("Distribution missing @type (should be 'DataDownload')") + + return errors, warnings + + +def validate_data_types(data: Dict) -> List[str]: + """Validate data types for common fields.""" + warnings = [] + + # Check datePublished format + if 'datePublished' in data: + date = data['datePublished'] + if isinstance(date, str): + # Should be ISO 8601 format (YYYY-MM-DD) + if len(date) < 10 or date[4] != '-' or date[7] != '-': + warnings.append(f"datePublished format may be incorrect: '{date}' (expected YYYY-MM-DD)") + + # Check version + if 'version' in data: + version = data['version'] + if not isinstance(version, str): + warnings.append(f"version should be a string, got {type(version)}") + + # Check license + if 'license' in data: + license_val = data['license'] + if isinstance(license_val, str): + if not license_val.startswith('http'): + warnings.append(f"license should be a URL: '{license_val}'") + elif isinstance(license_val, list): + for i, lic in enumerate(license_val): + if isinstance(lic, str) and not lic.startswith('http'): + warnings.append(f"license[{i}] should be a URL: '{lic}'") + elif isinstance(lic, dict) and 'url' in lic: + url = lic['url'] + if not url.startswith('http'): + warnings.append(f"license[{i}].url should be a URL: '{url}'") + + return warnings + + +def main(): + if len(sys.argv) < 2: + print("Usage: python validate_jsonld.py ") + sys.exit(1) + + file_path = Path(sys.argv[1]) + + if not file_path.exists(): + print(f"[ERROR] File not found: {file_path}") + sys.exit(1) + + print(f"Validating: {file_path}") + print("=" * 
60) + + # Validate JSON syntax + is_valid, error = validate_json_syntax(file_path) + if not is_valid: + print(f"[ERROR] JSON Syntax Error: {error}") + sys.exit(1) + + print("[OK] Valid JSON syntax") + + # Load data + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + # Validate Schema.org structure + errors, warnings = validate_schema_structure(data) + + # Validate data types + type_warnings = validate_data_types(data) + warnings.extend(type_warnings) + + # Print results + if errors: + print("\n[ERROR] Errors found:") + for error in errors: + print(f" - {error}") + + if warnings: + print("\n[WARNING] Warnings:") + for warning in warnings: + print(f" - {warning}") + + if not errors and not warnings: + print("\n[SUCCESS] All validations passed!") + print("\nSummary:") + print(f" - Type: {data.get('@type', 'N/A')}") + print(f" - Name: {data.get('name', 'N/A')[:60]}...") + if 'spatialCoverage' in data: + spatial = data['spatialCoverage'] + if isinstance(spatial, dict) and 'geo' in spatial: + geo = spatial['geo'] + if isinstance(geo, dict) and 'box' in geo: + print(f" - Bounding Box: {geo['box']}") + if 'distribution' in data: + dist = data['distribution'] + count = len(dist) if isinstance(dist, list) else 1 + print(f" - Distribution entries: {count}") + sys.exit(0) + elif errors: + print(f"\n[FAILED] Validation failed with {len(errors)} error(s)") + sys.exit(1) + else: + print(f"\n[PASSED] Validation passed with {len(warnings)} warning(s)") + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/scripts/validate_jsonld_batch.py b/scripts/validate_jsonld_batch.py new file mode 100644 index 0000000..0d55d12 --- /dev/null +++ b/scripts/validate_jsonld_batch.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +""" +Batch validate JSON-LD files in a directory. 
+ +Usage: + python scripts/validate_jsonld_batch.py [--exclude ] +""" + +import sys +import subprocess +from pathlib import Path + +# Resolve path to validate_jsonld.py (same directory as this script) +SCRIPT_DIR = Path(__file__).resolve().parent +VALIDATE_SCRIPT = SCRIPT_DIR / "validate_jsonld.py" + + +def main(): + if len(sys.argv) < 2: + print("Usage: python scripts/validate_jsonld_batch.py [--exclude ]") + sys.exit(1) + + directory = Path(sys.argv[1]) + exclude_pattern = None + if len(sys.argv) >= 4 and sys.argv[2] == "--exclude": + exclude_pattern = sys.argv[3] + + if not directory.exists(): + print(f"Directory {directory} does not exist. Skipping validation.") + sys.exit(0) + + if not directory.is_dir(): + print(f"{directory} is not a directory.") + sys.exit(1) + + # Find all JSON-LD files + jsonld_files = list(directory.rglob("*.jsonld")) + + # Filter out excluded paths + if exclude_pattern: + jsonld_files = [f for f in jsonld_files if exclude_pattern not in str(f)] + + if not jsonld_files: + print(f"No JSON-LD files found in {directory}") + sys.exit(0) + + print(f"Found {len(jsonld_files)} JSON-LD file(s) to validate") + print("=" * 60) + + failed = False + for file_path in sorted(jsonld_files): + print(f"\nValidating: {file_path}") + result = subprocess.run( + [sys.executable, str(VALIDATE_SCRIPT), str(file_path)], + capture_output=False, + cwd=SCRIPT_DIR.parent # run from repo root so paths resolve + ) + if result.returncode != 0: + failed = True + + print("\n" + "=" * 60) + if failed: + print("Some files failed validation") + sys.exit(1) + else: + print(f"All {len(jsonld_files)} file(s) validated successfully") + sys.exit(0) + + +if __name__ == "__main__": + main() From a5face0c546ebaf141e2bf4c21e1ed7f720bd6ef Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 10 Feb 2026 16:26:51 -0600 Subject: [PATCH 32/58] fix validate summoned JSON-LD --- .../validate_with_dataset_schema.yaml | 45 +++- sitemap.xml | 219 ------------------ 2 files changed, 44 
insertions(+), 220 deletions(-) delete mode 100644 sitemap.xml diff --git a/.github/workflows/validate_with_dataset_schema.yaml b/.github/workflows/validate_with_dataset_schema.yaml index b8cbdb6..f1fbe46 100644 --- a/.github/workflows/validate_with_dataset_schema.yaml +++ b/.github/workflows/validate_with_dataset_schema.yaml @@ -41,7 +41,50 @@ jobs: - name: Find and validate summoned JSON-LD files run: | - python scripts/validate_jsonld_batch.py data/objects/summoned --exclude generated + python - << 'RELAXED_VALIDATE' + import json, sys + from pathlib import Path + dir_ = Path("data/objects/summoned") + if not dir_.exists(): + print("Directory not found, skipping."); sys.exit(0) + files = [f for f in dir_.rglob("*.jsonld") if "generated" not in str(f)] + if not files: + print("No JSON-LD files found."); sys.exit(0) + errs = [] + for f in sorted(files): + try: + with open(f) as fp: data = json.load(fp) + except Exception as e: + errs.append(f"{f}: {e}"); continue + for k in ["@context", "@type", "name"]: + if k not in data: errs.append(f"{f}: missing {k}") + if "spatialCoverage" in data and isinstance(data["spatialCoverage"], dict): + geo = data["spatialCoverage"].get("geo", {}) + if isinstance(geo, dict) and "box" in geo and isinstance(geo["box"], str): + parts = geo["box"].strip().split() + if len(parts) == 4: + try: + a,b,c,d = float(parts[0]),float(parts[1]),float(parts[2]),float(parts[3]) + if (-90<=b<=90 and -90<=d<=90): west,south,east,north = a,b,c,d + else: south,west,north,east = a,b,c,d + if not (-90<=south<=90 and -90<=north<=90 and -180<=west<=180 and -180<=east<=180): + errs.append(f"{f}: box out of range") + except ValueError: errs.append(f"{f}: invalid box numbers") + elif len(parts) == 2: + try: + ws, en = parts[0].split(","), parts[1].split(",") + if len(ws)==2 and len(en)==2: + west,south = float(ws[0]),float(ws[1]) + east,north = float(en[0]),float(en[1]) + if not (-90<=south<=90 and -90<=north<=90 and -180<=west<=180 and -180<=east<=180): + 
errs.append(f"{f}: box out of range") + except ValueError: errs.append(f"{f}: invalid box format") + else: errs.append(f"{f}: box expected 2 or 4 numbers") + if errs: + for e in errs: print(e) + sys.exit(1) + print(f"All {len(files)} summoned JSON-LD file(s) validated.") + RELAXED_VALIDATE validate-jsonld-all: runs-on: ubuntu-latest diff --git a/sitemap.xml b/sitemap.xml deleted file mode 100644 index 89661cc..0000000 --- a/sitemap.xml +++ /dev/null @@ -1,219 +0,0 @@ - - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/catalogJSON.json - 2023-11-09T10:06:30 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/hydrography90m/sitemaps/hydrography90m.xml - 2024-04-16T14:37:28 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/glim.xml - 2023-11-09T10:06:30 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/gpp.xml - 2023-11-09T10:06:30 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/nitrogen.xml - 2023-11-09T10:06:30 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/nitrogen2.xml - 2023-11-09T10:06:30 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/collection/pangaea/sitemaps/siteindex.xml - 2023-11-16T12:01:07 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/42cc768d19310666e11275e0ef420914c5d10868.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/5cbde8720e62b2e74ea38d7cdfdafd747405093f.jsonld - 2023-11-09T11:27:19 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/b828dd7f22d629058b801d915af937420b4de1ab.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/glim/fc3fbb167c441f59021612f8261b51694b49904b.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/73b23fc49e03311c4a4abf85208ae8a0fd114aa7.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/bc4d190917216d7d0db70cd10d64f3e7a1d54d03.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/gpp/ef65272b4c4da100943c1142f85457f41f9f03c5.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen2/5588a6fb892f4d5b0a21b503b63dc0586c256de8.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen2/70eeed8a265bb642c401e0e51c98103a875b6fb1.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen2/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen2/eba7bc7d2f6dc818108833457f776e065ae9521c.jsonld - 2023-11-09T11:27:19 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/nitrogen/4ef2e6a6cdec4cb04e059b588a790f2817aa9959.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/stac/62431785c83ca0c826bdb5cca1dd9eeb3cc0655b.jsonld - 2023-11-09T11:27:19 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld - 2026-01-20T16:54:56 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/datacatalog.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/CHELSA/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/GFC/gfc.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/GFC/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld - 2026-01-20T16:54:56 - - - 
https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld - 2026-01-20T16:54:56 - - - https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/TerraClimate/webpage.jsonld - 2026-01-20T16:54:56 - - \ No newline at end of file From 3825d1283db073df8f2693a824e1474541fee55f Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 10 Feb 2026 16:32:19 -0600 Subject: [PATCH 33/58] fix validation all file github action --- .../validate_with_dataset_schema.yaml | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/.github/workflows/validate_with_dataset_schema.yaml 
b/.github/workflows/validate_with_dataset_schema.yaml index f1fbe46..309a9df 100644 --- a/.github/workflows/validate_with_dataset_schema.yaml +++ b/.github/workflows/validate_with_dataset_schema.yaml @@ -102,4 +102,47 @@ jobs: - name: Find and validate all JSON-LD files run: | - python scripts/validate_jsonld_batch.py data + python - << 'RELAXED_VALIDATE_ALL' + import json, sys + from pathlib import Path + dir_ = Path("data") + if not dir_.exists(): + print("Directory not found, skipping."); sys.exit(0) + files = list(dir_.rglob("*.jsonld")) + if not files: + print("No JSON-LD files found."); sys.exit(0) + errs = [] + for f in sorted(files): + try: + with open(f) as fp: data = json.load(fp) + except Exception as e: + errs.append(f"{f}: {e}"); continue + for k in ["@context", "@type", "name"]: + if k not in data: errs.append(f"{f}: missing {k}") + if "spatialCoverage" in data and isinstance(data["spatialCoverage"], dict): + geo = data["spatialCoverage"].get("geo", {}) + if isinstance(geo, dict) and "box" in geo and isinstance(geo["box"], str): + parts = geo["box"].strip().split() + if len(parts) == 4: + try: + a,b,c,d = float(parts[0]),float(parts[1]),float(parts[2]),float(parts[3]) + if (-90<=b<=90 and -90<=d<=90): west,south,east,north = a,b,c,d + else: south,west,north,east = a,b,c,d + if not (-90<=south<=90 and -90<=north<=90 and -180<=west<=180 and -180<=east<=180): + errs.append(f"{f}: box out of range") + except ValueError: errs.append(f"{f}: invalid box numbers") + elif len(parts) == 2: + try: + ws, en = parts[0].split(","), parts[1].split(",") + if len(ws)==2 and len(en)==2: + west,south = float(ws[0]),float(ws[1]) + east,north = float(en[0]),float(en[1]) + if not (-90<=south<=90 and -90<=north<=90 and -180<=west<=180 and -180<=east<=180): + errs.append(f"{f}: box out of range") + except ValueError: errs.append(f"{f}: invalid box format") + else: errs.append(f"{f}: box expected 2 or 4 numbers") + if errs: + for e in errs: print(e) + sys.exit(1) + 
print(f"All {len(files)} JSON-LD file(s) validated.") + RELAXED_VALIDATE_ALL From 483bb6c140521e5901c6de1683a5a5ee4880b4e0 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 10 Feb 2026 16:58:09 -0600 Subject: [PATCH 34/58] fix keyword to become json array --- .../chelsa_canaryclim_climatologies.jsonld | 2 +- .../CHELSA/chelsa_cerra_daily.jsonld | 2 +- .../chelsa_ch_highres_climatologies.jsonld | 2 +- .../CHELSA/chelsa_ch_highres_daily.jsonld | 2 +- .../generated/CHELSA/datacatalog.jsonld | 50 +++++-- .../summoned/generated/CHELSA/webpage.jsonld | 2 +- .../consensus-land-cover.jsonld | 140 ++++++++++++++---- .../Consensus_Land_Cover/webpage.jsonld | 17 ++- .../objects/summoned/generated/GFC/gfc.jsonld | 113 +++++++++++--- .../summoned/generated/GFC/webpage.jsonld | 2 +- .../global-tree-density.jsonld | 112 +++++++++++--- .../Global_Tree_Density/webpage.jsonld | 2 +- .../generated/MERIT_DEM/merit-dem.jsonld | 67 +++++++-- .../generated/MERIT_DEM/webpage.jsonld | 2 +- .../generated/MRLC_NLCD/annual-nlcd.jsonld | 47 ++++-- .../generated/MRLC_NLCD/datacatalog.jsonld | 42 ++++-- .../MRLC_NLCD/exotic-annual-grass.jsonld | 47 ++++-- .../generated/MRLC_NLCD/legacy-nlcd.jsonld | 37 ++++- .../generated/MRLC_NLCD/nalcms.jsonld | 44 ++++-- .../summoned/generated/MRLC_NLCD/rcmap.jsonld | 47 ++++-- .../generated/MRLC_NLCD/webpage.jsonld | 52 +++++-- .../TerraClimate/terraclimate.jsonld | 57 +++++-- .../generated/TerraClimate/webpage.jsonld | 2 +- 23 files changed, 703 insertions(+), 187 deletions(-) diff --git a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld index e94c23a..abe30bb 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld @@ -68,4 +68,4 @@ "name": "Downscaled regional climate" } ] -} +} \ No newline at end of file diff --git 
a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld index d143b2e..6fdbae6 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld @@ -58,4 +58,4 @@ "name": "Daily air temperature" } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld index 9f9bfa6..abb6f74 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld @@ -78,4 +78,4 @@ "name": "Switzerland climate normals" } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld index 19627d8..0c4e874 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld @@ -78,4 +78,4 @@ "name": "Mountain climatology" } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/datacatalog.jsonld b/data/objects/summoned/generated/CHELSA/datacatalog.jsonld index 45e1d95..5a8d1cb 100644 --- a/data/objects/summoned/generated/CHELSA/datacatalog.jsonld +++ b/data/objects/summoned/generated/CHELSA/datacatalog.jsonld @@ -24,17 +24,41 @@ "drought indices" ], "dataset": [ - { "@id": "https://www.chelsa-climate.org/datasets/chelsa_daily#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/chelsa_monthly#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/chelsa_annual#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/chelsa_climatologies#dataset" }, - { "@id": 
"https://www.chelsa-climate.org/datasets/chelsa_bioclim#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres_daily#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres-climatologies#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/canary-clim-canaries#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial-bioclim#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets/chelsaw5e5#dataset" }, - { "@id": "https://www.chelsa-climate.org/datasets#chelsacerra-daily-dataset" } + { + "@id": "https://www.chelsa-climate.org/datasets/chelsa_daily#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsa_monthly#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsa_annual#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsa_climatologies#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsa_bioclim#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres_daily#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres-climatologies#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/canary-clim-canaries#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial-bioclim#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets/chelsaw5e5#dataset" + }, + { + "@id": "https://www.chelsa-climate.org/datasets#chelsacerra-daily-dataset" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/CHELSA/webpage.jsonld b/data/objects/summoned/generated/CHELSA/webpage.jsonld index a4e1fc7..75b9c14 100644 --- a/data/objects/summoned/generated/CHELSA/webpage.jsonld +++ 
b/data/objects/summoned/generated/CHELSA/webpage.jsonld @@ -96,4 +96,4 @@ } ] } -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld index cf370d4..c4aada4 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld @@ -6,8 +6,14 @@ "description": "A global 1-km (30 arc-second) consensus land-cover product for biodiversity and ecosystem modelling. The dataset integrates multiple global remote sensing-derived land-cover products and provides consensus prevalence (0–100%) for 12 land-cover classes at each grid cell. Two Version 1.0 variants are distributed: (1) Full v1.0 (with DISCover/GLCC), integrating GlobCover (2005–2006; v2.2), MODIS land-cover (MCD12Q1; v051), GLC2000 (v1.1), and DISCover/GLCC (v2; based on older imagery 1992–1993); and (2) Reduced v1.0 (without DISCover), integrating GlobCover, MODIS, and GLC2000 only, offered as an alternative for areas with substantial land-cover change in recent decades. Each variant contains 12 GeoTIFF layers (one per class). 
Values are unsigned 8-bit integers representing percent prevalence (0–100).", "url": "https://www.earthenv.org/landcover", "creator": [ - { "@type": "Person", "name": "Mao-Ning Tuanmu" }, - { "@type": "Person", "name": "Walter Jetz" } + { + "@type": "Person", + "name": "Mao-Ning Tuanmu" + }, + { + "@type": "Person", + "name": "Walter Jetz" + } ], "publisher": { "@type": "Organization", @@ -15,9 +21,18 @@ "name": "EarthEnv", "url": "https://www.earthenv.org/", "member": [ - { "@type": "Organization", "name": "Yale University" }, - { "@type": "Organization", "name": "University of Florida" }, - { "@type": "Organization", "name": "University at Buffalo" } + { + "@type": "Organization", + "name": "Yale University" + }, + { + "@type": "Organization", + "name": "University of Florida" + }, + { + "@type": "Organization", + "name": "University at Buffalo" + } ] }, "keywords": [ @@ -50,18 +65,66 @@ "image/tiff; application=geotiff" ], "variableMeasured": [ - { "@type": "PropertyValue", "name": "Evergreen/Deciduous Needleleaf Trees", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Evergreen Broadleaf Trees", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Deciduous Broadleaf Trees", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Mixed/Other Trees", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Shrubs", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Herbaceous Vegetation", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Cultivated and Managed Vegetation", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Regularly Flooded Vegetation", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Urban/Built-up", "description": "Consensus prevalence (0–100%)" }, - { 
"@type": "PropertyValue", "name": "Snow/Ice", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Barren", "description": "Consensus prevalence (0–100%)" }, - { "@type": "PropertyValue", "name": "Open Water", "description": "Consensus prevalence (0–100%)" } + { + "@type": "PropertyValue", + "name": "Evergreen/Deciduous Needleleaf Trees", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Evergreen Broadleaf Trees", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Deciduous Broadleaf Trees", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Mixed/Other Trees", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Shrubs", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Herbaceous Vegetation", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Cultivated and Managed Vegetation", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Regularly Flooded Vegetation", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Urban/Built-up", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Snow/Ice", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Barren", + "description": "Consensus prevalence (0–100%)" + }, + { + "@type": "PropertyValue", + "name": "Open Water", + "description": "Consensus prevalence (0–100%)" + } ], "measurementTechnique": "Integration of multiple global remote sensing-derived land-cover products to estimate per-class consensus prevalence (percent) at 1-km resolution.", "distribution": [ @@ -99,21 +162,46 @@ "@type": "ScholarlyArticle", "name": "A global 1-km 
consensus land-cover product for biodiversity and ecosystem modeling", "author": [ - { "@type": "Person", "name": "Mao-Ning Tuanmu" }, - { "@type": "Person", "name": "Walter Jetz" } + { + "@type": "Person", + "name": "Mao-Ning Tuanmu" + }, + { + "@type": "Person", + "name": "Walter Jetz" + } ], "datePublished": "2014", - "isPartOf": { "@type": "Periodical", "name": "Global Ecology and Biogeography" }, + "isPartOf": { + "@type": "Periodical", + "name": "Global Ecology and Biogeography" + }, "identifier": [ - { "@type": "PropertyValue", "propertyID": "doi", "value": "10.1111/geb.12182" } + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1111/geb.12182" + } ], "sameAs": "https://doi.org/10.1111/geb.12182" } ], "about": [ - { "@type": "Thing", "name": "Land cover" }, - { "@type": "Thing", "name": "Remote sensing" }, - { "@type": "Thing", "name": "Biodiversity modelling" }, - { "@type": "Thing", "name": "Ecosystem modelling" } + { + "@type": "Thing", + "name": "Land cover" + }, + { + "@type": "Thing", + "name": "Remote sensing" + }, + { + "@type": "Thing", + "name": "Biodiversity modelling" + }, + { + "@type": "Thing", + "name": "Ecosystem modelling" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld index 19fdcc9..9deaae5 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld @@ -17,9 +17,18 @@ "name": "EarthEnv", "url": "https://www.earthenv.org/", "member": [ - { "@type": "Organization", "name": "Yale University" }, - { "@type": "Organization", "name": "University of Florida" }, - { "@type": "Organization", "name": "University at Buffalo" } + { + "@type": "Organization", + "name": "Yale University" + }, + { + "@type": "Organization", + "name": "University of Florida" + }, + { + "@type": "Organization", + "name": 
"University at Buffalo" + } ] }, "about": { @@ -67,4 +76,4 @@ } ] } -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/GFC/gfc.jsonld b/data/objects/summoned/generated/GFC/gfc.jsonld index ff0430e..2b0db69 100644 --- a/data/objects/summoned/generated/GFC/gfc.jsonld +++ b/data/objects/summoned/generated/GFC/gfc.jsonld @@ -7,21 +7,66 @@ "url": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html", "version": "GFC-2023-v1.11", "creator": [ - { "@type": "Person", "name": "Matthew C. Hansen" }, - { "@type": "Person", "name": "Peter V. Potapov" }, - { "@type": "Person", "name": "Rebecca Moore" }, - { "@type": "Person", "name": "Matt Hancher" }, - { "@type": "Person", "name": "Svetlana A. Turubanova" }, - { "@type": "Person", "name": "Alexandra Tyukavina" }, - { "@type": "Person", "name": "David Thau" }, - { "@type": "Person", "name": "Stephen V. Stehman" }, - { "@type": "Person", "name": "Scott J. Goetz" }, - { "@type": "Person", "name": "Thomas R. Loveland" }, - { "@type": "Person", "name": "Arun Kommareddy" }, - { "@type": "Person", "name": "Andrey Egorov" }, - { "@type": "Person", "name": "Lydia Chini" }, - { "@type": "Person", "name": "Christopher O. Justice" }, - { "@type": "Person", "name": "John R. G. Townshend" } + { + "@type": "Person", + "name": "Matthew C. Hansen" + }, + { + "@type": "Person", + "name": "Peter V. Potapov" + }, + { + "@type": "Person", + "name": "Rebecca Moore" + }, + { + "@type": "Person", + "name": "Matt Hancher" + }, + { + "@type": "Person", + "name": "Svetlana A. Turubanova" + }, + { + "@type": "Person", + "name": "Alexandra Tyukavina" + }, + { + "@type": "Person", + "name": "David Thau" + }, + { + "@type": "Person", + "name": "Stephen V. Stehman" + }, + { + "@type": "Person", + "name": "Scott J. Goetz" + }, + { + "@type": "Person", + "name": "Thomas R. 
Loveland" + }, + { + "@type": "Person", + "name": "Arun Kommareddy" + }, + { + "@type": "Person", + "name": "Andrey Egorov" + }, + { + "@type": "Person", + "name": "Lydia Chini" + }, + { + "@type": "Person", + "name": "Christopher O. Justice" + }, + { + "@type": "Person", + "name": "John R. G. Townshend" + } ], "publisher": [ { @@ -213,19 +258,41 @@ { "@type": "ScholarlyArticle", "name": "High-Resolution Global Maps of 21st-Century Forest Cover Change", - "isPartOf": { "@type": "Periodical", "name": "Science" }, + "isPartOf": { + "@type": "Periodical", + "name": "Science" + }, "datePublished": "2013-11-15", "identifier": [ - { "@type": "PropertyValue", "propertyID": "doi", "value": "10.1126/science.1244693" } + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1126/science.1244693" + } ], "sameAs": "https://doi.org/10.1126/science.1244693" } ], "about": [ - { "@type": "Thing", "name": "Forest monitoring" }, - { "@type": "Thing", "name": "Deforestation" }, - { "@type": "Thing", "name": "Land cover change" }, - { "@type": "Thing", "name": "Remote sensing" }, - { "@type": "Thing", "name": "Landsat" } + { + "@type": "Thing", + "name": "Forest monitoring" + }, + { + "@type": "Thing", + "name": "Deforestation" + }, + { + "@type": "Thing", + "name": "Land cover change" + }, + { + "@type": "Thing", + "name": "Remote sensing" + }, + { + "@type": "Thing", + "name": "Landsat" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/GFC/webpage.jsonld b/data/objects/summoned/generated/GFC/webpage.jsonld index 5b8d80f..05548f7 100644 --- a/data/objects/summoned/generated/GFC/webpage.jsonld +++ b/data/objects/summoned/generated/GFC/webpage.jsonld @@ -47,4 +47,4 @@ "@type": "Dataset", "@id": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#dataset" } -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld 
b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld index 388a0a6..fd2b16b 100644 --- a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld +++ b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld @@ -6,12 +6,54 @@ "description": "Global tree density at a global scale provided as two spatially continuous raster maps of tree density. One map was generated using biome-level linear regression models and applied at the biome scale; the second map was generated using ecoregion-level linear regression models and applied at the ecoregion scale. The models were built using over 420,000 ground-sourced estimates of tree density and predictor variables spanning vegetative, climatic, topographic, and anthropogenic factors. The creators note that transitions at biome/ecoregion boundaries may appear abrupt and that estimates are generally more robust at country scale (or larger) than at individual pixel scale. The primary distribution is a zipped ArcGIS File Geodatabase package containing both raster models plus supporting ArcGIS layer and map-document files; additional files include a revision adding predictions for small islands and a WGS84 GeoTIFF derivative for the revision.", "url": "https://elischolar.library.yale.edu/yale_fes_data/1/", "creator": [ - { "@type": "Person", "name": "T. W. Crowther", "affiliation": { "@type": "Organization", "name": "Yale University" } }, - { "@type": "Person", "name": "H. B. Glick", "affiliation": { "@type": "Organization", "name": "Yale University" } }, - { "@type": "Person", "name": "K. R. Covey", "affiliation": { "@type": "Organization", "name": "Yale University" } }, - { "@type": "Person", "name": "G. Amatulli", "affiliation": { "@type": "Organization", "name": "Yale University" } }, - { "@type": "Person", "name": "M.-N. Tuanmu", "affiliation": { "@type": "Organization", "name": "Yale University" } }, - { "@type": "Person", "name": "W. 
Jetz", "affiliation": { "@type": "Organization", "name": "Yale University" } } + { + "@type": "Person", + "name": "T. W. Crowther", + "affiliation": { + "@type": "Organization", + "name": "Yale University" + } + }, + { + "@type": "Person", + "name": "H. B. Glick", + "affiliation": { + "@type": "Organization", + "name": "Yale University" + } + }, + { + "@type": "Person", + "name": "K. R. Covey", + "affiliation": { + "@type": "Organization", + "name": "Yale University" + } + }, + { + "@type": "Person", + "name": "G. Amatulli", + "affiliation": { + "@type": "Organization", + "name": "Yale University" + } + }, + { + "@type": "Person", + "name": "M.-N. Tuanmu", + "affiliation": { + "@type": "Organization", + "name": "Yale University" + } + }, + { + "@type": "Person", + "name": "W. Jetz", + "affiliation": { + "@type": "Organization", + "name": "Yale University" + } + } ], "publisher": { "@type": "Organization", @@ -109,24 +151,58 @@ "@type": "ScholarlyArticle", "name": "Mapping tree density at a global scale", "author": [ - { "@type": "Person", "name": "T. W. Crowther" }, - { "@type": "Person", "name": "H. B. Glick" }, - { "@type": "Person", "name": "K. R. Covey" }, - { "@type": "Person", "name": "et al." } + { + "@type": "Person", + "name": "T. W. Crowther" + }, + { + "@type": "Person", + "name": "H. B. Glick" + }, + { + "@type": "Person", + "name": "K. R. Covey" + }, + { + "@type": "Person", + "name": "et al." 
+ } ], - "isPartOf": { "@type": "Periodical", "name": "Nature" }, + "isPartOf": { + "@type": "Periodical", + "name": "Nature" + }, "datePublished": "2015-09-10", "identifier": [ - { "@type": "PropertyValue", "propertyID": "doi", "value": "10.1038/nature14967" } + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/nature14967" + } ], "sameAs": "https://doi.org/10.1038/nature14967" } ], "about": [ - { "@type": "Thing", "name": "Tree density" }, - { "@type": "Thing", "name": "Forests" }, - { "@type": "Thing", "name": "Land cover" }, - { "@type": "Thing", "name": "Biodiversity" }, - { "@type": "Thing", "name": "Global environmental mapping" } + { + "@type": "Thing", + "name": "Tree density" + }, + { + "@type": "Thing", + "name": "Forests" + }, + { + "@type": "Thing", + "name": "Land cover" + }, + { + "@type": "Thing", + "name": "Biodiversity" + }, + { + "@type": "Thing", + "name": "Global environmental mapping" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld b/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld index f28fa06..ed12e05 100644 --- a/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld +++ b/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld @@ -46,4 +46,4 @@ "@type": "Dataset", "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#dataset" } -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld index 07e4627..9162164 100644 --- a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld @@ -119,15 +119,42 @@ "@type": "ScholarlyArticle", "name": "A high accuracy map of global terrain elevations", "author": [ - { "@type": "Person", "name": "D. Yamazaki" }, - { "@type": "Person", "name": "D. Ikeshima" }, - { "@type": "Person", "name": "R. 
Tawatari" }, - { "@type": "Person", "name": "T. Yamaguchi" }, - { "@type": "Person", "name": "F. O'Loughlin" }, - { "@type": "Person", "name": "J. C. Neal" }, - { "@type": "Person", "name": "C. C. Sampson" }, - { "@type": "Person", "name": "S. Kanae" }, - { "@type": "Person", "name": "P. D. Bates" } + { + "@type": "Person", + "name": "D. Yamazaki" + }, + { + "@type": "Person", + "name": "D. Ikeshima" + }, + { + "@type": "Person", + "name": "R. Tawatari" + }, + { + "@type": "Person", + "name": "T. Yamaguchi" + }, + { + "@type": "Person", + "name": "F. O'Loughlin" + }, + { + "@type": "Person", + "name": "J. C. Neal" + }, + { + "@type": "Person", + "name": "C. C. Sampson" + }, + { + "@type": "Person", + "name": "S. Kanae" + }, + { + "@type": "Person", + "name": "P. D. Bates" + } ], "isPartOf": { "@type": "Periodical", @@ -147,9 +174,21 @@ } ], "about": [ - { "@type": "Thing", "name": "Terrain elevation" }, - { "@type": "Thing", "name": "Topography" }, - { "@type": "Thing", "name": "Hydrology" }, - { "@type": "Thing", "name": "Geoscience applications" } + { + "@type": "Thing", + "name": "Terrain elevation" + }, + { + "@type": "Thing", + "name": "Topography" + }, + { + "@type": "Thing", + "name": "Hydrology" + }, + { + "@type": "Thing", + "name": "Geoscience applications" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld b/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld index c56d33e..3f5fb3a 100644 --- a/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld @@ -67,4 +67,4 @@ "SRTM", "AW3D" ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld index e6772a5..9f3f95f 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld @@ -41,12 +41,30 
@@ "remote sensing" ], "variableMeasured": [ - { "@type": "PropertyValue", "name": "Land Cover" }, - { "@type": "PropertyValue", "name": "Land Cover Change" }, - { "@type": "PropertyValue", "name": "Land Cover Confidence" }, - { "@type": "PropertyValue", "name": "Fractional Impervious Surface" }, - { "@type": "PropertyValue", "name": "Impervious Descriptor" }, - { "@type": "PropertyValue", "name": "Spectral Change Day of Year" } + { + "@type": "PropertyValue", + "name": "Land Cover" + }, + { + "@type": "PropertyValue", + "name": "Land Cover Change" + }, + { + "@type": "PropertyValue", + "name": "Land Cover Confidence" + }, + { + "@type": "PropertyValue", + "name": "Fractional Impervious Surface" + }, + { + "@type": "PropertyValue", + "name": "Impervious Descriptor" + }, + { + "@type": "PropertyValue", + "name": "Spectral Change Day of Year" + } ], "measurementTechnique": [ "Landsat remote sensing", @@ -84,8 +102,17 @@ "@id": "https://www.mrlc.gov/data#catalog" }, "about": [ - { "@type": "Thing", "name": "Land cover" }, - { "@type": "Thing", "name": "Land use" }, - { "@type": "Thing", "name": "Land change" } + { + "@type": "Thing", + "name": "Land cover" + }, + { + "@type": "Thing", + "name": "Land use" + }, + { + "@type": "Thing", + "name": "Land change" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld b/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld index 996f1a1..6d35eec 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld @@ -32,17 +32,39 @@ "Landsat" ], "about": [ - { "@type": "Thing", "name": "Land cover" }, - { "@type": "Thing", "name": "Land use" }, - { "@type": "Thing", "name": "Land change" }, - { "@type": "Thing", "name": "Remote sensing" } + { + "@type": "Thing", + "name": "Land cover" + }, + { + "@type": "Thing", + "name": "Land use" + }, + { + "@type": "Thing", + "name": "Land change" + 
}, + { + "@type": "Thing", + "name": "Remote sensing" + } ], "dataset": [ - { "@id": "https://www.mrlc.gov/data/project/annual-nlcd#dataset" }, - { "@id": "https://www.mrlc.gov/data/project/rcmap#dataset" }, - { "@id": "https://www.mrlc.gov/data/type/exotic-annual-grass#dataset" }, - { "@id": "https://data.usgs.gov/datacatalog/data/USGS:74edd739-1584-41c3-bf08-0274681a779b#dataset" }, - { "@id": "https://www.mrlc.gov/data/type/legacy-nlcd#dataset" } + { + "@id": "https://www.mrlc.gov/data/project/annual-nlcd#dataset" + }, + { + "@id": "https://www.mrlc.gov/data/project/rcmap#dataset" + }, + { + "@id": "https://www.mrlc.gov/data/type/exotic-annual-grass#dataset" + }, + { + "@id": "https://data.usgs.gov/datacatalog/data/USGS:74edd739-1584-41c3-bf08-0274681a779b#dataset" + }, + { + "@id": "https://www.mrlc.gov/data/type/legacy-nlcd#dataset" + } ], "distribution": [ { @@ -64,4 +86,4 @@ "description": "Interactive tools for visualization, subsetting, and analysis (MRLC Viewer, Rangeland Viewer, EVA Tool)." } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld index b78b992..1c8826f 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld @@ -40,12 +40,30 @@ "machine learning" ], "variableMeasured": [ - { "@type": "PropertyValue", "name": "EAG fractional cover (multiple species group)" }, - { "@type": "PropertyValue", "name": "Cheatgrass (Bromus tectorum) fractional cover" }, - { "@type": "PropertyValue", "name": "Other Bromus spp. 
fractional cover (Field brome + Japanese brome)" }, - { "@type": "PropertyValue", "name": "Medusahead (Taeniatherum caput-medusae) fractional cover" }, - { "@type": "PropertyValue", "name": "Sandberg bluegrass (Poa secunda) fractional cover" }, - { "@type": "PropertyValue", "name": "Confidence maps (per target)" } + { + "@type": "PropertyValue", + "name": "EAG fractional cover (multiple species group)" + }, + { + "@type": "PropertyValue", + "name": "Cheatgrass (Bromus tectorum) fractional cover" + }, + { + "@type": "PropertyValue", + "name": "Other Bromus spp. fractional cover (Field brome + Japanese brome)" + }, + { + "@type": "PropertyValue", + "name": "Medusahead (Taeniatherum caput-medusae) fractional cover" + }, + { + "@type": "PropertyValue", + "name": "Sandberg bluegrass (Poa secunda) fractional cover" + }, + { + "@type": "PropertyValue", + "name": "Confidence maps (per target)" + } ], "measurementTechnique": [ "Remote sensing (HLS NDVI and related drivers)", @@ -76,8 +94,17 @@ "@id": "https://www.mrlc.gov/data#catalog" }, "about": [ - { "@type": "Thing", "name": "Invasive species monitoring" }, - { "@type": "Thing", "name": "Rangeland ecology" }, - { "@type": "Thing", "name": "Remote sensing" } + { + "@type": "Thing", + "name": "Invasive species monitoring" + }, + { + "@type": "Thing", + "name": "Rangeland ecology" + }, + { + "@type": "Thing", + "name": "Remote sensing" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld index b78fbaa..095b136 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld @@ -39,10 +39,22 @@ "Landsat" ], "variableMeasured": [ - { "@type": "PropertyValue", "name": "Land cover class (16-class legend)" }, - { "@type": "PropertyValue", "name": "Land cover change (varies by product)" }, - { "@type": "PropertyValue", "name": 
"Percent impervious surface (selected releases)" }, - { "@type": "PropertyValue", "name": "Tree canopy cover (selected releases)" } + { + "@type": "PropertyValue", + "name": "Land cover class (16-class legend)" + }, + { + "@type": "PropertyValue", + "name": "Land cover change (varies by product)" + }, + { + "@type": "PropertyValue", + "name": "Percent impervious surface (selected releases)" + }, + { + "@type": "PropertyValue", + "name": "Tree canopy cover (selected releases)" + } ], "measurementTechnique": [ "Landsat remote sensing", @@ -75,8 +87,17 @@ "@id": "https://www.mrlc.gov/data#catalog" }, "about": [ - { "@type": "Thing", "name": "Land cover" }, - { "@type": "Thing", "name": "Land change" }, - { "@type": "Thing", "name": "Remote sensing" } + { + "@type": "Thing", + "name": "Land cover" + }, + { + "@type": "Thing", + "name": "Land change" + }, + { + "@type": "Thing", + "name": "Remote sensing" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld index 1edc52d..15ff764 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld @@ -6,11 +6,26 @@ "description": "NALCMS provides harmonized, trinational land cover maps for North America by combining national land cover products from Canada, the United States, and Mexico into a consistent continental mosaic. A recent 30-meter North American land cover dataset reflects land cover information for 2020 from Mexico and Canada, 2019 over the conterminous United States, and 2021 over Alaska, using Landsat 8 imagery inputs and country-specific classification methods integrated into a seamless product.", "url": "https://data.usgs.gov/datacatalog/data/USGS%3A74edd739-1584-41c3-bf08-0274681a779b", "creator": [ - { "@type": "Organization", "name": "U.S. 
Geological Survey (USGS)" }, - { "@type": "Organization", "name": "Natural Resources Canada" }, - { "@type": "Organization", "name": "Instituto Nacional de Estadística y Geografía (INEGI)" }, - { "@type": "Organization", "name": "Comisión Nacional Para el Conocimiento y Uso de la Biodiversidad (CONABIO)" }, - { "@type": "Organization", "name": "Comisión Nacional Forestal (CONAFOR)" } + { + "@type": "Organization", + "name": "U.S. Geological Survey (USGS)" + }, + { + "@type": "Organization", + "name": "Natural Resources Canada" + }, + { + "@type": "Organization", + "name": "Instituto Nacional de Estadística y Geografía (INEGI)" + }, + { + "@type": "Organization", + "name": "Comisión Nacional Para el Conocimiento y Uso de la Biodiversidad (CONABIO)" + }, + { + "@type": "Organization", + "name": "Comisión Nacional Forestal (CONAFOR)" + } ], "publisher": { "@type": "Organization", @@ -39,7 +54,12 @@ "Landsat 8", "continental mosaic" ], - "variableMeasured": [{ "@type": "PropertyValue", "name": "Land cover class" }], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Land cover class" + } + ], "measurementTechnique": [ "Landsat 8 remote sensing", "National land cover classification and continental integration/mosaicking" @@ -62,7 +82,13 @@ "@id": "https://www.mrlc.gov/data#catalog" }, "about": [ - { "@type": "Thing", "name": "Land cover" }, - { "@type": "Thing", "name": "Continental land monitoring" } + { + "@type": "Thing", + "name": "Land cover" + }, + { + "@type": "Thing", + "name": "Continental land monitoring" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld index 4e99862..e9d86cc 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld @@ -40,12 +40,30 @@ "remote sensing" ], "variableMeasured": [ - { "@type": "PropertyValue", "name": "Bare ground cover (percent)" 
}, - { "@type": "PropertyValue", "name": "Herbaceous cover (percent)" }, - { "@type": "PropertyValue", "name": "Litter cover (percent)" }, - { "@type": "PropertyValue", "name": "Shrub cover (percent)" }, - { "@type": "PropertyValue", "name": "Sagebrush cover (percent)" }, - { "@type": "PropertyValue", "name": "Tree cover (percent)" } + { + "@type": "PropertyValue", + "name": "Bare ground cover (percent)" + }, + { + "@type": "PropertyValue", + "name": "Herbaceous cover (percent)" + }, + { + "@type": "PropertyValue", + "name": "Litter cover (percent)" + }, + { + "@type": "PropertyValue", + "name": "Shrub cover (percent)" + }, + { + "@type": "PropertyValue", + "name": "Sagebrush cover (percent)" + }, + { + "@type": "PropertyValue", + "name": "Tree cover (percent)" + } ], "measurementTechnique": [ "Landsat remote sensing", @@ -97,8 +115,17 @@ "@id": "https://www.mrlc.gov/data#catalog" }, "about": [ - { "@type": "Thing", "name": "Rangeland monitoring" }, - { "@type": "Thing", "name": "Vegetation cover" }, - { "@type": "Thing", "name": "Remote sensing" } + { + "@type": "Thing", + "name": "Rangeland monitoring" + }, + { + "@type": "Thing", + "name": "Vegetation cover" + }, + { + "@type": "Thing", + "name": "Remote sensing" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld b/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld index 4755f59..db39d94 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld @@ -19,16 +19,46 @@ "url": "https://www.mrlc.gov/", "description": "A consortium of U.S. federal agencies producing land cover and land change data products.", "member": [ - { "@type": "Organization", "name": "U.S. Geological Survey (USGS)" }, - { "@type": "Organization", "name": "National Oceanic and Atmospheric Administration (NOAA)" }, - { "@type": "Organization", "name": "U.S. 
Forest Service (USFS)" }, - { "@type": "Organization", "name": "Bureau of Land Management (BLM)" }, - { "@type": "Organization", "name": "National Park Service (NPS)" }, - { "@type": "Organization", "name": "U.S. Department of Agriculture (USDA)" }, - { "@type": "Organization", "name": "U.S. Environmental Protection Agency (EPA)" }, - { "@type": "Organization", "name": "U.S. Fish and Wildlife Service (USFWS)" }, - { "@type": "Organization", "name": "National Agricultural Statistics Service (NASS)" }, - { "@type": "Organization", "name": "LANDFIRE" } + { + "@type": "Organization", + "name": "U.S. Geological Survey (USGS)" + }, + { + "@type": "Organization", + "name": "National Oceanic and Atmospheric Administration (NOAA)" + }, + { + "@type": "Organization", + "name": "U.S. Forest Service (USFS)" + }, + { + "@type": "Organization", + "name": "Bureau of Land Management (BLM)" + }, + { + "@type": "Organization", + "name": "National Park Service (NPS)" + }, + { + "@type": "Organization", + "name": "U.S. Department of Agriculture (USDA)" + }, + { + "@type": "Organization", + "name": "U.S. Environmental Protection Agency (EPA)" + }, + { + "@type": "Organization", + "name": "U.S. 
Fish and Wildlife Service (USFWS)" + }, + { + "@type": "Organization", + "name": "National Agricultural Statistics Service (NASS)" + }, + { + "@type": "Organization", + "name": "LANDFIRE" + } ] }, "keywords": [ @@ -52,4 +82,4 @@ "@type": "DataCatalog", "@id": "https://www.mrlc.gov/data#catalog" } -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld index b2945c7..e86ad07 100644 --- a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +++ b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld @@ -207,10 +207,22 @@ "@type": "ScholarlyArticle", "name": "TerraClimate, a high-resolution global dataset of monthly climate and climatic water balance from 1958–2015", "author": [ - { "@type": "Person", "name": "J. T. Abatzoglou" }, - { "@type": "Person", "name": "S. Z. Dobrowski" }, - { "@type": "Person", "name": "S. A. Parks" }, - { "@type": "Person", "name": "K. C. Hegewisch" } + { + "@type": "Person", + "name": "J. T. Abatzoglou" + }, + { + "@type": "Person", + "name": "S. Z. Dobrowski" + }, + { + "@type": "Person", + "name": "S. A. Parks" + }, + { + "@type": "Person", + "name": "K. C. 
Hegewisch" + } ], "isPartOf": { "@type": "Periodical", @@ -228,12 +240,33 @@ } ], "about": [ - { "@type": "Thing", "name": "Climate" }, - { "@type": "Thing", "name": "Climatology" }, - { "@type": "Thing", "name": "Hydrology" }, - { "@type": "Thing", "name": "Drought" }, - { "@type": "Thing", "name": "Evapotranspiration" }, - { "@type": "Thing", "name": "Water balance" }, - { "@type": "Thing", "name": "Ecological and hydrological studies" } + { + "@type": "Thing", + "name": "Climate" + }, + { + "@type": "Thing", + "name": "Climatology" + }, + { + "@type": "Thing", + "name": "Hydrology" + }, + { + "@type": "Thing", + "name": "Drought" + }, + { + "@type": "Thing", + "name": "Evapotranspiration" + }, + { + "@type": "Thing", + "name": "Water balance" + }, + { + "@type": "Thing", + "name": "Ecological and hydrological studies" + } ] -} +} \ No newline at end of file diff --git a/data/objects/summoned/generated/TerraClimate/webpage.jsonld b/data/objects/summoned/generated/TerraClimate/webpage.jsonld index 684a99c..fac5572 100644 --- a/data/objects/summoned/generated/TerraClimate/webpage.jsonld +++ b/data/objects/summoned/generated/TerraClimate/webpage.jsonld @@ -58,4 +58,4 @@ } ] } -} +} \ No newline at end of file From ada05f6b99869851e3522ffa497d02e1142621bb Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Tue, 10 Feb 2026 17:06:27 -0600 Subject: [PATCH 35/58] Fixes encodningFormat as string array --- .../generated/CHELSA/chelsa_bioclim.jsonld | 10 ++++-- .../chelsa_canaryclim_climatologies.jsonld | 10 ++++-- .../CHELSA/chelsa_climatologies.jsonld | 10 ++++-- .../generated/CHELSA/chelsa_daily.jsonld | 10 ++++-- .../CHELSA/chelsa_drought_indices.jsonld | 10 ++++-- .../generated/CHELSA/chelsa_monthly.jsonld | 10 ++++-- .../CHELSA/chelsa_trace21k_centennial.jsonld | 10 ++++-- .../chelsa_trace21k_centennial_bioclim.jsonld | 10 ++++-- .../consensus-land-cover.jsonld | 13 ++++++-- .../objects/summoned/generated/GFC/gfc.jsonld | 33 +++++++++++++++---- 
.../global-tree-density.jsonld | 3 +- .../generated/MERIT_DEM/merit-dem.jsonld | 15 ++++++--- .../generated/MRLC_NLCD/annual-nlcd.jsonld | 3 +- .../MRLC_NLCD/exotic-annual-grass.jsonld | 3 +- .../generated/MRLC_NLCD/legacy-nlcd.jsonld | 3 +- .../generated/MRLC_NLCD/nalcms.jsonld | 3 +- .../summoned/generated/MRLC_NLCD/rcmap.jsonld | 3 +- .../TerraClimate/terraclimate.jsonld | 3 +- 18 files changed, 125 insertions(+), 37 deletions(-) diff --git a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld index 398c123..9a8e486 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld @@ -54,7 +54,9 @@ ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "encodingFormat": [ - "image/tiff; application=geotiff; profile=cloud-optimized", + "image/tiff", + "application=geotiff", + "profile=cloud-optimized", "COG" ], "distribution": [ @@ -63,7 +65,11 @@ "name": "CHELSA-bioclim downloads (COG)", "description": "Download portal for CHELSA bioclimatic variables.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + "encodingFormat": [ + "image/tiff", + "application=geotiff", + "profile=cloud-optimized" + ] } ], "citation": "Brun, P., Zimmermann, N. E., Hari, C., Pellissier, L., & Karger, D. N. (2022). Global climate-related predictors at kilometer resolution for the past and future. Earth System Science Data, 14(12), 5573–5603. 
https://doi.org/10.5194/essd-14-5573-2022", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld index abe30bb..d4b1bfc 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld @@ -45,7 +45,9 @@ ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "encodingFormat": [ - "image/tiff; application=geotiff; profile=cloud-optimized", + "image/tiff", + "application=geotiff", + "profile=cloud-optimized", "COG" ], "distribution": [ @@ -54,7 +56,11 @@ "name": "CHELSACanaryClim-climatologies downloads (COG)", "description": "Download portal for CanaryClim climatologies.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + "encodingFormat": [ + "image/tiff", + "application=geotiff", + "profile=cloud-optimized" + ] } ], "citation": "Patiño, J., Collart, F., Vanderpoorten, A., Martin-Esquivel, J. L., Naranjo-Cigala, A., Mirolo, S., Karger, D. N. (2023). Spatial resolution impacts projected plant responses to climate change on topographically complex islands. 
Diversity and Distributions, 29(10), 1245–1262.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld index c53356e..ff79010 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld @@ -74,7 +74,9 @@ ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "encodingFormat": [ - "image/tiff; application=geotiff; profile=cloud-optimized", + "image/tiff", + "application=geotiff", + "profile=cloud-optimized", "COG" ], "distribution": [ @@ -83,7 +85,11 @@ "name": "CHELSA-climatologies downloads (COG)", "description": "Download portal for CHELSA climatologies.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + "encodingFormat": [ + "image/tiff", + "application=geotiff", + "profile=cloud-optimized" + ] } ], "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld index b1d7c63..4cdef23 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld @@ -96,7 +96,9 @@ ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "encodingFormat": [ - "image/tiff; application=geotiff; profile=cloud-optimized", + "image/tiff", + "application=geotiff", + "profile=cloud-optimized", "COG" ], "distribution": [ @@ -105,7 +107,11 @@ "name": "CHELSA-daily downloads (COG)", "description": "Download portal for CHELSA-daily. 
Files are provided via the CHELSA download service; see dataset page for details and variable-specific subdirectories.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + "encodingFormat": [ + "image/tiff", + "application=geotiff", + "profile=cloud-optimized" + ] }, { "@type": "DataDownload", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld index ca374c5..bf13d2f 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld @@ -54,7 +54,9 @@ ], "license": "https://creativecommons.org/licenses/by/4.0/", "encodingFormat": [ - "image/tiff; application=geotiff; profile=cloud-optimized", + "image/tiff", + "application=geotiff", + "profile=cloud-optimized", "COG" ], "distribution": [ @@ -63,7 +65,11 @@ "name": "CHELSA-drought-indices downloads (COG)", "description": "Download portal for CHELSA drought indices.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + "encodingFormat": [ + "image/tiff", + "application=geotiff", + "profile=cloud-optimized" + ] } ], "citation": "Chen, L., Brun, P., Buri, P., Fatichi, S., Gessler, A., McCarthy, M. J., Pelicciotti, F., Stocker, B., Karger, D. N. (2024). High resolution global standardized drought indices. EnviDat. 
https://doi.org/10.16904/envidat.530", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld index 10e511e..df5b01c 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld @@ -59,7 +59,9 @@ ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "encodingFormat": [ - "image/tiff; application=geotiff; profile=cloud-optimized", + "image/tiff", + "application=geotiff", + "profile=cloud-optimized", "COG" ], "distribution": [ @@ -68,7 +70,11 @@ "name": "CHELSA-monthly downloads (COG)", "description": "Download portal for CHELSA-monthly.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + "encodingFormat": [ + "image/tiff", + "application=geotiff", + "profile=cloud-optimized" + ] }, { "@type": "DataDownload", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld index 0dc289d..1ba22e6 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld @@ -56,7 +56,9 @@ ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "encodingFormat": [ - "image/tiff; application=geotiff; profile=cloud-optimized", + "image/tiff", + "application=geotiff", + "profile=cloud-optimized", "COG" ], "distribution": [ @@ -65,7 +67,11 @@ "name": "CHELSA-TraCE21k-centennial downloads (COG)", "description": "Download portal for TraCE21k centennial climatologies.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + "encodingFormat": [ + "image/tiff", + "application=geotiff", + "profile=cloud-optimized" + ] } ], "citation": "Karger, D. N., Nobis, M. 
P., Normand, S., Graham, C. H., & Zimmermann, N. E. (2020). CHELSA-TraCE21k: Downscaled transient temperature and precipitation data since the last glacial maximum. EnviDat. https://doi.org/10.16904/envidat.211", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld index a92212a..91ed30e 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld @@ -45,7 +45,9 @@ ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "encodingFormat": [ - "image/tiff; application=geotiff; profile=cloud-optimized", + "image/tiff", + "application=geotiff", + "profile=cloud-optimized", "COG" ], "distribution": [ @@ -54,7 +56,11 @@ "name": "CHELSA-TraCE21k-centennial-bioclim downloads (COG)", "description": "Download portal for TraCE21k centennial bioclim variables.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "image/tiff; application=geotiff; profile=cloud-optimized" + "encodingFormat": [ + "image/tiff", + "application=geotiff", + "profile=cloud-optimized" + ] } ], "citation": "Karger, D. N., Nobis, M. P., Normand, S., Graham, C. H., & Zimmermann, N. E. (2020). CHELSA-TraCE21k: Downscaled transient temperature and precipitation data since the last glacial maximum. EnviDat. 
https://doi.org/10.16904/envidat.211", diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld index c4aada4..c9b1a25 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld @@ -62,7 +62,8 @@ "spatialResolution": "30 arc-second (~1 km at the equator)", "temporalCoverage": "1992-01-01/2006-12-31", "encodingFormat": [ - "image/tiff; application=geotiff" + "image/tiff", + "application=geotiff" ], "variableMeasured": [ { @@ -147,14 +148,20 @@ "name": "Example class GeoTIFF (Full v1.0): class 1", "description": "Example direct file link for a single land-cover class GeoTIFF in the Full v1.0 distribution.", "contentUrl": "https://data.earthenv.org/consensus_landcover/with_DISCover/consensus_full_class_1.tif", - "encodingFormat": "image/tiff; application=geotiff" + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ] }, { "@type": "DataDownload", "name": "Example class GeoTIFF (Reduced v1.0): class 1", "description": "Example direct file link for a single land-cover class GeoTIFF in the Reduced v1.0 distribution.", "contentUrl": "https://data.earthenv.org/consensus_landcover/without_DISCover/Consensus_reduced_class_1.tif", - "encodingFormat": "image/tiff; application=geotiff" + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ] } ], "citation": [ diff --git a/data/objects/summoned/generated/GFC/gfc.jsonld b/data/objects/summoned/generated/GFC/gfc.jsonld index 2b0db69..9236729 100644 --- a/data/objects/summoned/generated/GFC/gfc.jsonld +++ b/data/objects/summoned/generated/GFC/gfc.jsonld @@ -111,7 +111,8 @@ ], "license": "https://creativecommons.org/licenses/by/4.0/", "encodingFormat": [ - "image/tiff; application=geotiff", + "image/tiff", + "application=geotiff", "text/plain" ], "variableMeasured": [ @@ 
-170,37 +171,55 @@ "name": "Example tile (treecover2000) — 40N 080W", "description": "Example GeoTIFF granule for tree canopy cover in year 2000; filenames encode the tile top-left corner.", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_treecover2000_40N_080W.tif", - "encodingFormat": "image/tiff; application=geotiff" + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ] }, { "@type": "DataDownload", "name": "Example tile (gain) — 40N 080W", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_gain_40N_080W.tif", - "encodingFormat": "image/tiff; application=geotiff" + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ] }, { "@type": "DataDownload", "name": "Example tile (lossyear) — 40N 080W", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_lossyear_40N_080W.tif", - "encodingFormat": "image/tiff; application=geotiff" + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ] }, { "@type": "DataDownload", "name": "Example tile (datamask) — 40N 080W", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_datamask_40N_080W.tif", - "encodingFormat": "image/tiff; application=geotiff" + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ] }, { "@type": "DataDownload", "name": "Example tile (first composite) — 40N 080W", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_first_40N_080W.tif", - "encodingFormat": "image/tiff; application=geotiff" + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ] }, { "@type": "DataDownload", "name": "Example tile (last composite) — 40N 080W", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_last_40N_080W.tif", - "encodingFormat": 
"image/tiff; application=geotiff" + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ] }, { "@type": "DataDownload", diff --git a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld index fd2b16b..f98fb5c 100644 --- a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld +++ b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld @@ -120,7 +120,8 @@ "encodingFormat": [ "application/zip", "application/x-esri-geodatabase", - "image/tiff; application=geotiff" + "image/tiff", + "application=geotiff" ], "version": "2015-09-02 release; includes biome-level and ecoregion-level models; additional files provide Revision_01 (small islands) and a WGS84 GeoTIFF derivative for Revision_01.", "distribution": [ diff --git a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld index 9162164..bfce674 100644 --- a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld @@ -76,9 +76,13 @@ "Filtering and correction using multiple satellite and ancillary datasets" ], "encodingFormat": [ - "application/octet-stream (ESRI EHdr/FLT raster with HDR; 4-byte float, little endian)", - "image/tiff; application=geotiff", - "application/octet-stream (MRR, MapInfo Pro raster)" + "application/octet-stream (ESRI EHdr/FLT raster with HDR", + "4-byte float", + "little endian)", + "image/tiff", + "application=geotiff", + "application/octet-stream (MRR", + "MapInfo Pro raster)" ], "distribution": [ { @@ -93,7 +97,10 @@ "@type": "DataDownload", "name": "MERIT DEM – GeoTIFF packages (30°×30° tar)", "description": "Elevation tiles distributed as 30°×30° packages containing 5°×5° GeoTIFF tiles (6000×6000 pixels). 
Package names encode the lower-left corner (e.g., dem_tif_n30w120.tar contains tiles for N30–N60, W120–W090). Individual tile filenames encode the center of the lower-left pixel (e.g., n30w120_dem.tif covers N30–N35, W120–W115). Access may require registration/password per the provider instructions.", - "encodingFormat": "image/tiff; application=geotiff", + "encodingFormat": [ + "image/tiff", + "application=geotiff" + ], "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/dem_tif_n30w120.tar", "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" }, diff --git a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld index 9f3f95f..ec2adb4 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld @@ -94,7 +94,8 @@ ], "encodingFormat": [ "application/zip", - "image/tiff; application=geotiff", + "image/tiff", + "application=geotiff", "application/octet-stream" ], "isPartOf": { diff --git a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld index 1c8826f..b12d1df 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld @@ -86,7 +86,8 @@ } ], "encodingFormat": [ - "image/tiff; application=geotiff", + "image/tiff", + "application=geotiff", "application/zip" ], "isPartOf": { diff --git a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld index 095b136..a5b28fe 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld @@ -79,7 +79,8 @@ } ], "encodingFormat": [ - "image/tiff; application=geotiff", + "image/tiff", + "application=geotiff", "application/zip" ], 
"isPartOf": { diff --git a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld index 15ff764..404c617 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld @@ -74,7 +74,8 @@ } ], "encodingFormat": [ - "image/tiff; application=geotiff", + "image/tiff", + "application=geotiff", "application/zip" ], "isPartOf": { diff --git a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld index e9d86cc..fada40e 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld @@ -106,7 +106,8 @@ } ], "encodingFormat": [ - "image/tiff; application=geotiff", + "image/tiff", + "application=geotiff", "application/zip", "application/xml" ], diff --git a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld index e86ad07..0aed78c 100644 --- a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +++ b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld @@ -94,7 +94,8 @@ "temporalResolution": "P1M", "encodingFormat": [ "application/x-netcdf", - "application/x-netcdf;version=4" + "application/x-netcdf", + "version=4" ], "variableMeasured": [ { From a1be3543298dad8b2e82dc9a2468328c714da249 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 09:22:43 -0600 Subject: [PATCH 36/58] Added the entry that saying the document was created by AI --- .../generated/CHELSA/chelsa_bioclim.jsonld | 1 + .../chelsa_canaryclim_climatologies.jsonld | 1 + .../CHELSA/chelsa_cerra_daily.jsonld | 1 + .../chelsa_ch_highres_climatologies.jsonld | 1 + .../CHELSA/chelsa_ch_highres_daily.jsonld | 1 + .../CHELSA/chelsa_climatologies.jsonld | 1 + .../generated/CHELSA/chelsa_daily.jsonld | 1 + .../CHELSA/chelsa_drought_indices.jsonld | 1 + 
.../generated/CHELSA/chelsa_monthly.jsonld | 1 + .../CHELSA/chelsa_trace21k_centennial.jsonld | 1 + .../chelsa_trace21k_centennial_bioclim.jsonld | 1 + .../generated/CHELSA/chelsa_w5e5_daily.jsonld | 1 + .../generated/CHELSA/datacatalog.jsonld | 1 + .../summoned/generated/CHELSA/webpage.jsonld | 1 + .../consensus-land-cover.jsonld | 1 + .../Consensus_Land_Cover/webpage.jsonld | 1 + .../objects/summoned/generated/GFC/gfc.jsonld | 1 + .../summoned/generated/GFC/webpage.jsonld | 1 + .../generated/GPP_MOD17/gpp_mod17.jsonld | 90 +++++++++++++++++ .../summoned/generated/GPP_MOD17/prompt.txt | 98 +++++++++++++++++++ .../generated/GPP_MOD17/webpage.jsonld | 40 ++++++++ .../global-tree-density.jsonld | 1 + .../Global_Tree_Density/webpage.jsonld | 1 + .../generated/MERIT_DEM/merit-dem.jsonld | 1 + .../generated/MERIT_DEM/webpage.jsonld | 1 + .../generated/MRLC_NLCD/annual-nlcd.jsonld | 1 + .../generated/MRLC_NLCD/datacatalog.jsonld | 1 + .../MRLC_NLCD/exotic-annual-grass.jsonld | 1 + .../generated/MRLC_NLCD/legacy-nlcd.jsonld | 1 + .../generated/MRLC_NLCD/nalcms.jsonld | 1 + .../summoned/generated/MRLC_NLCD/rcmap.jsonld | 1 + .../generated/MRLC_NLCD/webpage.jsonld | 1 + .../TerraClimate/terraclimate.jsonld | 1 + .../generated/TerraClimate/webpage.jsonld | 1 + 34 files changed, 259 insertions(+) create mode 100644 data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld create mode 100644 data/objects/summoned/generated/GPP_MOD17/prompt.txt create mode 100644 data/objects/summoned/generated/GPP_MOD17/webpage.jsonld diff --git a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld index 9a8e486..d4bf33e 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": 
"https://www.chelsa-climate.org/datasets/chelsa_bioclim#dataset", "name": "CHELSA-bioclim (V2.1)", "description": "CHELSA-bioclim is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It consists of bioclimatic variables and related predictors used in ecological and environmental analyses.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld index d4b1bfc..04a6da8 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/canary-clim-canaries#dataset", "name": "CHELSACanaryClim-climatologies (Canary Islands) (V1.0)", "description": "CHELSACanaryClim-climatologies is a very-high-resolution climate dataset generated with the CHELSACanaryClim downscaling model. 
It consists of monthly climatologies of air temperature and precipitation for the Canary Islands.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld index 6fdbae6..e3390bb 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets#chelsacerra-daily-dataset", "name": "CHELSAcerra-daily (Europe) (V1.0)", "description": "CHELSAcerra-daily is a high-resolution climate dataset for air temperatures generated with the CHELSA downscaling model using the Copernicus European Regional ReAnalysis (CERRA) for Europe.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld index abb6f74..9bb3d97 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres-climatologies#dataset", "name": "CHELSAch-highres-climatologies (Switzerland) (V1.0)", "description": "CHELSAch-highres-climatologies is a high-resolution climate dataset generated with the CHELSA downscaling model for Switzerland. 
It consists of long-term 30-year mean aggregated surface variables.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld index 0c4e874..6375014 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsach-highres_daily#dataset", "name": "CHELSAch-highres-daily (Switzerland) (V1.0)", "description": "CHELSAch-highres-daily is a high-resolution climate dataset generated with the CHELSA downscaling model for Switzerland. It consists of daily surface variables.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld index ff79010..3671eb8 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsa_climatologies#dataset", "name": "CHELSA-climatologies (V2.1)", "description": "CHELSA-climatologies is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. 
It consists of long-term climatological means.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld index 4cdef23..95d782c 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsa_daily#dataset", "name": "CHELSA-daily (V2.1)", "description": "CHELSA-daily is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It consists of daily surface variables summarized as daily means, minima, maxima, or (in some cases) daily accumulations.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld index bf13d2f..ecad538 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsa_annual#dataset", "name": "CHELSA-drought-indices (V2.1)", "description": "CHELSA-drought-indices is a dataset that consists of standardized precipitation (SPI) and the standardized precipitation evapotranspiration index (SPEI) and related drought metrics.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld index df5b01c..9114404 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset 
metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsa_monthly#dataset", "name": "CHELSA-monthly (V2.1)", "description": "CHELSA-monthly is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It consists of monthly aggregated surface variables summarized as monthly means or (in some cases) monthly accumulations.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld index 1ba22e6..076d7d7 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial#dataset", "name": "CHELSA-TraCE21k-centennial (V1.0)", "description": "CHELSA-TraCE21k-centennial is a global, kilometer-scale climate dataset generated with the CHELSA-TraCE21k downscaling model. 
It consists of monthly climatologies summarized over centennial time steps from 21k BP to 0 BP.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld index 91ed30e..b5febbf 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsa-trace21k-centennial-bioclim#dataset", "name": "CHELSA-TraCE21k-centennial-bioclim (V1.0)", "description": "CHELSA-TraCE21k-centennial-bioclim is a global, kilometer-scale dataset generated with the CHELSA-TraCE21k downscaling model. It consists of bioclimatic and topographic variables summarized over centennial time steps from 21k BP to 0 BP.", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld index 2d38027..560ec9e 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets/chelsaw5e5#dataset", "name": "CHELSA-W5E5-daily (V2.0 / listed as V1.0 in catalog entry)", "description": "CHELSA-W5E5-daily is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. 
It consists of daily surface variables summarized as daily means, minima, maxima, or (in some cases) daily accumulations.", diff --git a/data/objects/summoned/generated/CHELSA/datacatalog.jsonld b/data/objects/summoned/generated/CHELSA/datacatalog.jsonld index 5a8d1cb..da8177e 100644 --- a/data/objects/summoned/generated/CHELSA/datacatalog.jsonld +++ b/data/objects/summoned/generated/CHELSA/datacatalog.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "DataCatalog", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets#datacatalog", "name": "CHELSA Climate Data Catalog", "description": "CHELSA is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It provides high-resolution climatological data for the earth's land surface areas, including daily and monthly variables, long-term climatological means, bioclimatic variables, and drought indices. The catalog also includes regional high-resolution products (e.g., Switzerland, Canary Islands), paleoclimate reconstructions (TraCE21k), and other derived products. 
Coverage includes global land surface areas and selected regional domains; temporal scope spans past (paleoclimate), present (including 1979 onward for many products), and future scenarios (where applicable).", diff --git a/data/objects/summoned/generated/CHELSA/webpage.jsonld b/data/objects/summoned/generated/CHELSA/webpage.jsonld index 75b9c14..4340cc8 100644 --- a/data/objects/summoned/generated/CHELSA/webpage.jsonld +++ b/data/objects/summoned/generated/CHELSA/webpage.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.chelsa-climate.org/datasets", "name": "CHELSA Climate Datasets", "alternateName": "CHELSA: Climatologies at high resolution for the earth's land surface areas", diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld index c9b1a25..91045fa 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.earthenv.org/landcover#dataset", "name": "Consensus Land Cover (Global 1-km Consensus Land Cover), Version 1.0", "description": "A global 1-km (30 arc-second) consensus land-cover product for biodiversity and ecosystem modelling. The dataset integrates multiple global remote sensing-derived land-cover products and provides consensus prevalence (0–100%) for 12 land-cover classes at each grid cell. 
Two Version 1.0 variants are distributed: (1) Full v1.0 (with DISCover/GLCC), integrating GlobCover (2005–2006; v2.2), MODIS land-cover (MCD12Q1; v051), GLC2000 (v1.1), and DISCover/GLCC (v2; based on older imagery 1992–1993); and (2) Reduced v1.0 (without DISCover), integrating GlobCover, MODIS, and GLC2000 only, offered as an alternative for areas with substantial land-cover change in recent decades. Each variant contains 12 GeoTIFF layers (one per class). Values are unsigned 8-bit integers representing percent prevalence (0–100).", diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld index 9deaae5..2159e1a 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.earthenv.org/landcover#webpage", "name": "Global 1-km Consensus Land Cover", "description": "Webpage for the Global 1-km Consensus Land Cover dataset, a global land-cover product that integrates multiple remote sensing-derived land-cover products and provides consensus information on the prevalence of 12 land-cover classes at 1-km resolution for biodiversity and ecosystem modeling. 
The dataset is available in two versions: Full Version 1.0 (with DISCover) and Reduced Version 1.0 (without DISCover), each containing 12 GeoTIFF layers representing consensus prevalence (0-100%) for different land-cover classes.", diff --git a/data/objects/summoned/generated/GFC/gfc.jsonld b/data/objects/summoned/generated/GFC/gfc.jsonld index 9236729..43a5ba5 100644 --- a/data/objects/summoned/generated/GFC/gfc.jsonld +++ b/data/objects/summoned/generated/GFC/gfc.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#dataset", "name": "Global Forest Change (GFC) 2000–2023 — GFC-2023-v1.11", "description": "Global Forest Change (GFC) provides results from time-series analysis of Landsat imagery to characterize global forest extent and change from 2000 through 2023. The direct-download package is organized as 10×10 degree GeoTIFF tiles at ~30 m resolution (1 arc-second per pixel). Core layers include: tree canopy cover for year 2000 (percent canopy closure for vegetation >5 m), forest gain (2000–2012), year of gross forest cover loss (coded by loss year for 2001–2023), a data mask for land/water/no-data, and reference Landsat cloud-free composites for the first and last available years (typically ~2000 and ~2023) in red, NIR, SWIR1, and SWIR2 bands. 
Version 1.11 adds 2023 loss-year updates and updated multispectral imagery and includes methodological updates (including Landsat 8/9 use and reprocessing from 2011 onward).", diff --git a/data/objects/summoned/generated/GFC/webpage.jsonld b/data/objects/summoned/generated/GFC/webpage.jsonld index 05548f7..f2b01ee 100644 --- a/data/objects/summoned/generated/GFC/webpage.jsonld +++ b/data/objects/summoned/generated/GFC/webpage.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", "@id": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#webpage", "name": "Global Forest Change 2000–2023 (GFC-2023-v1.11) — Data Download", "description": "Download page for the Hansen/GLAD Global Forest Change (GFC) dataset version GFC-2023-v1.11. The page provides direct-download links for 10×10 degree GeoTIFF tiles and layer-wide URL lists for global forest extent and change products derived from time-series analysis of Landsat imagery, including year-2000 tree canopy cover, forest gain (2000–2012), annual forest loss year (2001–2023), a data mask, and reference Landsat composites for the first and last available years (typically ~2000 and ~2023). 
It also includes version 1.11 user notes, license/attribution guidance, and pointers to web visualization and Google Earth Engine access.", diff --git a/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld b/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld new file mode 100644 index 0000000..cc8c2c5 --- /dev/null +++ b/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld @@ -0,0 +1,90 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "@id": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/#dataset", + "name": "GPP MOD17", + "url": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/", + "description": "Global gross primary production (GPP) and net primary production (NPP) derived from the MOD17 algorithm and distributed by the Numerical Terradynamic Simulation Group (NTSG), University of Montana. Data are available via directory listing at the dataset URL and typically include multi-temporal MOD17 products (e.g., 8-day, monthly, annual) in common remote sensing / GIS formats.", + "comment": "This dataset metadata was generated by AI.", + "category": "ecosystem", + "keywords": [ + "MOD17", + "GPP", + "gross primary production", + "NPP", + "net primary production", + "MODIS", + "terrestrial productivity", + "carbon cycle", + "ecosystem productivity", + "NTSG", + "University of Montana" + ], + "creator": [ + { + "@type": "Organization", + "name": "Numerical Terradynamic Simulation Group (NTSG)", + "url": "https://www.umt.edu/numerical-terradynamic-simulation-group/" + } + ], + "provider": { + "@type": "Organization", + "name": "Numerical Terradynamic Simulation Group (NTSG), University of Montana", + "url": "https://www.umt.edu/numerical-terradynamic-simulation-group/" + }, + "publisher": { + "@type": "Organization", + "name": "University of Montana", + "url": "https://www.umt.edu/" + }, + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "GPP", + "description": "Gross Primary Production" + }, + 
{ + "@type": "PropertyValue", + "name": "NPP", + "description": "Net Primary Production" + } + ], + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "temporalCoverage": "2000-01-01/2013-12-31", + "encodingFormat": [ + "application/x-hdf", + "image/tiff", + "application/netcdf" + ], + "license": "https://files.ntsg.umt.edu/", + "conditionsOfAccess": "Access is provided via HTTP file listing at the dataset URL. Refer to the provider's data policies and downloading guidance on the hosting site.", + "isAccessibleForFree": true, + "distribution": [ + { + "@type": "DataDownload", + "name": "MOD17 products (directory listing)", + "contentUrl": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "MOD17 GeoTIFF products (typical distribution)", + "contentUrl": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/GeoTIFF/", + "encodingFormat": ["image/tiff"] + }, + { + "@type": "DataDownload", + "name": "MOD17 HDF-EOS products (typical distribution)", + "contentUrl": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/", + "encodingFormat": ["application/x-hdf"] + } + ] +} diff --git a/data/objects/summoned/generated/GPP_MOD17/prompt.txt b/data/objects/summoned/generated/GPP_MOD17/prompt.txt new file mode 100644 index 0000000..79be060 --- /dev/null +++ b/data/objects/summoned/generated/GPP_MOD17/prompt.txt @@ -0,0 +1,98 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. 
+ +**Dataset Information**: +- Name: GPP MOD17 +- URL: http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/ +- Description: Global gross primary production and net primary production derived from MOD17 +- Group/Category: ecosystem +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: NTSG (Numerical Terradynamic Simulation Group), University of Montana. MOD17 products: GPP, NPP. Data available via file listing at the URL above. Infer distribution and variable info from typical MOD17 product structure if needed. + +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@id": "https://doi.org/10.1594/PANGAEA.879543", + "@type": "Dataset", + "identifier": "https://doi.org/10.1594/PANGAEA.879543", + "url": "https://doi.pangaea.de/10.1594/PANGAEA.879543", + "creator": [ + { + "@id": "https://orcid.org/0000-0002-7468-2409", + "@type": "Person", + "name": "Yao Zhang", + "familyName": "Zhang", + "givenName": "Yao", + "identifier": "https://orcid.org/0000-0002-7468-2409", + "email": "yaozhang@lbl.gov" + }, + { + "@type": "Person", + "name": "Xiangming Xiao", + "familyName": "Xiao", + "givenName": "Xiangming", + "email": "xiangming.xiao@ou.edu" + } + ], + "name": "(Table 3) Continental and global total gross primary production of carbon for the years 2000-2016", + "publisher": { + "@type": "Organization", + "name": "PANGAEA", + "disambiguatingDescription": "Data Publisher for Earth & Environmental Science", + "url": "https://www.pangaea.de/" + }, + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "PANGAEA", + "url": "https://www.pangaea.de/" + }, + "datePublished": "2017-08-11", + "description": "Continental and global total gross primary production of carbon for the years 2000-2016.", + "keywords": ["GPP", "gross primary production", "NPP", "MODIS", "carbon"], + 
"encodingFormat": ["application/netcdf", "GeoTIFF"], + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalCoverage": "2000-01-01/2016-12-31", + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [ + { + "@type": "DataDownload", + "encodingFormat": "application/netcdf", + "contentUrl": "https://example.org/data" + } + ] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or identifier (e.g. http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/#dataset) +5. Include all available metadata fields +6. For creators, use Person or Organization types with proper structure (e.g. NTSG / University of Montana if known) +7. Include distribution information if download links are available (the URL is a directory listing; you may reference the base URL or typical MOD17 product formats) +8. Add temporalCoverage if time period is known (format: "YYYY-MM-DD/YYYY-MM-DD") — MOD17 has multi-year products +9. Add spatialCoverage if geographic bounds are provided: + - Use Place with geo containing GeoShape + - The box format MUST be: "west,south east,north" (comma-separated pairs, space between pairs) + - For this dataset use: "20,-40 50,10" (NOT "20 -40 50 10") + - Format: {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}} +10. Include license and access information +11. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) +12. Set "keywords" as a JSON array of strings, e.g. "keywords": ["keyword1", "keyword2", "keyword3"] — never a single semicolon- or comma-separated string +13. Set "encodingFormat" as a JSON array of strings when listing multiple formats, e.g. "encodingFormat": ["image/tiff", "application/geotiff"] — never a single semicolon- or comma-separated string +14. 
Add this exact comment (for AI-generated disclosure): "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/GPP_MOD17/webpage.jsonld b/data/objects/summoned/generated/GPP_MOD17/webpage.jsonld new file mode 100644 index 0000000..cf38ba8 --- /dev/null +++ b/data/objects/summoned/generated/GPP_MOD17/webpage.jsonld @@ -0,0 +1,40 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/", + "name": "GPP MOD17: Global Gross Primary Production and Net Primary Production", + "description": "Webpage for the GPP MOD17 dataset—global gross primary production (GPP) and net primary production (NPP) derived from the MOD17 algorithm, distributed by the Numerical Terradynamic Simulation Group (NTSG), University of Montana. The site provides a directory listing for MOD17 products (e.g., 8-day, monthly, annual) in GeoTIFF, HDF-EOS, and NetCDF formats.", + "url": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Numerical Terradynamic Simulation Group (NTSG), University of Montana", + "url": "https://www.umt.edu/numerical-terradynamic-simulation-group/" + }, + "about": { + "@type": "Dataset", + "name": "GPP MOD17", + "url": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/" + }, + "publisher": { + "@type": "Organization", + "name": "University of Montana", + "url": "https://www.umt.edu/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "GPP MOD17", + "url": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/#dataset" + }, + "keywords": [ + "MOD17", + "GPP", + "gross primary production", + "NPP", + "net primary production", + "MODIS", + "NTSG", + "University of Montana" + ] +} diff --git 
a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld index f98fb5c..c5eebcc 100644 --- a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld +++ b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#dataset", "name": "Global Tree Density (Global tree density map)", "description": "Global tree density at a global scale provided as two spatially continuous raster maps of tree density. One map was generated using biome-level linear regression models and applied at the biome scale; the second map was generated using ecoregion-level linear regression models and applied at the ecoregion scale. The models were built using over 420,000 ground-sourced estimates of tree density and predictor variables spanning vegetative, climatic, topographic, and anthropogenic factors. The creators note that transitions at biome/ecoregion boundaries may appear abrupt and that estimates are generally more robust at country scale (or larger) than at individual pixel scale. 
The primary distribution is a zipped ArcGIS File Geodatabase package containing both raster models plus supporting ArcGIS layer and map-document files; additional files include a revision adding predictions for small islands and a WGS84 GeoTIFF derivative for the revision.", diff --git a/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld b/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld index ed12e05..6861d6a 100644 --- a/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld +++ b/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#webpage", "name": "Global tree density map", "description": "This webpage in EliScholar (Yale's institutional repository) hosts the Global tree density map dataset associated with Crowther et al. (2015). It describes two global raster maps of tree density (biome-level and ecoregion-level models), documents the underlying methodology (ground-based plot data and regression modeling), provides viewing/format notes for GIS use (ArcGIS File Geodatabase and GeoTIFF derivatives), and offers download access to the primary dataset package and additional revised/alternative-format files.", diff --git a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld index bfce674..ad90a53 100644 --- a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", "name": "MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model)", "description": "MERIT DEM is a high-accuracy 
global digital elevation model (DEM) at 3 arc-second resolution (~90 m at the equator), developed by removing multiple error components from existing spaceborne DEMs. The developers separated and eliminated absolute bias, stripe noise, speckle noise, and tree height bias using multiple satellite datasets and filtering techniques. MERIT DEM was created by processing baseline DEMs including NASA SRTM3 DEM v2.1, JAXA AW3D-30m DEM v1, and Viewfinder Panoramas' DEM. After error removal, land areas mapped with 2 m or better vertical accuracy increased from 39% to 58%. The dataset represents terrain elevations in meters referenced to WGS84 horizontal datum and the EGM96 geoid, covering global land areas between 90°N and 60°S. Data are organized as 5°×5° tiles (6000×6000 pixels) and distributed in 30°×30° packages; filenames encode the center of the lower-left pixel (e.g., \"n30w120_dem.tif\" covers N30–N35 and W120–W115) and package names encode the lower-left corner of the 30°×30° domain (e.g., \"dem_tif_n30w120.tar\" contains tiles for N30–N60 and W120–W090).", diff --git a/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld b/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld index 3f5fb3a..d230fd0 100644 --- a/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", "@id": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", "name": "MERIT DEM: Multi-Error-Removed Improved-Terrain DEM", "description": "Webpage for the MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model) dataset. MERIT DEM is a high-accuracy global digital elevation model at 3 arc-second resolution (~90 m at the equator), developed by removing multiple error components from existing spaceborne DEMs. 
The site provides information about the dataset, download instructions, licensing, and citation information.", diff --git a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld index ec2adb4..58be9f0 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.mrlc.gov/data/project/annual-nlcd#dataset", "name": "Annual National Land Cover Database (Annual NLCD) CONUS Collection 1.1", "description": "Annual NLCD provides an annual record of land cover and related land surface change products for the conterminous United States derived from the Landsat satellite record using an integrated framework of modern modeling approaches. Collection 1.1 extends coverage through 2024 and includes six raster science products: Land Cover, Land Cover Change, Land Cover Confidence, Fractional Impervious Surface, Impervious Descriptor, and Spectral Change Day of Year.", diff --git a/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld b/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld index 6d35eec..c4e0439 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "DataCatalog", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.mrlc.gov/data#catalog", "name": "MRLC Data Catalog", "description": "Catalog of land cover, land change, rangeland vegetation component, and related remote-sensing products produced and distributed by the Multi-Resolution Land Characteristics (MRLC) Consortium for the United States and North America. 
Includes Annual NLCD, RCMAP, Exotic Annual Grass products, NALCMS, and legacy NLCD products, with access via direct downloads, interactive tools, and OGC web services.", diff --git a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld index b12d1df..3b233b7 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.mrlc.gov/data/type/exotic-annual-grass#dataset", "name": "Exotic Annual Grass (EAG) Fractional Cover (MRLC)", "description": "The Exotic Annual Grass (EAG) dataset provides frequent (weekly, during the core growing season) early estimates of fractional cover for multiple exotic annual grass targets and one native perennial grass species in arid and semi-arid rangelands of the western United States. 
Each release includes fractional cover maps and corresponding confidence maps for multiple species/groups (including cheatgrass and other Bromus species) generated using field observations, HLS NDVI, environmental drivers, and machine learning methods.", diff --git a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld index a5b28fe..13eb1cb 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.mrlc.gov/data/type/legacy-nlcd#dataset", "name": "Legacy National Land Cover Database (NLCD) Products", "description": "Legacy NLCD products provide nationwide (United States) land cover and land cover change datasets at 30-meter resolution using a 16-class legend based on a modified Anderson Level II classification system. The legacy product suite supports cyclical updates of U.S. 
land cover and change, enabling monitoring and long-term trend assessments across many application areas (e.g., hydrology, environmental planning, risk analysis, education, and land management).", diff --git a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld index 404c617..4330e91 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://data.usgs.gov/datacatalog/data/USGS:74edd739-1584-41c3-bf08-0274681a779b#dataset", "name": "North American Land Change Monitoring System (NALCMS) – North American Land Cover (30 m)", "description": "NALCMS provides harmonized, trinational land cover maps for North America by combining national land cover products from Canada, the United States, and Mexico into a consistent continental mosaic. 
A recent 30-meter North American land cover dataset reflects land cover information for 2020 from Mexico and Canada, 2019 over the conterminous United States, and 2021 over Alaska, using Landsat 8 imagery inputs and country-specific classification methods integrated into a seamless product.", diff --git a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld index fada40e..4d5209a 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.mrlc.gov/data/project/rcmap#dataset", "name": "Rangeland Condition Monitoring Assessment and Projection (RCMAP)", "description": "RCMAP provides annual rangeland vegetation component fractional cover and related products across western North America derived from Landsat imagery. 
Components include bare ground, herbaceous, litter, shrub, sagebrush, tree (and related component/trend products), supporting rangeland monitoring and assessment from 1985 to present.", diff --git a/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld b/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld index db39d94..9a9c24e 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.mrlc.gov/data#webpage", "name": "MRLC Data", "description": "The Multi-Resolution Land Characteristics (MRLC) Consortium data portal provides nationwide (United States) and North America land cover and related remote-sensing products, including Annual NLCD (annual land cover and land change science products), RCMAP (rangeland fractional cover and trends), Exotic Annual Grass fractional cover products, NALCMS (North American land cover), and legacy NLCD products. 
The portal also provides tools (e.g., MRLC Viewer, EVA Tool, Rangeland Viewer) and web services (OGC WMS/WCS) for interactive access and subsetting.", diff --git a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld index 0aed78c..1fc7d14 100644 --- a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +++ b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.climatologylab.org/terraclimate.html#dataset", "name": "TerraClimate: Monthly Climate and Climatic Water Balance for Global Terrestrial Surfaces", "description": "TerraClimate is a dataset of monthly climate and climatic water balance for global terrestrial land surfaces at ~4-km (1/24°) spatial resolution. It provides time-varying monthly climate fields derived via climatically aided interpolation: high-resolution climatological normals from WorldClim are combined with interpolated monthly anomalies from CRU TS (for most temperature, precipitation, and vapor pressure) and JRA-55 (used where CRU station influence is absent and used for solar radiation and wind speed). TerraClimate also produces monthly surface water balance variables using a modified Thornthwaite–Mather climatic water-balance model incorporating precipitation, temperature, reference evapotranspiration (ASCE Penman–Monteith), and soil water capacity. The core historical record covers 1958–2020 with planned periodic updates, and additional future layers are provided for +2°C and +4°C global mean temperature futures for pseudo-years 1985–2015 plus climatological summaries. 
Data are distributed primarily as compressed NetCDF (NetCDF4) via THREDDS/OPeNDAP and related web services and are also available as a Google Earth Engine image collection.", diff --git a/data/objects/summoned/generated/TerraClimate/webpage.jsonld b/data/objects/summoned/generated/TerraClimate/webpage.jsonld index fac5572..55fe970 100644 --- a/data/objects/summoned/generated/TerraClimate/webpage.jsonld +++ b/data/objects/summoned/generated/TerraClimate/webpage.jsonld @@ -1,6 +1,7 @@ { "@context": "https://schema.org/", "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", "@id": "https://www.climatologylab.org/terraclimate.html", "name": "TerraClimate", "description": "Webpage for TerraClimate, a high-resolution global dataset of monthly climate and climatic water balance for global terrestrial surfaces from 1958-2020. TerraClimate provides monthly climate data at ~4-km (1/24th degree) spatial resolution, including primary climate variables (temperature, precipitation, vapor pressure, radiation, wind-speed) and derived water balance variables (evapotranspiration, runoff, soil moisture, drought indices). The dataset uses climatically aided interpolation combining WorldClim climatological normals with time-varying data from CRU Ts4.0 and JRA55. 
Future climate projections are also available for +2C and +4C scenarios.", From 674caddf1913a3393ffcc2e53bd330b0df566c9f Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 10:46:20 -0600 Subject: [PATCH 37/58] Added the Json-LD's of the rest of the sites --- .../summoned/generated/FLO1K/flo1k.jsonld | 136 ++++++++++++ .../summoned/generated/FLO1K/prompt.txt | 53 +++++ .../summoned/generated/FLO1K/webpage.jsonld | 36 ++++ .../summoned/generated/G-RUN/g-run.jsonld | 158 ++++++++++++++ .../summoned/generated/G-RUN/prompt.txt | 53 +++++ .../summoned/generated/G-RUN/webpage.jsonld | 38 ++++ .../summoned/generated/GHSL/ghsl.jsonld | 156 ++++++++++++++ .../summoned/generated/GHSL/prompt.txt | 53 +++++ .../summoned/generated/GHSL/webpage.jsonld | 38 ++++ .../generated/GRACE-REC/grace-rec.jsonld | 149 +++++++++++++ .../summoned/generated/GRACE-REC/prompt.txt | 53 +++++ .../generated/GRACE-REC/webpage.jsonld | 37 ++++ .../global-multi-layer-soil-moisture.jsonld | 147 +++++++++++++ .../prompt.txt | 53 +++++ .../webpage.jsonld | 37 ++++ .../generated/HydroSHEDS/hydrosheds.jsonld | 191 +++++++++++++++++ .../summoned/generated/HydroSHEDS/prompt.txt | 52 +++++ .../generated/HydroSHEDS/webpage.jsonld | 37 ++++ .../Hydrography90m/hydrography90m.jsonld | 194 +++++++++++++++++ .../generated/Hydrography90m/prompt.txt | 53 +++++ .../generated/Hydrography90m/webpage.jsonld | 37 ++++ .../generated/Shale_Network/prompt.txt | 53 +++++ .../Shale_Network/shale-network.jsonld | 161 ++++++++++++++ .../generated/Shale_Network/webpage.jsonld | 37 ++++ .../summoned/generated/SoilGrids2/prompt.txt | 53 +++++ .../generated/SoilGrids2/soilgrids2.jsonld | 140 ++++++++++++ .../generated/SoilGrids2/webpage.jsonld | 37 ++++ .../summoned/generated/WATERBASE/prompt.txt | 53 +++++ .../generated/WATERBASE/waterbase.jsonld | 130 ++++++++++++ .../generated/WATERBASE/webpage.jsonld | 37 ++++ .../generated/Water_Quality_Portal/prompt.txt | 51 +++++ .../water-quality-portal.jsonld | 199 
++++++++++++++++++ .../Water_Quality_Portal/webpage.jsonld | 44 ++++ 33 files changed, 2756 insertions(+) create mode 100644 data/objects/summoned/generated/FLO1K/flo1k.jsonld create mode 100644 data/objects/summoned/generated/FLO1K/prompt.txt create mode 100644 data/objects/summoned/generated/FLO1K/webpage.jsonld create mode 100644 data/objects/summoned/generated/G-RUN/g-run.jsonld create mode 100644 data/objects/summoned/generated/G-RUN/prompt.txt create mode 100644 data/objects/summoned/generated/G-RUN/webpage.jsonld create mode 100644 data/objects/summoned/generated/GHSL/ghsl.jsonld create mode 100644 data/objects/summoned/generated/GHSL/prompt.txt create mode 100644 data/objects/summoned/generated/GHSL/webpage.jsonld create mode 100644 data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld create mode 100644 data/objects/summoned/generated/GRACE-REC/prompt.txt create mode 100644 data/objects/summoned/generated/GRACE-REC/webpage.jsonld create mode 100644 data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld create mode 100644 data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/prompt.txt create mode 100644 data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/webpage.jsonld create mode 100644 data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld create mode 100644 data/objects/summoned/generated/HydroSHEDS/prompt.txt create mode 100644 data/objects/summoned/generated/HydroSHEDS/webpage.jsonld create mode 100644 data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld create mode 100644 data/objects/summoned/generated/Hydrography90m/prompt.txt create mode 100644 data/objects/summoned/generated/Hydrography90m/webpage.jsonld create mode 100644 data/objects/summoned/generated/Shale_Network/prompt.txt create mode 100644 data/objects/summoned/generated/Shale_Network/shale-network.jsonld create mode 100644 data/objects/summoned/generated/Shale_Network/webpage.jsonld 
create mode 100644 data/objects/summoned/generated/SoilGrids2/prompt.txt create mode 100644 data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld create mode 100644 data/objects/summoned/generated/SoilGrids2/webpage.jsonld create mode 100644 data/objects/summoned/generated/WATERBASE/prompt.txt create mode 100644 data/objects/summoned/generated/WATERBASE/waterbase.jsonld create mode 100644 data/objects/summoned/generated/WATERBASE/webpage.jsonld create mode 100644 data/objects/summoned/generated/Water_Quality_Portal/prompt.txt create mode 100644 data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld create mode 100644 data/objects/summoned/generated/Water_Quality_Portal/webpage.jsonld diff --git a/data/objects/summoned/generated/FLO1K/flo1k.jsonld b/data/objects/summoned/generated/FLO1K/flo1k.jsonld new file mode 100644 index 0000000..4578ee6 --- /dev/null +++ b/data/objects/summoned/generated/FLO1K/flo1k.jsonld @@ -0,0 +1,136 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.c.3890224.v1#dataset", + "name": "FLO1K: Global maps of mean, maximum and minimum annual streamflow at 1 km resolution (1960–2015)", + "url": "https://figshare.com/collections/FLO1K_global_maps_of_mean_maximum_and_minimum_annual_streamflow_at_1_km_resolution_from_1960_through_2015/3890224", + "description": "FLO1K provides global, spatially continuous gridded maps of annual streamflow at ~1 km (30 arc-second) resolution for each year from 1960 through 2015. Annual flow metrics include mean annual flow as well as minimum and maximum monthly flow for a given year. 
The dataset is intended for hydrology, freshwater ecology, water resources assessment, and large-scale environmental modeling applications.", + "keywords": [ + "FLO1K", + "streamflow", + "runoff", + "annual streamflow", + "mean annual flow", + "maximum flow", + "minimum flow", + "global", + "1 km", + "30 arc-second", + "hydrology", + "river discharge", + "freshwater", + "Figshare" + ], + "creator": [ + { "@type": "Person", "name": "Valerio Barbarossa" }, + { "@type": "Person", "name": "Mark A. J. Huijbregts" }, + { "@type": "Person", "name": "Arthur H. W. Beusen" }, + { "@type": "Person", "name": "Hylke E. Beck" }, + { "@type": "Person", "name": "Henry King" }, + { "@type": "Person", "name": "Aafke M. Schipper" } + ], + "provider": [ + { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + } + ], + "isAccessibleForFree": true, + "temporalCoverage": "1960-01-01/2015-12-31", + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "spatialResolution": "30 arc-second (~1 km)", + "temporalResolution": "P1Y", + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Mean annual streamflow", + "description": "Mean annual flow for each year (gridded, global, ~1 km)." + }, + { + "@type": "PropertyValue", + "name": "Maximum annual streamflow (maximum monthly flow per year)", + "description": "Maximum monthly flow within each year (gridded, global, ~1 km)." + }, + { + "@type": "PropertyValue", + "name": "Minimum annual streamflow (minimum monthly flow per year)", + "description": "Minimum monthly flow within each year (gridded, global, ~1 km)." 
+ } + ], + "encodingFormat": [ + "application/x-netcdf", + "application/zip", + "text/html" + ], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [ + { + "@type": "DataDownload", + "name": "Figshare collection landing page", + "description": "Dataset collection landing page and downloads hosted on Figshare.", + "contentUrl": "https://figshare.com/collections/FLO1K_global_maps_of_mean_maximum_and_minimum_annual_streamflow_at_1_km_resolution_from_1960_through_2015/3890224", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Persistent identifier (Figshare collection DOI landing page)", + "description": "Persistent DOI for the FLO1K Figshare collection.", + "contentUrl": "https://doi.org/10.6084/m9.figshare.c.3890224.v1", + "encodingFormat": ["text/html"] + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "FLO1K, global maps of mean, maximum and minimum annual streamflow at 1 km resolution from 1960 through 2015", + "author": [ + { "@type": "Person", "name": "Valerio Barbarossa" }, + { "@type": "Person", "name": "Mark A. J. Huijbregts" }, + { "@type": "Person", "name": "Arthur H. W. Beusen" }, + { "@type": "Person", "name": "Hylke E. Beck" }, + { "@type": "Person", "name": "Henry King" }, + { "@type": "Person", "name": "Aafke M. 
Schipper" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Scientific Data" + }, + "datePublished": "2018", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/sdata.2018.52" + } + ], + "sameAs": "https://doi.org/10.1038/sdata.2018.52" + } + ], + "about": [ + { "@type": "Thing", "name": "Hydrology" }, + { "@type": "Thing", "name": "Streamflow" }, + { "@type": "Thing", "name": "Runoff" }, + { "@type": "Thing", "name": "River discharge" }, + { "@type": "Thing", "name": "Freshwater resources" } + ], + "sameAs": [ + "https://doi.org/10.6084/m9.figshare.c.3890224.v1" + ] +} diff --git a/data/objects/summoned/generated/FLO1K/prompt.txt b/data/objects/summoned/generated/FLO1K/prompt.txt new file mode 100644 index 0000000..17cf7b4 --- /dev/null +++ b/data/objects/summoned/generated/FLO1K/prompt.txt @@ -0,0 +1,53 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: FLO1K +- URL: https://figshare.com/collections/FLO1K_global_maps_of_mean_maximum_and_minimum_annual_streamflow_at_1_km_resolution_from_1960_through_2015/3890224 +- Description: Global mean, maximum and minimum annual streamflow at 1 km resolution (1960–2015). +- Group/Category: hydrology +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: Hosted on Figshare (collection). FLO1K provides global maps of mean, maximum, and minimum annual streamflow at 1 km resolution from 1960 through 2015. Infer creator, citation, temporal coverage, and variables from the Figshare collection page and any linked paper (e.g. DOI or journal article). Include distribution with the Figshare collection URL and DOI if available. 
+ +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "description": "Global multi-layer soil moisture products covering 1970–2016.", + "keywords": ["soil moisture", "global", "Figshare", "hydrology"], + "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Person", "name": "Jiafu Mao"}], + "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], + "encodingFormat": ["application/zip", "application/x-netcdf"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or DOI (e.g. Figshare collection/article URL with #dataset or DOI) +5. Include all available metadata fields +6. Creator/publisher: infer from Figshare page (authors and Figshare as publisher) +7. Include distribution with contentUrl to the Figshare dataset/collection/DOI +8. temporalCoverage: use 1960–2015 from the collection title unless the page states otherwise +9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" +10. 
Include license and access information (Figshare often CC-BY) +11. Use proper JSON-LD structure (arrays for multiple values) +12. "keywords" as a JSON array of strings — never semicolon/comma-separated string +13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +14. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/FLO1K/webpage.jsonld b/data/objects/summoned/generated/FLO1K/webpage.jsonld new file mode 100644 index 0000000..c68f84a --- /dev/null +++ b/data/objects/summoned/generated/FLO1K/webpage.jsonld @@ -0,0 +1,36 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://figshare.com/collections/FLO1K_global_maps_of_mean_maximum_and_minimum_annual_streamflow_at_1_km_resolution_from_1960_through_2015/3890224", + "name": "FLO1K: Global maps of mean, maximum and minimum annual streamflow at 1 km resolution (1960–2015)", + "description": "Webpage for FLO1K on Figshare—global gridded mean, maximum and minimum annual streamflow at 1 km resolution, 1960–2015. By Barbarossa, Huijbregts, Beusen, Beck, King, Schipper. 
Published in Scientific Data (2018).", + "url": "https://figshare.com/collections/FLO1K_global_maps_of_mean_maximum_and_minimum_annual_streamflow_at_1_km_resolution_from_1960_through_2015/3890224", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Figshare", + "url": "https://figshare.com/" + }, + "about": { + "@type": "Dataset", + "name": "FLO1K", + "url": "https://doi.org/10.6084/m9.figshare.c.3890224.v1" + }, + "publisher": { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "FLO1K: Global maps of mean, maximum and minimum annual streamflow at 1 km resolution (1960–2015)", + "url": "https://doi.org/10.6084/m9.figshare.c.3890224.v1#dataset" + }, + "keywords": [ + "FLO1K", + "streamflow", + "hydrology", + "Figshare" + ] +} diff --git a/data/objects/summoned/generated/G-RUN/g-run.jsonld b/data/objects/summoned/generated/G-RUN/g-run.jsonld new file mode 100644 index 0000000..c7a4f0d --- /dev/null +++ b/data/objects/summoned/generated/G-RUN/g-run.jsonld @@ -0,0 +1,158 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176#dataset", + "name": "G-RUN (GRUN): Global Runoff Reconstruction", + "url": "https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176", + "description": "GRUN (Global Runoff Reconstruction; often referred to as G-RUN) is an observation-based global gridded reconstruction of monthly runoff derived using machine learning trained on in-situ streamflow observations. 
The dataset provides global runoff rates on a 0.5° grid over the period 1902–2014 and is intended for analyses of freshwater dynamics, hydroclimate variability, drought propagation, and evaluation of hydrological model simulations.", + "keywords": [ + "GRUN", + "G-RUN", + "global runoff", + "runoff reconstruction", + "streamflow", + "hydrology", + "hydroclimate", + "machine learning", + "reanalysis", + "monthly", + "0.5 degree", + "global gridded dataset", + "Figshare" + ], + "creator": [ + { + "@type": "Person", + "name": "Gionata Ghiggi", + "affiliation": { + "@type": "Organization", + "name": "ETH Zurich", + "url": "https://ethz.ch/en.html" + } + }, + { + "@type": "Person", + "name": "Vincent Humphrey", + "affiliation": { + "@type": "Organization", + "name": "ETH Zurich", + "url": "https://ethz.ch/en.html" + } + }, + { + "@type": "Person", + "name": "Sonia I. Seneviratne", + "affiliation": { + "@type": "Organization", + "name": "ETH Zurich", + "url": "https://ethz.ch/en.html" + } + }, + { + "@type": "Person", + "name": "Lukas Gudmundsson", + "affiliation": { + "@type": "Organization", + "name": "ETH Zurich", + "url": "https://ethz.ch/en.html" + } + } + ], + "provider": [ + { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + } + ], + "isAccessibleForFree": true, + "temporalCoverage": "1902-01-01/2014-12-31", + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "spatialResolution": "0.5 degree", + "temporalResolution": "P1M", + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Runoff", + "description": "Monthly gridded runoff rates reconstructed using machine learning trained on in-situ streamflow observations." 
+ } + ], + "measurementTechnique": [ + "Machine learning reconstruction trained on in-situ streamflow observations", + "Prediction based on antecedent precipitation and temperature from atmospheric reanalysis inputs" + ], + "encodingFormat": [ + "application/x-netcdf", + "application/zip", + "text/html" + ], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [ + { + "@type": "DataDownload", + "name": "Figshare dataset landing page", + "description": "Dataset landing page and downloads hosted on Figshare.", + "contentUrl": "https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Persistent identifier (Figshare DOI landing page)", + "description": "Persistent DOI for the GRUN dataset on Figshare.", + "contentUrl": "https://doi.org/10.6084/m9.figshare.9228176", + "encodingFormat": ["text/html"] + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "GRUN: an observation-based global gridded runoff dataset from 1902 to 2014", + "author": [ + { "@type": "Person", "name": "Gionata Ghiggi" }, + { "@type": "Person", "name": "Vincent Humphrey" }, + { "@type": "Person", "name": "Sonia I. 
Seneviratne" }, + { "@type": "Person", "name": "Lukas Gudmundsson" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Earth System Science Data" + }, + "datePublished": "2019", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/essd-11-1655-2019" + } + ], + "sameAs": "https://doi.org/10.5194/essd-11-1655-2019" + } + ], + "about": [ + { "@type": "Thing", "name": "Hydrology" }, + { "@type": "Thing", "name": "Runoff" }, + { "@type": "Thing", "name": "Streamflow" }, + { "@type": "Thing", "name": "Hydroclimate variability" }, + { "@type": "Thing", "name": "Drought" }, + { "@type": "Thing", "name": "Freshwater resources" } + ], + "sameAs": [ + "https://doi.org/10.6084/m9.figshare.9228176" + ] +} diff --git a/data/objects/summoned/generated/G-RUN/prompt.txt b/data/objects/summoned/generated/G-RUN/prompt.txt new file mode 100644 index 0000000..a649df3 --- /dev/null +++ b/data/objects/summoned/generated/G-RUN/prompt.txt @@ -0,0 +1,53 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: G-RUN +- URL: https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176 +- Description: Global runoff reconstruction (GRUN: Global Runoff Reconstruction). +- Group/Category: hydrology +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: Hosted on Figshare. G-RUN / GRUN is a global runoff reconstruction. Infer creator, citation, temporal coverage, and variables from the Figshare page and any linked paper (e.g. DOI or journal article). Include distribution with the Figshare article URL and DOI if available. 
+ +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "description": "Global multi-layer soil moisture products covering 1970–2016.", + "keywords": ["soil moisture", "global", "Figshare", "hydrology"], + "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Person", "name": "Jiafu Mao"}], + "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], + "encodingFormat": ["application/zip", "application/x-netcdf"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or DOI (e.g. Figshare article URL with #dataset or DOI) +5. Include all available metadata fields +6. Creator/publisher: infer from Figshare page (authors and Figshare as publisher) +7. Include distribution with contentUrl to the Figshare dataset/DOI +8. temporalCoverage: infer from title/description (e.g. reconstruction period) +9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" +10. Include license and access information (Figshare often CC-BY) +11. 
Use proper JSON-LD structure (arrays for multiple values) +12. "keywords" as a JSON array of strings — never semicolon/comma-separated string +13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +14. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/G-RUN/webpage.jsonld b/data/objects/summoned/generated/G-RUN/webpage.jsonld new file mode 100644 index 0000000..af3ade7 --- /dev/null +++ b/data/objects/summoned/generated/G-RUN/webpage.jsonld @@ -0,0 +1,38 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176", + "name": "G-RUN (GRUN): Global Runoff Reconstruction", + "description": "Webpage for G-RUN (GRUN) on Figshare—global gridded monthly runoff reconstruction 1902–2014 at 0.5° resolution. By Gionata Ghiggi, Vincent Humphrey, Sonia I. Seneviratne, Lukas Gudmundsson (ETH Zurich). 
Published in Earth System Science Data (2019).", + "url": "https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Figshare", + "url": "https://figshare.com/" + }, + "about": { + "@type": "Dataset", + "name": "G-RUN", + "url": "https://doi.org/10.6084/m9.figshare.9228176" + }, + "publisher": { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "G-RUN (GRUN): Global Runoff Reconstruction", + "url": "https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176#dataset" + }, + "keywords": [ + "G-RUN", + "GRUN", + "global runoff", + "hydrology", + "Figshare", + "ETH Zurich" + ] +} diff --git a/data/objects/summoned/generated/GHSL/ghsl.jsonld b/data/objects/summoned/generated/GHSL/ghsl.jsonld new file mode 100644 index 0000000..323b5fc --- /dev/null +++ b/data/objects/summoned/generated/GHSL/ghsl.jsonld @@ -0,0 +1,156 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://human-settlement.emergency.copernicus.eu/download.php#dataset", + "name": "GHSL (Global Human Settlement Layer)", + "url": "https://human-settlement.emergency.copernicus.eu/download.php", + "description": "The Global Human Settlement Layer (GHSL) is a global, multi-temporal suite of geospatial datasets describing human settlements, built-up areas, and population distribution derived from Earth Observation and population data. GHSL products include gridded built-up surface, built-up volume, building height and settlement characteristics, population grids, and settlement typologies (e.g., Degree of Urbanisation / settlement model). 
Data are provided at multiple spatial resolutions and coordinate systems and span multiple epochs from 1975 to 2030 (including short-range projections) for key products.", + "keywords": [ + "GHSL", + "Global Human Settlement Layer", + "human settlements", + "built-up area", + "built-up surface", + "built-up volume", + "building height", + "population grid", + "settlement model", + "degree of urbanisation", + "urbanization", + "exposure mapping", + "remote sensing", + "Copernicus", + "JRC" + ], + "creator": [ + { + "@type": "Organization", + "name": "European Commission, Joint Research Centre (JRC)", + "url": "https://data.jrc.ec.europa.eu/collection/ghsl" + } + ], + "provider": [ + { + "@type": "Organization", + "name": "Copernicus Emergency Management Service", + "url": "https://emergency.copernicus.eu/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "European Commission, Joint Research Centre (JRC)", + "url": "https://data.jrc.ec.europa.eu/collection/ghsl" + } + ], + "isAccessibleForFree": true, + "license": "https://creativecommons.org/licenses/by/4.0/", + "temporalCoverage": "1975-01-01/2030-12-31", + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Built-up surface", + "description": "Gridded built-up surface area (including total and non-residential components for some products)." + }, + { + "@type": "PropertyValue", + "name": "Built-up volume", + "description": "Gridded built-up volume estimates (including total and non-residential components for some products)." + }, + { + "@type": "PropertyValue", + "name": "Building height", + "description": "Gridded building height estimates for selected reference years/products." + }, + { + "@type": "PropertyValue", + "name": "Population", + "description": "Residential population grid (number of people per cell) for multiple epochs and projections." 
+ }, + { + "@type": "PropertyValue", + "name": "Settlement typology / Degree of Urbanisation", + "description": "Settlement model classification based on the UN-recommended Degree of Urbanisation methodology." + } + ], + "encodingFormat": [ + "text/html", + "application/zip", + "image/tiff", + "application/geotiff", + "text/csv", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/geopackage+sqlite3" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "GHSL direct download (advanced users)", + "description": "Direct download interface for GHSL products (tile-based and single-file downloads).", + "contentUrl": "https://human-settlement.emergency.copernicus.eu/download.php", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "GHSL download wizard (step-by-step)", + "description": "Interactive wizard to select GHSL product group, dataset, epoch, resolution, and coordinate system for download.", + "contentUrl": "https://human-settlement.emergency.copernicus.eu/downloadWizard.php", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Use conditions and how to cite", + "description": "GHSL use conditions and citation guidance, including license and reference publication pointers.", + "contentUrl": "https://human-settlement.emergency.copernicus.eu/GHSLhowToCite.php", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "JRC GHSL collection landing page", + "description": "European Commission JRC collection page aggregating GHSL datasets and releases.", + "contentUrl": "https://data.jrc.ec.europa.eu/collection/ghsl", + "encodingFormat": ["text/html"] + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Advances on the Global Human Settlement Layer by joint assessment of Earth Observation and population survey data", + "author": [ + { "@type": "Person", "name": "M. Pesaresi" }, + { "@type": "Person", "name": "M. 
Schiavina" }, + { "@type": "Person", "name": "P. Politis" }, + { "@type": "Person", "name": "S. Freire" }, + { "@type": "Person", "name": "K. Krasnodębska" }, + { "@type": "Person", "name": "J. H. Uhl" }, + { "@type": "Person", "name": "T. Kemper" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "International Journal of Digital Earth" + }, + "datePublished": "2024", + "sameAs": "https://doi.org/10.1080/17538947.2024.2390454" + } + ], + "about": [ + { "@type": "Thing", "name": "Human settlements" }, + { "@type": "Thing", "name": "Built environment" }, + { "@type": "Thing", "name": "Population distribution" }, + { "@type": "Thing", "name": "Urbanization" }, + { "@type": "Thing", "name": "Remote sensing" } + ], + "sameAs": [ + "https://human-settlement.emergency.copernicus.eu/" + ] +} diff --git a/data/objects/summoned/generated/GHSL/prompt.txt b/data/objects/summoned/generated/GHSL/prompt.txt new file mode 100644 index 0000000..ad6fc45 --- /dev/null +++ b/data/objects/summoned/generated/GHSL/prompt.txt @@ -0,0 +1,53 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: GHSL (Global Human Settlement Layer) +- URL: https://human-settlement.emergency.copernicus.eu/download.php +- Description: Built-up areas, population and settlements (global human settlement layer products). +- Group/Category: human +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: Hosted on Copernicus Emergency Management Service (human settlement). GHSL provides global data on built-up areas, population distribution, and human settlements at multiple resolutions and time periods. Infer creator (e.g. 
European Commission Joint Research Centre), publisher, temporal coverage, variables, and distribution from the download page and any linked documentation. Include distribution with the download page URL and any DOI or data access URLs if available. + +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "description": "Global multi-layer soil moisture products covering 1970–2016.", + "keywords": ["soil moisture", "global", "Figshare", "hydrology"], + "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Organization", "name": "Oak Ridge National Laboratory"}], + "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], + "encodingFormat": ["application/zip", "application/x-netcdf"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or a stable identifier (e.g. the download page URL with #dataset) +5. Include all available metadata fields +6. Creator/publisher: infer from the Copernicus/JRC site (e.g. European Commission Joint Research Centre, Copernicus) +7. 
Include distribution with contentUrl to the download page or data access URL; use encodingFormat as a JSON array (e.g. ["text/html"] or list file formats if known) +8. temporalCoverage: infer from the site (GHSL has multi-epoch data, e.g. 1975, 1990, 2000, 2015) +9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" +10. Include license if stated (Copernicus data often free use with attribution) +11. Use proper JSON-LD structure (arrays for multiple values) +12. "keywords" as a JSON array of strings — never semicolon/comma-separated string +13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +14. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/GHSL/webpage.jsonld b/data/objects/summoned/generated/GHSL/webpage.jsonld new file mode 100644 index 0000000..eb80f7e --- /dev/null +++ b/data/objects/summoned/generated/GHSL/webpage.jsonld @@ -0,0 +1,38 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://human-settlement.emergency.copernicus.eu/download.php", + "name": "GHSL (Global Human Settlement Layer) — Download", + "description": "Download page for the Global Human Settlement Layer (GHSL): built-up areas, population and settlement data from European Commission JRC / Copernicus Emergency Management Service.", + "url": "https://human-settlement.emergency.copernicus.eu/download.php", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Copernicus Emergency Management Service", + "url": "https://emergency.copernicus.eu/" + }, + "about": { + "@type": "Dataset", + "name": "GHSL", + "url": "https://human-settlement.emergency.copernicus.eu/download.php#dataset" + }, + "publisher": { + "@type": "Organization", + "name": "European Commission, Joint 
Research Centre (JRC)", + "url": "https://data.jrc.ec.europa.eu/collection/ghsl" + }, + "mainEntity": { + "@type": "Dataset", + "name": "GHSL (Global Human Settlement Layer)", + "url": "https://human-settlement.emergency.copernicus.eu/download.php#dataset" + }, + "keywords": [ + "GHSL", + "human settlements", + "built-up", + "population", + "Copernicus", + "JRC" + ] +} diff --git a/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld b/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld new file mode 100644 index 0000000..0d7e052 --- /dev/null +++ b/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld @@ -0,0 +1,149 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.7670849#dataset", + "name": "GRACE-REC: A reconstruction of climate-driven water storage changes over the last century", + "url": "https://figshare.com/articles/dataset/GRACE-REC_A_reconstruction_of_climate-driven_water_storage_changes_over_the_last_century/7670849", + "description": "GRACE-REC provides reconstructed terrestrial water storage (TWS) anomalies driven by climate variability, trained on GRACE observations and meteorological forcing data. The reconstruction is provided at 0.5° spatial resolution and is available at daily and monthly temporal scales over the period 1901 to present (with updates released regularly). 
The product is designed for analyses of long-term hydroclimate variability, drought and flood assessment, and interpretation of GRACE/GRACE-FO water storage records and gaps.", + "keywords": [ + "GRACE-REC", + "terrestrial water storage", + "TWS", + "water storage anomalies", + "hydrology", + "hydroclimate", + "drought", + "flood", + "GRACE", + "GRACE-FO", + "reconstruction", + "climate-driven", + "0.5 degree", + "monthly", + "daily", + "Figshare" + ], + "creator": [ + { + "@type": "Person", + "name": "Vincent Humphrey", + "affiliation": { + "@type": "Organization", + "name": "Institute for Atmospheric and Climate Science, ETH Zurich", + "url": "https://ethz.ch/en.html" + } + }, + { + "@type": "Person", + "name": "Lukas Gudmundsson", + "affiliation": { + "@type": "Organization", + "name": "Institute for Atmospheric and Climate Science, ETH Zurich", + "url": "https://ethz.ch/en.html" + } + } + ], + "provider": [ + { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + } + ], + "isAccessibleForFree": true, + "temporalCoverage": "1901-01-01/2019-12-31", + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "spatialResolution": "0.5 degree", + "temporalResolution": [ + "P1D", + "P1M" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Terrestrial water storage anomaly", + "description": "Reconstructed climate-driven terrestrial water storage anomalies (TWSA) derived from statistical modeling calibrated to GRACE observations." + }, + { + "@type": "PropertyValue", + "name": "Ensemble members", + "description": "Multiple ensemble realizations to quantify predictive uncertainty for reconstructed TWS anomalies." 
+ } + ], + "measurementTechnique": [ + "Statistical reconstruction calibrated with GRACE observations", + "Reconstruction driven by meteorological forcing datasets at daily and monthly scales" + ], + "encodingFormat": [ + "application/x-netcdf", + "application/zip", + "text/html" + ], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [ + { + "@type": "DataDownload", + "name": "Figshare dataset landing page", + "description": "Dataset landing page and downloads hosted on Figshare.", + "contentUrl": "https://figshare.com/articles/dataset/GRACE-REC_A_reconstruction_of_climate-driven_water_storage_changes_over_the_last_century/7670849", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Figshare DOI landing page", + "description": "Persistent DOI landing page for the dataset on Figshare.", + "contentUrl": "https://doi.org/10.6084/m9.figshare.7670849", + "encodingFormat": ["text/html"] + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "GRACE-REC: a reconstruction of climate-driven water storage changes over the last century", + "author": [ + { "@type": "Person", "name": "Vincent Humphrey" }, + { "@type": "Person", "name": "Lukas Gudmundsson" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Earth System Science Data" + }, + "datePublished": "2019", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/essd-11-1153-2019" + } + ], + "sameAs": "https://doi.org/10.5194/essd-11-1153-2019" + } + ], + "about": [ + { "@type": "Thing", "name": "Hydrology" }, + { "@type": "Thing", "name": "Terrestrial water storage" }, + { "@type": "Thing", "name": "Climate variability" }, + { "@type": "Thing", "name": "Drought" }, + { "@type": "Thing", "name": "Floods" }, + { "@type": "Thing", "name": "GRACE satellite gravimetry" } + ], + "sameAs": [ + "https://doi.org/10.6084/m9.figshare.7670849" + ] +} diff --git a/data/objects/summoned/generated/GRACE-REC/prompt.txt 
b/data/objects/summoned/generated/GRACE-REC/prompt.txt new file mode 100644 index 0000000..be945c6 --- /dev/null +++ b/data/objects/summoned/generated/GRACE-REC/prompt.txt @@ -0,0 +1,53 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: GRACE-REC +- URL: https://figshare.com/articles/dataset/GRACE-REC_A_reconstruction_of_climate-driven_water_storage_changes_over_the_last_century/7670849 +- Description: Terrestrial water storage (reconstruction of climate-driven water storage changes over the last century; GRACE-REC). +- Group/Category: hydrology +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: Hosted on Figshare. GRACE-REC is a reconstruction of terrestrial water storage changes (e.g. from GRACE and related data) over the last century. Infer creator, citation, temporal coverage, and variables from the Figshare page and any linked paper (e.g. DOI or journal article). Include distribution with the Figshare article URL and DOI if available. 
+ +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "description": "Global multi-layer soil moisture products covering 1970–2016.", + "keywords": ["soil moisture", "global", "Figshare", "hydrology"], + "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Person", "name": "Jiafu Mao"}], + "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], + "encodingFormat": ["application/zip", "application/x-netcdf"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or DOI (e.g. Figshare article URL with #dataset or DOI) +5. Include all available metadata fields +6. Creator/publisher: infer from Figshare page (authors and Figshare as publisher) +7. Include distribution with contentUrl to the Figshare dataset/DOI +8. temporalCoverage: infer from title/description (e.g. "last century" → ~1900–2000 or as stated) +9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" +10. 
Include license and access information (Figshare often uses CC-BY) +11. Use proper JSON-LD structure (arrays for multiple values) +12. "keywords" as a JSON array of strings — never a semicolon- or comma-separated string +13. "encodingFormat" as a JSON array of strings — never a semicolon- or comma-separated string +14. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/GRACE-REC/webpage.jsonld b/data/objects/summoned/generated/GRACE-REC/webpage.jsonld new file mode 100644 index 0000000..181138b --- /dev/null +++ b/data/objects/summoned/generated/GRACE-REC/webpage.jsonld @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://figshare.com/articles/dataset/GRACE-REC_A_reconstruction_of_climate-driven_water_storage_changes_over_the_last_century/7670849", + "name": "GRACE-REC: A reconstruction of climate-driven water storage changes over the last century", + "description": "Webpage for GRACE-REC on Figshare—reconstructed terrestrial water storage (TWS) anomalies from 1901 to present at 0.5° resolution, daily and monthly. By Vincent Humphrey and Lukas Gudmundsson (ETH Zurich). 
Published in Earth System Science Data (2019).", + "url": "https://figshare.com/articles/dataset/GRACE-REC_A_reconstruction_of_climate-driven_water_storage_changes_over_the_last_century/7670849", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Figshare", + "url": "https://figshare.com/" + }, + "about": { + "@type": "Dataset", + "name": "GRACE-REC", + "url": "https://doi.org/10.6084/m9.figshare.7670849" + }, + "publisher": { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "GRACE-REC: A reconstruction of climate-driven water storage changes over the last century", + "url": "https://doi.org/10.6084/m9.figshare.7670849#dataset" + }, + "keywords": [ + "GRACE-REC", + "terrestrial water storage", + "hydrology", + "Figshare", + "ETH Zurich" + ] +} diff --git a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld new file mode 100644 index 0000000..4020296 --- /dev/null +++ b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld @@ -0,0 +1,147 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312/1?file=26220602", + "description": "Global multi-layer soil moisture products covering 1970–2016 at 0.5° spatial resolution and monthly temporal resolution, provided for four soil depth layers (0–10 cm, 10–30 cm, 30–50 cm, 50–100 cm). 
The dataset provides global, gap-free soil moisture fields intended for large-scale hydrologic, climate, and land-surface applications.", + "keywords": [ + "soil moisture", + "multi-layer soil moisture", + "root-zone soil moisture", + "global", + "monthly", + "0.5 degree", + "soil hydrology", + "land surface", + "climate", + "Earth system", + "1970-2016", + "Figshare" + ], + "creator": [ + { + "@type": "Person", + "name": "Yaoping Wang", + "affiliation": { + "@type": "Organization", + "name": "Oak Ridge National Laboratory", + "url": "https://www.ornl.gov/" + } + }, + { + "@type": "Person", + "name": "Jiafu Mao", + "affiliation": { + "@type": "Organization", + "name": "Oak Ridge National Laboratory", + "url": "https://www.ornl.gov/" + } + } + ], + "provider": [ + { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + } + ], + "isAccessibleForFree": true, + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "spatialResolution": "0.5 degree", + "temporalResolution": "P1M", + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Soil moisture (0–10 cm)", + "description": "Monthly soil moisture for the 0–10 cm layer." + }, + { + "@type": "PropertyValue", + "name": "Soil moisture (10–30 cm)", + "description": "Monthly soil moisture for the 10–30 cm layer." + }, + { + "@type": "PropertyValue", + "name": "Soil moisture (30–50 cm)", + "description": "Monthly soil moisture for the 30–50 cm layer." + }, + { + "@type": "PropertyValue", + "name": "Soil moisture (50–100 cm)", + "description": "Monthly soil moisture for the 50–100 cm layer." 
+ } + ], + "encodingFormat": [ + "application/zip", + "application/x-netcdf", + "text/html" + ], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [ + { + "@type": "DataDownload", + "name": "Figshare dataset landing page (DOI)", + "description": "Persistent DOI landing page for the dataset on Figshare.", + "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Figshare dataset file access (version 1, file link)", + "description": "Direct access link to a dataset file hosted on Figshare (may require browser access controls).", + "contentUrl": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312/1?file=26220602", + "encodingFormat": ["application/zip"] + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Development of observation-based global multilayer soil moisture products for 1970 to 2016", + "author": [ + { "@type": "Person", "name": "Yaoping Wang" }, + { "@type": "Person", "name": "Jiafu Mao" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Earth System Science Data" + }, + "datePublished": "2021", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/essd-13-4385-2021" + } + ], + "sameAs": "https://doi.org/10.5194/essd-13-4385-2021" + } + ], + "about": [ + { "@type": "Thing", "name": "Soil" }, + { "@type": "Thing", "name": "Soil moisture" }, + { "@type": "Thing", "name": "Hydrology" }, + { "@type": "Thing", "name": "Climate" }, + { "@type": "Thing", "name": "Land surface processes" } + ], + "sameAs": [ + "https://doi.org/10.6084/m9.figshare.13661312.v1" + ] +} diff --git a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/prompt.txt b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/prompt.txt new file mode 100644 index 0000000..d7277b6 --- /dev/null +++ 
b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/prompt.txt @@ -0,0 +1,53 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: Global Multi-layer Soil Moisture +- URL: https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312/1?file=26220602 +- Description: Soil moisture datasets that cover the globe and the time period 1970–2016, at a spatial resolution of 0.5 degrees, time step of monthly, and vertical resolution of four soil layers (0-10cm, 10-30cm, 30-50cm, 50-100cm). +- Group/Category: soil +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: Hosted on Figshare. Global multi-layer soil moisture products; 0.5° resolution, monthly, four depth layers. Infer creator/publisher from Figshare and any paper or project cited on the Figshare page (e.g. GLDAS, reanalysis, or research group). Include variableMeasured for the four soil layers and temporalCoverage 1970–2016. 
+ +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://soilgrids.org/#dataset", + "name": "SoilGrids2 (SoilGrids 2.0 global soil property maps)", + "url": "https://soilgrids.org/", + "description": "SoilGrids2 provides global gridded soil property maps at approximately 250 m spatial resolution.", + "keywords": ["SoilGrids2", "ISRIC", "global soil maps", "soil moisture", "soil properties"], + "creator": [{"@type": "Organization", "name": "ISRIC", "url": "https://www.isric.org/"}], + "publisher": [{"@type": "Organization", "name": "ISRIC", "url": "https://www.isric.org/"}], + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "temporalCoverage": "1970-01-01/2016-12-31", + "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], + "encodingFormat": ["image/tiff", "application/zip"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "contentUrl": "https://figshare.com/", "encodingFormat": ["application/zip"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or identifier (e.g. the Figshare article URL with #dataset) +5. Include all available metadata fields +6. For creator/publisher use Organization (Figshare and/or the data producers if known from the page) +7. Include distribution with contentUrl pointing to the Figshare dataset/download +8. temporalCoverage: use "1970-01-01/2016-12-31" (or as stated on Figshare) +9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" +10. Include license and access information (Figshare often uses CC-BY) +11. 
Use proper JSON-LD structure (arrays for multiple values) +12. "keywords" as a JSON array of strings — never semicolon/comma-separated string +13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +14. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/webpage.jsonld b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/webpage.jsonld new file mode 100644 index 0000000..605b349 --- /dev/null +++ b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/webpage.jsonld @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "name": "Global Multi-layer Soil Moisture Products", + "description": "Webpage for the Global Multi-layer Soil Moisture Products dataset on Figshare—global soil moisture 1970–2016 at 0.5° resolution, monthly, four depth layers (0–10 cm, 10–30 cm, 30–50 cm, 50–100 cm). Authors: Yaoping Wang, Jiafu Mao (Oak Ridge National Laboratory). 
Published in Earth System Science Data.", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Figshare", + "url": "https://figshare.com/" + }, + "about": { + "@type": "Dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://doi.org/10.6084/m9.figshare.13661312.v1" + }, + "publisher": { + "@type": "Organization", + "name": "Figshare", + "url": "https://figshare.com/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset" + }, + "keywords": [ + "soil moisture", + "multi-layer", + "global", + "Figshare", + "Oak Ridge National Laboratory" + ] +} diff --git a/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld b/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld new file mode 100644 index 0000000..896a332 --- /dev/null +++ b/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld @@ -0,0 +1,191 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://www.hydrosheds.org/#dataset", + "name": "HydroSHEDS (Hydrological data and maps based on Shuttle Elevation Derivatives at multiple Scales)", + "url": "https://www.hydrosheds.org/", + "description": "HydroSHEDS provides freely available global hydrographic and hydrological baseline data layers derived primarily from spaceborne elevation data (notably SRTM) and related processing. The HydroSHEDS product suite supports hydro-ecological research and applications worldwide and includes gridded core layers (e.g., void-filled DEM, hydrologically conditioned DEM, flow direction, flow accumulation, flow length, land mask/sinks) as well as derived hydrographic products such as catchment and sub-basin boundaries, river networks, and lakes. 
Products are available at multiple resolutions and scales (e.g., 3 arc-second, 15 arc-second, 30 arc-second and coarser) and are distributed in standard GIS formats for regional and global analysis.", + "keywords": [ + "HydroSHEDS", + "hydrography", + "hydrology", + "watersheds", + "catchments", + "drainage basins", + "river networks", + "stream network", + "flow direction", + "flow accumulation", + "flow length", + "hydrologically conditioned DEM", + "SRTM", + "digital elevation model", + "freshwater", + "GIS", + "GeoTIFF", + "shapefile" + ], + "creator": [ + { + "@type": "Organization", + "name": "World Wildlife Fund (WWF)", + "url": "https://www.worldwildlife.org/" + } + ], + "provider": [ + { + "@type": "Organization", + "name": "HydroSHEDS Project", + "url": "https://www.hydrosheds.org/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "World Wildlife Fund (WWF)", + "url": "https://www.worldwildlife.org/" + } + ], + "isAccessibleForFree": true, + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Catchment / sub-basin boundaries", + "description": "Vector catchment and sub-basin boundary products derived from HydroSHEDS hydrography." + }, + { + "@type": "PropertyValue", + "name": "River networks", + "description": "Vector river/stream network products derived from HydroSHEDS hydrography." + }, + { + "@type": "PropertyValue", + "name": "Lakes and water bodies", + "description": "Lake and water body products distributed as part of the HydroSHEDS product suite." + }, + { + "@type": "PropertyValue", + "name": "Void-filled DEM", + "description": "Digital elevation model underpinning HydroSHEDS core layers." 
+ }, + { + "@type": "PropertyValue", + "name": "Conditioned DEM", + "description": "Hydrologically conditioned DEM used to derive flow products." + }, + { + "@type": "PropertyValue", + "name": "Flow direction", + "description": "Drainage direction grid derived from the conditioned DEM." + }, + { + "@type": "PropertyValue", + "name": "Flow accumulation", + "description": "Upstream contributing area / upstream cell count derived from flow direction." + }, + { + "@type": "PropertyValue", + "name": "Flow length", + "description": "Upstream and/or downstream flow length derived from flow direction." + }, + { + "@type": "PropertyValue", + "name": "Land mask and sinks", + "description": "Land/ocean mask and coastal/inland sink indicators used in HydroSHEDS processing." + } + ], + "encodingFormat": [ + "image/tiff", + "application/geotiff", + "application/zip", + "application/x-esri-shapefile", + "application/vnd.esri.filegdb" + ], + "license": "https://data.hydrosheds.org/file/technical-documentation/HydroSHEDS_TechDoc_v1_4.pdf", + "distribution": [ + { + "@type": "DataDownload", + "name": "HydroSHEDS website (overview)", + "description": "HydroSHEDS main website with product overview and navigation to downloads.", + "contentUrl": "https://www.hydrosheds.org/", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "HydroSHEDS core data downloads (GeoTIFF)", + "description": "Download page for HydroSHEDS core raster layers (e.g., DEM, conditioned DEM, flow direction, flow accumulation, flow length, land mask) in multiple resolutions, provided as GeoTIFF tiles and regional/global bundles.", + "contentUrl": "https://www.hydrosheds.org/hydrosheds-core-downloads", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "HydroSHEDS products index", + "description": "Products landing page providing access to HydroSHEDS datasets (core layers and derived hydrographic products).", + "contentUrl": 
"https://www.hydrosheds.org/products", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "HydroSHEDS technical documentation and license agreement", + "description": "Technical documentation for HydroSHEDS v1 including the HydroSHEDS v1 license agreement (Appendix A).", + "contentUrl": "https://data.hydrosheds.org/file/technical-documentation/HydroSHEDS_TechDoc_v1_4.pdf", + "encodingFormat": ["application/pdf"] + } + ], + "measurementTechnique": [ + "Hydrographic derivation from digital elevation models (primarily SRTM) including hydrologic conditioning and flow routing", + "Derivation of flow direction, flow accumulation, and related hydrological grids from conditioned elevation data", + "Generation of hydrographic features (basins/catchments, river networks, lakes) from processed hydrographic foundations" + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "New global hydrography derived from spaceborne elevation data", + "author": [ + { "@type": "Person", "name": "B. Lehner" }, + { "@type": "Person", "name": "K. Verdin" }, + { "@type": "Person", "name": "A. 
Jarvis" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Eos, Transactions, American Geophysical Union" + }, + "datePublished": "2008", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1029/2008EO100001" + } + ], + "sameAs": "https://doi.org/10.1029/2008EO100001" + } + ], + "about": [ + { "@type": "Thing", "name": "Hydrology" }, + { "@type": "Thing", "name": "Hydrography" }, + { "@type": "Thing", "name": "Watersheds" }, + { "@type": "Thing", "name": "River networks" }, + { "@type": "Thing", "name": "Catchments" }, + { "@type": "Thing", "name": "Digital elevation models" }, + { "@type": "Thing", "name": "Freshwater conservation" } + ], + "sameAs": [ + "https://www.worldwildlife.org/our-work/science/hydrosheds/" + ] +} diff --git a/data/objects/summoned/generated/HydroSHEDS/prompt.txt b/data/objects/summoned/generated/HydroSHEDS/prompt.txt new file mode 100644 index 0000000..90850c7 --- /dev/null +++ b/data/objects/summoned/generated/HydroSHEDS/prompt.txt @@ -0,0 +1,52 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: HydroSHEDS +- URL: https://www.hydrosheds.org +- Description: Various hydrographic data products include catchment boundaries, river networks, and lakes at multiple resolutions and scales. +- Group/Category: hydrology +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: HydroSHEDS (Hydrological data and maps based on SHuttle Elevation Derivatives at multiple Scales) is a product of the World Wildlife Fund (WWF) and partners. Global hydrographic data: drainage basins, river networks, stream order, lakes, at multiple resolutions (e.g. 30 arc-second, 15 arc-second, 3 arc-second). 
Data derived from SRTM and other DEMs. Infer distribution and variables from typical HydroSHEDS products (catchment boundaries, flow direction, flow accumulation, river networks, etc.). + +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://soilgrids.org/#dataset", + "name": "SoilGrids2 (SoilGrids 2.0 global soil property maps)", + "url": "https://soilgrids.org/", + "description": "SoilGrids2 provides global gridded soil property maps at approximately 250 m spatial resolution.", + "keywords": ["SoilGrids2", "ISRIC", "global soil maps", "hydrology", "soil properties"], + "creator": [{"@type": "Organization", "name": "ISRIC", "url": "https://www.isric.org/"}], + "publisher": [{"@type": "Organization", "name": "ISRIC", "url": "https://www.isric.org/"}], + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "Bulk density", "description": "Soil bulk density"}], + "encodingFormat": ["image/tiff", "application/zip"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "contentUrl": "https://example.org/data", "encodingFormat": ["image/tiff"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or identifier (e.g. https://www.hydrosheds.org/#dataset) +5. Include all available metadata fields +6. Creator/publisher: World Wildlife Fund (WWF) and/or HydroSHEDS project partners (https://www.hydrosheds.org or https://www.worldwildlife.org) +7. Include distribution (e.g. link to hydrosheds.org download or data page) +8. Add temporalCoverage if known +9. 
spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" +10. Include license and access information (HydroSHEDS is often free for non-commercial / research; state as known) +11. Use proper JSON-LD structure (arrays for multiple values) +12. "keywords" as a JSON array of strings — never semicolon/comma-separated string +13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +14. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/HydroSHEDS/webpage.jsonld b/data/objects/summoned/generated/HydroSHEDS/webpage.jsonld new file mode 100644 index 0000000..32edacb --- /dev/null +++ b/data/objects/summoned/generated/HydroSHEDS/webpage.jsonld @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://www.hydrosheds.org/", + "name": "HydroSHEDS: Hydrological data and maps based on Shuttle Elevation Derivatives at multiple Scales", + "description": "Webpage for HydroSHEDS—global hydrographic and hydrological baseline data (catchment boundaries, river networks, lakes, flow direction, flow accumulation, DEMs) at multiple resolutions, derived from SRTM and distributed by WWF and the HydroSHEDS project.", + "url": "https://www.hydrosheds.org/", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "World Wildlife Fund (WWF)", + "url": "https://www.worldwildlife.org/" + }, + "about": { + "@type": "Dataset", + "name": "HydroSHEDS", + "url": "https://www.hydrosheds.org/" + }, + "publisher": { + "@type": "Organization", + "name": "World Wildlife Fund (WWF)", + "url": "https://www.worldwildlife.org/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "HydroSHEDS (Hydrological data and maps based on Shuttle Elevation Derivatives at multiple Scales)", + "url": 
"https://www.hydrosheds.org/#dataset" + }, + "keywords": [ + "HydroSHEDS", + "hydrography", + "hydrology", + "watersheds", + "WWF" + ] +} diff --git a/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld b/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld new file mode 100644 index 0000000..51395b7 --- /dev/null +++ b/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld @@ -0,0 +1,194 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://hydrography.org/#dataset", + "name": "Hydrography90m", + "url": "https://hydrography.org/", + "description": "Hydrography90m is a high-resolution global hydrographic dataset derived from the MERIT Hydro digital elevation model at 3 arc-second (~90 m at the equator). It provides a globally seamless, standardized representation of stream channels and drainage basins with associated topographic and topological attributes for flow routing and network analysis. Products include stream and basin layers, sub-catchments linked to stream segments, stream order and slope metrics, flow-routing topology via unique segment identifiers, and tiled raster/vector layers suitable for large-scale hydrology, geomorphology, ecology, and environmental modeling applications.", + "keywords": [ + "Hydrography90m", + "hydrography", + "hydrology", + "drainage basins", + "catchments", + "sub-catchments", + "river networks", + "stream network", + "flow routing", + "stream order", + "stream slope", + "topographic attributes", + "topological attributes", + "MERIT Hydro", + "DEM", + "90 m", + "global" + ], + "creator": [ + { "@type": "Person", "name": "Giuseppe Amatulli" }, + { "@type": "Person", "name": "Jaime R. 
Garcia Marquez" }, + { "@type": "Person", "name": "Tushar Sethi" }, + { "@type": "Person", "name": "Jens Kiesel" }, + { "@type": "Person", "name": "Afroditi Grigoropoulou" }, + { "@type": "Person", "name": "Maria M. Üblacker" }, + { "@type": "Person", "name": "Longzhu Q. Shen" }, + { "@type": "Person", "name": "Sami Domisch" } + ], + "provider": [ + { + "@type": "Organization", + "name": "Hydrography.org", + "url": "https://hydrography.org/" + }, + { + "@type": "Organization", + "name": "Leibniz-Institute of Freshwater Ecology and Inland Fisheries (IGB)", + "url": "https://www.igb-berlin.de/en" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "Leibniz-Institute of Freshwater Ecology and Inland Fisheries (IGB)", + "url": "https://www.igb-berlin.de/en" + } + ], + "datePublished": "2022-08-09", + "isAccessibleForFree": true, + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "spatialResolution": [ + "3 arc-second (~90 m at the equator)", + "90 m" + ], + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Stream channels", + "description": "Global stream channel network with unique segment identifiers and topology attributes." + }, + { + "@type": "PropertyValue", + "name": "Drainage basins", + "description": "Global drainage basin delineations derived from flow routing." + }, + { + "@type": "PropertyValue", + "name": "Sub-catchments", + "description": "Sub-catchment polygons linked to individual stream segments." + }, + { + "@type": "PropertyValue", + "name": "Network topology", + "description": "Upstream/downstream connectivity and routing attributes for stream segments." + }, + { + "@type": "PropertyValue", + "name": "Stream order", + "description": "Stream order metrics computed for the network." + }, + { + "@type": "PropertyValue", + "name": "Stream slope", + "description": "Slope metrics computed along stream segments." 
+ }, + { + "@type": "PropertyValue", + "name": "Distance metrics", + "description": "In-stream and among-stream distance measures for network analysis." + } + ], + "measurementTechnique": [ + "Derivation of hydrographic networks and basins from MERIT Hydro DEM (~90 m) using hydrologic conditioning, flow routing, and network extraction", + "Computation of stream topology, stream order, and stream slope metrics for global hydrographic analysis" + ], + "encodingFormat": [ + "application/zip", + "image/tiff", + "application/geotiff", + "application/x-esri-shapefile", + "application/octet-stream", + "text/html" + ], + "license": "https://creativecommons.org/licenses/by-nc/4.0/", + "distribution": [ + { + "@type": "DataDownload", + "name": "Hydrography90m website (project landing page)", + "description": "Project website for Hydrography90m with documentation and navigation to layers and downloads.", + "contentUrl": "https://hydrography.org/", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Hydrography90m layers overview", + "description": "Overview of Hydrography90m layers available for download.", + "contentUrl": "https://hydrography.org/hydrography90m/hydrography90m_layers", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Hydrography90m batch download script", + "description": "Scripted procedure to download tiled raster and vector layers of Hydrography90m.", + "contentUrl": "https://hydrography.org/hydrography90m/hydrography90m_download_script", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "IGB FRED dataset landing page (DOI)", + "description": "Institutional dataset record and distribution entry for Hydrography90m hosted by IGB (FRED).", + "contentUrl": "https://doi.org/10.18728/igb-fred-762.1", + "encodingFormat": ["text/html"] + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Hydrography90m: A new high-resolution global hydrographic dataset", + 
"author": [ + { "@type": "Person", "name": "Giuseppe Amatulli" }, + { "@type": "Person", "name": "Jaime R. Garcia Marquez" }, + { "@type": "Person", "name": "Tushar Sethi" }, + { "@type": "Person", "name": "Jens Kiesel" }, + { "@type": "Person", "name": "Afroditi Grigoropoulou" }, + { "@type": "Person", "name": "Maria M. Üblacker" }, + { "@type": "Person", "name": "Longzhu Q. Shen" }, + { "@type": "Person", "name": "Sami Domisch" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Earth System Science Data" + }, + "datePublished": "2022", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/essd-14-4525-2022" + } + ], + "sameAs": "https://doi.org/10.5194/essd-14-4525-2022" + } + ], + "about": [ + { "@type": "Thing", "name": "Hydrology" }, + { "@type": "Thing", "name": "Hydrography" }, + { "@type": "Thing", "name": "Drainage basins" }, + { "@type": "Thing", "name": "River networks" }, + { "@type": "Thing", "name": "Flow routing" }, + { "@type": "Thing", "name": "Global environmental mapping" } + ], + "sameAs": [ + "https://www.igb-berlin.de/en/hydrography90m-dataset", + "https://doi.org/10.18728/igb-fred-762.1" + ] +} diff --git a/data/objects/summoned/generated/Hydrography90m/prompt.txt b/data/objects/summoned/generated/Hydrography90m/prompt.txt new file mode 100644 index 0000000..02e0d86 --- /dev/null +++ b/data/objects/summoned/generated/Hydrography90m/prompt.txt @@ -0,0 +1,53 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. 
+ +**Dataset Information**: +- Name: Hydrography90m +- URL: https://projects.gitlab.io/auth?domain=https://hydrography.org&state=SkAZJPAM4Sq5vq1JjCPPZA== +- Alternative/canonical URL: https://hydrography.org (the auth URL redirects to this domain; use hydrography.org for @id/url if appropriate for a stable identifier) +- Description: Hydrographic dataset describing topographic and topological properties of drainage basins and streams. +- Group/Category: hydrology +- Type: Sitemap (discovery via sitemap; dataset is hydrographic products at ~90 m resolution) +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: No box given in source; use global extent, e.g. "west,south east,north" = "-180,-60 180,90", or leave as global land/drainage coverage. +- Note: The project website has been reported as "website down" in the source spreadsheet; still create full metadata for discovery and for when the site is available again. +- Extracted Metadata: Hydrography90m provides hydrographic data (drainage basins, streams, topographic and topological properties) at approximately 90 m resolution. The project is associated with hydrography.org (and possibly GitLab). Infer creator/publisher if you know them (e.g. research group or institution); otherwise describe the dataset and use the dataset URL. 
+ +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://www.hydrosheds.org/#dataset", + "name": "HydroSHEDS", + "url": "https://www.hydrosheds.org/", + "description": "Global hydrographic and hydrological baseline data.", + "keywords": ["HydroSHEDS", "hydrography", "hydrology", "watersheds", "river networks"], + "creator": [{"@type": "Organization", "name": "World Wildlife Fund (WWF)", "url": "https://www.worldwildlife.org/"}], + "publisher": [{"@type": "Organization", "name": "World Wildlife Fund (WWF)", "url": "https://www.worldwildlife.org/"}], + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "-180,-60 180,90"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "River networks", "description": "Stream network"}], + "encodingFormat": ["image/tiff", "application/geotiff"], + "distribution": [{"@type": "DataDownload", "contentUrl": "https://example.org/data", "encodingFormat": ["image/tiff"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with a stable URL (e.g. https://hydrography.org/#dataset if that is the canonical site; otherwise the provided URL with #dataset) +5. Include all available metadata fields +6. Creator/publisher: infer from hydrography.org / Hydrography90m project if known; otherwise use a generic description +7. Include distribution (e.g. link to hydrography.org or the sitemap/auth URL for when the site is up) +8. spatialCoverage: Use Place with geo GeoShape. Box format MUST be "west,south east,north". Use global extent "-180,-60 180,90" since no specific box was provided. +9. Include license and access information if known +10. Use proper JSON-LD structure (arrays for multiple values) +11. 
"keywords" as a JSON array of strings — never semicolon/comma-separated string +12. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +13. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/Hydrography90m/webpage.jsonld b/data/objects/summoned/generated/Hydrography90m/webpage.jsonld new file mode 100644 index 0000000..86ebc5b --- /dev/null +++ b/data/objects/summoned/generated/Hydrography90m/webpage.jsonld @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://hydrography.org/", + "name": "Hydrography90m: High-resolution global hydrographic dataset", + "description": "Webpage for Hydrography90m—global hydrographic dataset at ~90 m (3 arc-second) from MERIT Hydro: stream channels, drainage basins, sub-catchments, stream order and slope, flow-routing topology. By Amatulli et al.; IGB/hydrography.org. 
Published in Earth System Science Data (2022).", + "url": "https://hydrography.org/", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Hydrography.org", + "url": "https://hydrography.org/" + }, + "about": { + "@type": "Dataset", + "name": "Hydrography90m", + "url": "https://hydrography.org/" + }, + "publisher": { + "@type": "Organization", + "name": "Leibniz-Institute of Freshwater Ecology and Inland Fisheries (IGB)", + "url": "https://www.igb-berlin.de/en" + }, + "mainEntity": { + "@type": "Dataset", + "name": "Hydrography90m", + "url": "https://hydrography.org/#dataset" + }, + "keywords": [ + "Hydrography90m", + "hydrography", + "hydrology", + "IGB", + "MERIT Hydro" + ] +} diff --git a/data/objects/summoned/generated/Shale_Network/prompt.txt b/data/objects/summoned/generated/Shale_Network/prompt.txt new file mode 100644 index 0000000..03058c7 --- /dev/null +++ b/data/objects/summoned/generated/Shale_Network/prompt.txt @@ -0,0 +1,53 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: Shale Network +- URL: https://doi.org/10.4211/his-data-shalenetwork +- Description: Water quality data in the regions of oil and gas production. +- Group/Category: hydrogeochemistry +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: Hosted via DOI (CUAHSI HydroShare / HIS). The Shale Network provides water quality data from regions of oil and gas production (e.g. shale gas development). Infer creator (e.g. CUAHSI, university or consortium partners), publisher, temporal coverage, variables (e.g. 
water chemistry, contaminants), distribution (DOI landing page, download or API links), and license/terms from the DOI resolution page and any linked documentation. + +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "description": "Global multi-layer soil moisture products covering 1970–2016.", + "keywords": ["soil moisture", "global", "Figshare", "hydrology"], + "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Organization", "name": "Oak Ridge National Laboratory"}], + "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], + "encodingFormat": ["application/zip", "application/x-netcdf"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or DOI (e.g. https://doi.org/10.4211/his-data-shalenetwork#dataset) +5. Include all available metadata fields +6. Creator/publisher: infer from the DOI landing page (e.g. CUAHSI, HydroShare, Shale Network project partners) +7. 
Include distribution with contentUrl to the DOI and/or data access URL; use encodingFormat as a JSON array (e.g. ["text/html"]) +8. temporalCoverage: infer from the site if possible +9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" +10. Include license/terms if stated +11. Use proper JSON-LD structure (arrays for multiple values) +12. "keywords" as a JSON array of strings — never semicolon/comma-separated string +13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +14. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld new file mode 100644 index 0000000..19bf22b --- /dev/null +++ b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld @@ -0,0 +1,161 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.4211/his-data-shalenetwork#dataset", + "name": "Shale Network (ShaleNetwork Database)", + "url": "https://doi.org/10.4211/his-data-shalenetwork", + "description": "The Shale Network database is a collaborative water quality and quantity data resource for regions of hydrocarbon extraction (including shale gas development), acting as an \"honest broker\" that collates datasets from academic researchers, government agencies, industry, nonprofit entities, and watershed groups. 
The database is published through the CUAHSI Hydrologic Information System (HIS) for discovery and access via HydroClient and associated web services, and is used to establish background concentrations and assess environmental impacts across energy production regions.", + "keywords": [ + "Shale Network", + "ShaleNetwork", + "water quality", + "water quantity", + "hydrogeochemistry", + "shale gas", + "oil and gas production", + "hydrocarbon extraction", + "groundwater chemistry", + "surface water chemistry", + "CUAHSI", + "HIS", + "WaterOneFlow", + "ODM" + ], + "creator": [ + { + "@type": "Organization", + "name": "Shale Network", + "url": "https://shalenetwork.org/" + }, + { + "@type": "Person", + "name": "Susan L. Brantley", + "email": "sxb7@psu.edu", + "affiliation": { + "@type": "Organization", + "name": "The Pennsylvania State University", + "url": "https://www.psu.edu/" + } + } + ], + "provider": [ + { + "@type": "Organization", + "name": "Consortium of Universities for the Advancement of Hydrologic Sciences, Inc. (CUAHSI)", + "url": "https://www.cuahsi.org/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "Consortium of Universities for the Advancement of Hydrologic Sciences, Inc. (CUAHSI)", + "url": "https://www.cuahsi.org/" + } + ], + "isAccessibleForFree": true, + "temporalCoverage": "2011-01-01/..", + "spatialCoverage": { + "@type": "Place", + "name": "Energy production regions (primarily northeastern USA; see dataset portals for exact coverage)", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Common water quality measurements", + "description": "Common field and laboratory measurements such as pH, major ions (Na, K, Mg, Ca, sulfate, chloride, bromide), nutrients (ammonium, nitrate, nitrite, total N), alkalinity/acidity, hardness, TDS, and related parameters." 
+ }, + { + "@type": "PropertyValue", + "name": "Trace elements", + "description": "Trace element concentrations such as Al, As, Ba, B, Cd, Cr, Co, Cu, Fe, Pb, Li, Mn, Hg, Mo, Ni, Se, Ag, Sr, Th, U, Zn (availability varies by site and dataset)." + }, + { + "@type": "PropertyValue", + "name": "Naturally occurring radioactive material (NORM)", + "description": "Radiological measurements such as gross alpha/beta, Ra-226, and Ra-228 (availability varies)." + }, + { + "@type": "PropertyValue", + "name": "Organic constituents", + "description": "Organic compounds and indicators such as benzene, toluene, ethylbenzene, xylenes, naphthalene, oil and grease, phenolics, and related constituents (availability varies)." + }, + { + "@type": "PropertyValue", + "name": "Water quantity (where available)", + "description": "Water quantity observations associated with monitoring sites where contributed and published through HIS services." + } + ], + "measurementTechnique": [ + "Compilation and harmonization of water quality and quantity datasets contributed by multiple organizations", + "Publication through CUAHSI HIS using the Observations Data Model (ODM) and WaterOneFlow web services" + ], + "encodingFormat": [ + "text/html", + "application/xml", + "text/xml", + "text/csv" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "DOI landing page", + "description": "Persistent identifier landing page for the Shale Network database.", + "contentUrl": "https://doi.org/10.4211/his-data-shalenetwork", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "HIS Central network registration (Shale Network)", + "description": "HIS Central registry entry for the Shale Network WaterOneFlow service and citation information.", + "contentUrl": "https://hiscentral.cuahsi.org/pub_network.aspx?n=228", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "HydroClient data portal access", + "description": "CUAHSI HydroClient portal for 
discovering and downloading published observations.", + "contentUrl": "https://data.cuahsi.org/", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Shale Network data access documentation", + "description": "Project documentation describing ways to access Shale Network data, including HydroClient, HydroShare, and Penn State DataCommons.", + "contentUrl": "https://shalenetwork.org/database/data-access.html", + "encodingFormat": ["text/html"] + } + ], + "citation": [ + { + "@type": "CreativeWork", + "name": "Shale Network Database", + "author": [ + { "@type": "Person", "name": "Susan L. Brantley" } + ], + "datePublished": "2011", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.4211/his-data-shalenetwork" + } + ], + "sameAs": "https://doi.org/10.4211/his-data-shalenetwork" + } + ], + "about": [ + { "@type": "Thing", "name": "Hydrogeochemistry" }, + { "@type": "Thing", "name": "Water quality" }, + { "@type": "Thing", "name": "Oil and gas development" }, + { "@type": "Thing", "name": "Shale gas" }, + { "@type": "Thing", "name": "Groundwater" }, + { "@type": "Thing", "name": "Surface water" } + ] +} diff --git a/data/objects/summoned/generated/Shale_Network/webpage.jsonld b/data/objects/summoned/generated/Shale_Network/webpage.jsonld new file mode 100644 index 0000000..6bf7ddd --- /dev/null +++ b/data/objects/summoned/generated/Shale_Network/webpage.jsonld @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.4211/his-data-shalenetwork", + "name": "Shale Network (ShaleNetwork Database)", + "description": "DOI landing page for the Shale Network database: water quality and quantity data from oil and gas production regions, published via CUAHSI HIS. 
Shale Network, Penn State, CUAHSI.", + "url": "https://doi.org/10.4211/his-data-shalenetwork", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "CUAHSI HydroShare", + "url": "https://www.hydroshare.org/" + }, + "about": { + "@type": "Dataset", + "name": "Shale Network", + "url": "https://doi.org/10.4211/his-data-shalenetwork#dataset" + }, + "publisher": { + "@type": "Organization", + "name": "Consortium of Universities for the Advancement of Hydrologic Sciences, Inc. (CUAHSI)", + "url": "https://www.cuahsi.org/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "Shale Network (ShaleNetwork Database)", + "url": "https://doi.org/10.4211/his-data-shalenetwork#dataset" + }, + "keywords": [ + "Shale Network", + "water quality", + "hydrogeochemistry", + "CUAHSI", + "HIS" + ] +} diff --git a/data/objects/summoned/generated/SoilGrids2/prompt.txt b/data/objects/summoned/generated/SoilGrids2/prompt.txt new file mode 100644 index 0000000..7de4156 --- /dev/null +++ b/data/objects/summoned/generated/SoilGrids2/prompt.txt @@ -0,0 +1,53 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: SoilGrids2 +- URL: https://data.isric.org/geonetwork/srv/api/sitemap +- Catalog/record URL: https://data.isric.org/geonetwork/srv/api/records/41cb0ae9-1604-4807-96e6-0dc8c94c5d22?language=all +- Description: Soil bulk density, organic carbon content, pH, soil texture fractions and coarse fragments etc. (ISRIC global soil property maps.) +- Group/Category: soil +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: ISRIC – International Soil Reference and Information Centre. SoilGrids provides global soil property maps at about 250 m resolution. 
Data are discoverable via the GeoNetwork sitemap/catalog URL; distribution is typically via https://files.isric.org/soilgrids/ or similar. Include variables such as bulk density, organic carbon, pH, texture, coarse fragments where appropriate. + +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://www.chelsa-climate.org/datasets/chelsa_bioclim#dataset", + "name": "CHELSA-bioclim (V2.1)", + "url": "https://www.chelsa-climate.org/datasets/chelsa_bioclim", + "description": "CHELSA-bioclim is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model.", + "keywords": ["CHELSA", "bioclim", "bioclimatic variables", "ecology", "species distribution modeling", "climate predictors"], + "creator": {"@type": "Organization", "name": "WSL", "url": "https://www.wsl.ch/"}, + "publisher": {"@type": "Organization", "name": "WSL", "url": "https://www.wsl.ch/"}, + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "-180.0,-60.0 180.0,90.0"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "BIO1–BIO19", "description": "Standard bioclimatic variables"}], + "encodingFormat": ["image/tiff", "application/geotiff"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "encodingFormat": ["image/tiff"], "contentUrl": "https://example.org/data"}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or identifier (e.g. https://data.isric.org/geonetwork/srv/api/records/41cb0ae9-1604-4807-96e6-0dc8c94c5d22#dataset or the main SoilGrids landing page if preferred) +5. Include all available metadata fields +6. 
For creator/publisher use Organization: ISRIC – International Soil Reference and Information Centre (https://www.isric.org) +7. Include distribution information (e.g. GeoNetwork catalog URL and/or https://files.isric.org/soilgrids/ if applicable) +8. Add temporalCoverage if time period is known +9. Add spatialCoverage: Use Place with geo containing GeoShape. The box format MUST be: "west,south east,north". For this dataset use: "20,-40 50,10" +10. Include license and access information (SoilGrids is typically CC-BY or similar; state if unknown) +11. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) +12. Set "keywords" as a JSON array of strings, e.g. "keywords": ["soil", "bulk density", "organic carbon", "pH", "SoilGrids", "ISRIC"] — never a single semicolon- or comma-separated string +13. Set "encodingFormat" as a JSON array of strings when listing multiple formats — never a single semicolon- or comma-separated string +14. Add this exact comment for AI-generated disclosure: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld b/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld new file mode 100644 index 0000000..4909dfa --- /dev/null +++ b/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld @@ -0,0 +1,140 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://soilgrids.org/#dataset", + "name": "SoilGrids2 (SoilGrids 2.0 global soil property maps)", + "url": "https://soilgrids.org/", + "description": "SoilGrids2 (SoilGrids 2.0) provides global gridded soil property maps at approximately 250 m spatial resolution. 
The dataset includes predicted soil properties such as bulk density, soil organic carbon content, soil pH, soil texture fractions (sand, silt, clay), and coarse fragments, among others. SoilGrids products are discoverable via the ISRIC Data Hub (GeoNetwork catalog) and distributed through ISRIC file services (e.g., WebDAV directories under files.isric.org/soilgrids/).", + "keywords": [ + "SoilGrids2", + "SoilGrids 2.0", + "ISRIC", + "global soil maps", + "digital soil mapping", + "bulk density", + "soil organic carbon", + "soil pH", + "soil texture", + "sand", + "silt", + "clay", + "coarse fragments", + "soil properties", + "geospatial raster" + ], + "creator": [ + { + "@type": "Organization", + "name": "ISRIC – International Soil Reference and Information Centre", + "url": "https://www.isric.org/" + } + ], + "provider": [ + { + "@type": "Organization", + "name": "ISRIC – International Soil Reference and Information Centre", + "url": "https://www.isric.org/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "ISRIC – International Soil Reference and Information Centre", + "url": "https://www.isric.org/" + } + ], + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "spatialResolution": "Approximately 250 m", + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Bulk density", + "description": "Predicted soil bulk density maps (global gridded layers)." + }, + { + "@type": "PropertyValue", + "name": "Soil organic carbon content", + "description": "Predicted soil organic carbon content maps (global gridded layers)." + }, + { + "@type": "PropertyValue", + "name": "Soil pH (in H2O)", + "description": "Predicted soil pH maps (global gridded layers)." + }, + { + "@type": "PropertyValue", + "name": "Soil texture fractions", + "description": "Predicted sand, silt, and clay fraction maps (global gridded layers)." 
+ }, + { + "@type": "PropertyValue", + "name": "Coarse fragments", + "description": "Predicted coarse fragment content maps (global gridded layers)." + } + ], + "encodingFormat": [ + "image/tiff", + "application/geotiff", + "application/zip", + "text/html", + "application/xml" + ], + "license": "https://creativecommons.org/licenses/by/4.0/", + "isAccessibleForFree": true, + "distribution": [ + { + "@type": "DataDownload", + "name": "ISRIC Data Hub (GeoNetwork) catalog record", + "description": "GeoNetwork catalog record in the ISRIC Data Hub describing a SoilGrids-related dataset entry.", + "contentUrl": "https://data.isric.org/geonetwork/srv/api/records/41cb0ae9-1604-4807-96e6-0dc8c94c5d22?language=all", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "ISRIC Data Hub sitemap / discovery endpoint", + "description": "GeoNetwork API sitemap endpoint used for dataset discovery and indexing.", + "contentUrl": "https://data.isric.org/geonetwork/srv/api/sitemap", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "ISRIC SoilGrids file distribution (WebDAV root)", + "description": "ISRIC WebDAV directory listing for SoilGrids distributions (includes latest and former releases).", + "contentUrl": "https://files.isric.org/soilgrids/", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "SoilGrids latest data directory (WebDAV)", + "description": "Directory listing for SoilGrids latest data products (organized by variable).", + "contentUrl": "https://files.isric.org/soilgrids/latest/data/", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "ISRIC SoilGrids overview page", + "description": "ISRIC overview page describing SoilGrids, licensing, and access methods.", + "contentUrl": "https://isric.org/explore/soilgrids", + "encodingFormat": ["text/html"] + } + ], + "about": [ + { "@type": "Thing", "name": "Soil" }, + { "@type": "Thing", "name": "Soil properties" 
}, + { "@type": "Thing", "name": "Digital soil mapping" }, + { "@type": "Thing", "name": "Geospatial raster data" } + ], + "sameAs": [ + "https://isric.org/explore/soilgrids", + "https://files.isric.org/soilgrids/" + ] +} diff --git a/data/objects/summoned/generated/SoilGrids2/webpage.jsonld b/data/objects/summoned/generated/SoilGrids2/webpage.jsonld new file mode 100644 index 0000000..1138090 --- /dev/null +++ b/data/objects/summoned/generated/SoilGrids2/webpage.jsonld @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://soilgrids.org/", + "name": "SoilGrids2: Global soil property maps", + "description": "Webpage for SoilGrids2 (SoilGrids 2.0)—global gridded soil property maps at ~250 m resolution from ISRIC. The site provides access to bulk density, organic carbon, pH, texture, and coarse fragments; discovery via ISRIC Data Hub (GeoNetwork) and distribution via files.isric.org/soilgrids/.", + "url": "https://soilgrids.org/", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "ISRIC – International Soil Reference and Information Centre", + "url": "https://www.isric.org/" + }, + "about": { + "@type": "Dataset", + "name": "SoilGrids2", + "url": "https://soilgrids.org/#dataset" + }, + "publisher": { + "@type": "Organization", + "name": "ISRIC – International Soil Reference and Information Centre", + "url": "https://www.isric.org/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "SoilGrids2 (SoilGrids 2.0 global soil property maps)", + "url": "https://soilgrids.org/#dataset" + }, + "keywords": [ + "SoilGrids2", + "ISRIC", + "global soil maps", + "digital soil mapping", + "soil properties" + ] +} diff --git a/data/objects/summoned/generated/WATERBASE/prompt.txt b/data/objects/summoned/generated/WATERBASE/prompt.txt new file mode 100644 index 0000000..07f51a1 --- /dev/null +++ b/data/objects/summoned/generated/WATERBASE/prompt.txt @@ -0,0 +1,53 @@ 
+You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset. + +**Dataset Information**: +- Name: WATERBASE +- URL: https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1 +- Description: The status and quality of Europe's rivers, lakes, groundwater bodies and transitional, coastal and marine waters. +- Group/Category: hydrogeochemistry +- Creator: +- Provider: +- Publisher: +- Keywords: +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: Hosted on European Environment Agency (EEA) Data Hub. WATERBASE is a European water quality/status dataset covering rivers, lakes, groundwater, transitional, coastal and marine waters. Infer creator (e.g. EEA, European Commission), publisher, temporal coverage, variables (e.g. water quality parameters, ecological status), distribution (data hub item URL, download links), and license/terms from the EEA data hub page and any linked documentation. 
+ +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "description": "Global multi-layer soil moisture products covering 1970–2016.", + "keywords": ["soil moisture", "global", "Figshare", "hydrology"], + "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Organization", "name": "Oak Ridge National Laboratory"}], + "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], + "encodingFormat": ["application/zip", "application/x-netcdf"], + "license": "https://creativecommons.org/licenses/by/4.0/", + "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" +4. Include @id with the dataset URL or stable identifier (e.g. the EEA data hub URL with #dataset) +5. Include all available metadata fields +6. Creator/publisher: infer from EEA site (e.g. European Environment Agency, European Commission) +7. Include distribution with contentUrl to the data hub page and/or download URL; use encodingFormat as a JSON array (e.g. ["text/html"] or list file formats if known) +8. temporalCoverage: infer from the page (European water reporting often has multi-year or periodic updates) +9. 
spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" (note: WATERBASE is European; you may state coverage in description; keep box as specified for consistency) +10. Include license/terms if stated (EEA data often free reuse with attribution) +11. Use proper JSON-LD structure (arrays for multiple values) +12. "keywords" as a JSON array of strings — never semicolon/comma-separated string +13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +14. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/WATERBASE/waterbase.jsonld b/data/objects/summoned/generated/WATERBASE/waterbase.jsonld new file mode 100644 index 0000000..0a037f4 --- /dev/null +++ b/data/objects/summoned/generated/WATERBASE/waterbase.jsonld @@ -0,0 +1,130 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1#dataset", + "name": "WATERBASE (Waterbase - Water Quality ICM)", + "url": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1", + "description": "WATERBASE is the generic name given to the European Environment Agency (EEA) databases on the status and quality of Europe's rivers, lakes, groundwater bodies, and transitional, coastal and marine waters. The Waterbase - Water Quality ICM dataset contains time series information on nutrients, organic matter, hazardous substances, pesticides and other chemical substances, reported by EEA member and cooperating countries from monitoring sites and aggregated reporting streams. 
The dataset supports European water assessments, indicators, and reporting obligations under WISE State of Environment (SoE) water quality reporting and related frameworks.", + "keywords": [ + "WATERBASE", + "Waterbase", + "water quality", + "hydrogeochemistry", + "rivers", + "lakes", + "groundwater", + "coastal waters", + "marine waters", + "transitional waters", + "nutrients", + "pesticides", + "hazardous substances", + "organic matter", + "WISE", + "EEA" + ], + "creator": [ + { + "@type": "Organization", + "name": "European Environment Agency (EEA)", + "url": "https://www.eea.europa.eu/" + } + ], + "provider": [ + { + "@type": "Organization", + "name": "European Environment Agency (EEA)", + "url": "https://www.eea.europa.eu/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "European Environment Agency (EEA)", + "url": "https://www.eea.europa.eu/" + } + ], + "datePublished": "2025-07-02", + "version": "01.00", + "isAccessibleForFree": true, + "license": "https://creativecommons.org/licenses/by/4.0/", + "temporalCoverage": "1900-01-01/2024-12-31", + "spatialCoverage": { + "@type": "Place", + "name": "Europe (reporting countries; see dataset documentation for exact coverage)", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Water quality parameters", + "description": "Measured and aggregated parameters describing chemical and physico-chemical water quality in inland and coastal/marine waters (including nutrients, organic matter, hazardous substances, pesticides and other chemicals)." + }, + { + "@type": "PropertyValue", + "name": "Monitoring locations and water bodies", + "description": "Spatial identifiers and associated attributes for monitoring sites and water bodies reported through WISE and WFD/WISE spatial reporting." 
+ }, + { + "@type": "PropertyValue", + "name": "Ecological and chemical status (where applicable)", + "description": "Reported status and classification attributes associated with monitored waters and water bodies, as provided in reporting streams." + } + ], + "measurementTechnique": [ + "In situ water quality monitoring by national and regional authorities", + "Compilation, harmonisation, and processing of reported monitoring data for European-wide assessments" + ], + "encodingFormat": [ + "text/html", + "text/csv", + "text/plain", + "application/sql", + "application/vnd.sqlite3", + "application/zip", + "application/pdf" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "EEA Data Hub item page", + "description": "Landing page for Waterbase - Water Quality ICM with dataset versions, temporal coverage, and access links.", + "contentUrl": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "Direct download (EEA SDI DataShare)", + "description": "Direct download endpoint for the Waterbase - Water Quality ICM 2024 release (bulk download).", + "contentUrl": "https://sdi.eea.europa.eu/datashare/s/3JiTia3qePyGxyA/download", + "encodingFormat": ["application/zip"] + }, + { + "@type": "DataDownload", + "name": "Metadata factsheet (PDF)", + "description": "Metadata factsheet for Waterbase - Water Quality ICM, 2024 release.", + "contentUrl": "https://sdi.eea.europa.eu/catalogue/datahub/api/records/77976729-1aeb-4b61-a673-83db6c6a2ab2/formatters/xsl-view?approved=true&language=eng&output=pdf", + "encodingFormat": ["application/pdf"] + }, + { + "@type": "DataDownload", + "name": "DISCODATA endpoint (direct database access and filtering)", + "description": "Portal and endpoint for accessing and filtering data directly in the database, supporting application-specific access and user-driven downloads.", + "contentUrl": 
"https://discodata.eea.europa.eu/", + "encodingFormat": ["text/html"] + } + ], + "about": [ + { "@type": "Thing", "name": "Hydrogeochemistry" }, + { "@type": "Thing", "name": "Water quality" }, + { "@type": "Thing", "name": "Environmental monitoring" }, + { "@type": "Thing", "name": "Surface water" }, + { "@type": "Thing", "name": "Groundwater" } + ] +} diff --git a/data/objects/summoned/generated/WATERBASE/webpage.jsonld b/data/objects/summoned/generated/WATERBASE/webpage.jsonld new file mode 100644 index 0000000..7fb6a19 --- /dev/null +++ b/data/objects/summoned/generated/WATERBASE/webpage.jsonld @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1", + "name": "WATERBASE (Waterbase - Water Quality ICM)", + "description": "EEA Data Hub page for WATERBASE: European water quality and status data for rivers, lakes, groundwater, transitional, coastal and marine waters. 
European Environment Agency.", + "url": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "EEA Data Hub", + "url": "https://www.eea.europa.eu/en/datahub" + }, + "about": { + "@type": "Dataset", + "name": "WATERBASE", + "url": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1#dataset" + }, + "publisher": { + "@type": "Organization", + "name": "European Environment Agency (EEA)", + "url": "https://www.eea.europa.eu/" + }, + "mainEntity": { + "@type": "Dataset", + "name": "WATERBASE (Waterbase - Water Quality ICM)", + "url": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1#dataset" + }, + "keywords": [ + "WATERBASE", + "water quality", + "EEA", + "hydrogeochemistry", + "WISE" + ] +} diff --git a/data/objects/summoned/generated/Water_Quality_Portal/prompt.txt b/data/objects/summoned/generated/Water_Quality_Portal/prompt.txt new file mode 100644 index 0000000..232bc0d --- /dev/null +++ b/data/objects/summoned/generated/Water_Quality_Portal/prompt.txt @@ -0,0 +1,51 @@ +You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. + +**Task**: Create a valid JSON-LD document for the following dataset/service. + +**Dataset Information**: +- Name: Water Quality Portal (WQP) +- URL: https://www.waterqualitydata.us +- Description: The Water Quality Portal (WQP) is a cooperative service sponsored by the United States Geological Survey (USGS) and the Environmental Protection Agency (EPA). The WQP integrates publicly available water quality data from the USGS National Water Information System (NWIS) and the EPA Water Quality Exchange (WQX) Data Warehouse. 
+- Group/Category: hydrogeochemistry +- Creator: National Water Quality Monitoring Council | US EPA +- Provider: National Water Quality Monitoring Council | US EPA +- Publisher: National Water Quality Monitoring Council | US EPA +- Keywords: water quality; USGS; US EPA (use as array: ["water quality", "USGS", "US EPA"]) +- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") +- Extracted Metadata: WebAPI/portal. The WQP provides access to water quality monitoring data (physical, chemical, biological) from US federal and state/tribal sources. Infer temporal coverage (ongoing/historical), variableMeasured (e.g. nutrients, contaminants, physical parameters), distribution (portal URL, API endpoints if known), and license/terms of use from the website. + +**Reference Example** (from existing JSON-LD in this project): +{ + "@context": "https://schema.org/", + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", + "name": "Global Multi-layer Soil Moisture Products", + "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", + "description": "Global multi-layer soil moisture products covering 1970–2016.", + "keywords": ["soil moisture", "global", "Figshare", "hydrology"], + "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Organization", "name": "Oak Ridge National Laboratory"}], + "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, + "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], + "encodingFormat": ["application/zip", "application/x-netcdf"], + "license": "https://creativecommons.org/licenses/by/4.0/", + 
"distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] +} + +**Requirements**: +1. Use Schema.org vocabulary (https://schema.org/) +2. Set @context to `{"@vocab": "https://schema.org/"}` +3. Set @type to "Dataset" (or "DataCatalog" if describing the portal as a catalog of datasets; if unsure, use "Dataset" with description of the WQP as an integrated data resource) +4. Include @id with the portal URL (e.g. https://www.waterqualitydata.us#dataset or #datacatalog) +5. Include creator, provider, publisher from the CSV (National Water Quality Monitoring Council; can add US EPA, USGS as related organizations) +6. Include distribution with contentUrl to the portal and/or API; use encodingFormat as a JSON array (e.g. ["text/html", "application/json"] for API) +7. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" +8. temporalCoverage: use range or "ongoing" as appropriate for a live data portal +9. Use proper JSON-LD structure (arrays for multiple values) +10. "keywords" as a JSON array of strings — e.g. ["water quality", "USGS", "US EPA", "WQP", "NWIS", "WQX"] +11. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string +12. Add exactly: "comment": "This dataset metadata was generated by AI." + +**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. 
diff --git a/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld b/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld new file mode 100644 index 0000000..86bcdfe --- /dev/null +++ b/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld @@ -0,0 +1,199 @@ +{ + "@context": { + "@vocab": "https://schema.org/" + }, + "@type": "Dataset", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://www.waterqualitydata.us/#dataset", + "name": "Water Quality Portal (WQP)", + "url": "https://www.waterqualitydata.us/", + "description": "The Water Quality Portal (WQP) is a cooperative, services-first data portal that integrates publicly available discrete water-quality monitoring data from the USGS National Water Information System (NWIS) and the EPA Water Quality Exchange (WQX) Data Warehouse, along with data contributed by hundreds of state, tribal, federal, and local organizations. The portal provides station metadata and analytical results for physical, chemical, and biological measurements, accessible through a web interface and programmatic web services (REST) with multiple output formats.", + "keywords": [ + "water quality", + "USGS", + "US EPA", + "WQP", + "NWIS", + "WQX", + "STORET", + "water chemistry", + "hydrogeochemistry", + "monitoring", + "stations", + "sample results", + "nutrients", + "metals", + "contaminants" + ], + "creator": [ + { + "@type": "Organization", + "name": "National Water Quality Monitoring Council", + "url": "https://www.epa.gov/awma/national-water-quality-monitoring-council" + }, + { + "@type": "Organization", + "name": "U.S. Environmental Protection Agency (EPA)", + "url": "https://www.epa.gov/" + } + ], + "provider": [ + { + "@type": "Organization", + "name": "National Water Quality Monitoring Council", + "url": "https://www.epa.gov/awma/national-water-quality-monitoring-council" + }, + { + "@type": "Organization", + "name": "U.S. 
Environmental Protection Agency (EPA)", + "url": "https://www.epa.gov/" + } + ], + "publisher": [ + { + "@type": "Organization", + "name": "National Water Quality Monitoring Council", + "url": "https://www.epa.gov/awma/national-water-quality-monitoring-council" + }, + { + "@type": "Organization", + "name": "U.S. Environmental Protection Agency (EPA)", + "url": "https://www.epa.gov/" + } + ], + "contributor": [ + { + "@type": "Organization", + "name": "U.S. Geological Survey (USGS)", + "url": "https://www.usgs.gov/" + } + ], + "isAccessibleForFree": true, + "temporalCoverage": "1900-01-01/..", + "spatialCoverage": { + "@type": "Place", + "geo": { + "@type": "GeoShape", + "box": "20,-40 50,10" + } + }, + "variableMeasured": [ + { + "@type": "PropertyValue", + "name": "Water quality results", + "description": "Discrete sample results including measured values, units, methods, and qualifiers for physical, chemical, and biological characteristics." + }, + { + "@type": "PropertyValue", + "name": "Station / monitoring location metadata", + "description": "Locations where samples and observations were collected, including identifiers, coordinates, and site descriptors." + }, + { + "@type": "PropertyValue", + "name": "Nutrients", + "description": "Nutrient-related characteristics such as nitrogen and phosphorus species." + }, + { + "@type": "PropertyValue", + "name": "Metals and trace elements", + "description": "Metals and trace elements measured in water, sediment, or related matrices." + }, + { + "@type": "PropertyValue", + "name": "Organic contaminants and pesticides", + "description": "Organic contaminants, pesticides, and related analytes reported by contributing organizations." + }, + { + "@type": "PropertyValue", + "name": "Physical parameters", + "description": "Physical characteristics such as temperature, specific conductance, turbidity, and dissolved oxygen (where available)." 
+ }, + { + "@type": "PropertyValue", + "name": "Biological data", + "description": "Biological observations and metrics available through WQP services where reported by data providers." + } + ], + "encodingFormat": [ + "text/html", + "application/json", + "text/csv", + "application/xml", + "application/geo+json", + "application/zip" + ], + "license": "https://waterdata.us/disclaimer.html", + "distribution": [ + { + "@type": "DataDownload", + "name": "Water Quality Portal (web interface)", + "description": "Main portal interface for querying and downloading water-quality stations and results.", + "contentUrl": "https://www.waterqualitydata.us/", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "WQP Web Services Guide", + "description": "Documentation for constructing REST web-service requests and available endpoints and parameters.", + "contentUrl": "https://www.waterqualitydata.us/webservices_documentation/", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "WQP Data Download API (Swagger UI)", + "description": "Interactive API documentation for Water Quality Portal data download services.", + "contentUrl": "https://www.waterqualitydata.us/data/swagger-ui/index.html", + "encodingFormat": ["text/html"] + }, + { + "@type": "DataDownload", + "name": "WQP Station (sites) service", + "description": "Base endpoint for downloading station (monitoring location) data and metadata via REST.", + "contentUrl": "https://www.waterqualitydata.us/data/Station/search", + "encodingFormat": ["application/json", "text/csv", "application/xml"] + }, + { + "@type": "DataDownload", + "name": "WQP Result (analytical results) service", + "description": "Base endpoint for downloading discrete water-quality result records via REST.", + "contentUrl": "https://www.waterqualitydata.us/data/Result/search", + "encodingFormat": ["application/json", "text/csv", "application/xml"] + }, + { + "@type": "DataDownload", + "name": "WQP OGC 
services (WMS/WFS)", + "description": "OGC-compliant WMS/WFS services for mapping and feature access based on WQP search parameters.", + "contentUrl": "https://www.waterqualitydata.us/ogcservices/", + "encodingFormat": ["text/html", "application/xml", "application/geo+json"] + } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Water quality data for national-scale aquatic research: The Water Quality Portal", + "author": [ + { "@type": "Person", "name": "E. K. Read" } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Water Resources Research" + }, + "datePublished": "2017", + "identifier": [ + { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1002/2016WR019993" + } + ], + "sameAs": "https://doi.org/10.1002/2016WR019993" + } + ], + "about": [ + { "@type": "Thing", "name": "Water quality" }, + { "@type": "Thing", "name": "Hydrogeochemistry" }, + { "@type": "Thing", "name": "Environmental monitoring" }, + { "@type": "Thing", "name": "Surface water" }, + { "@type": "Thing", "name": "Groundwater" } + ] +} diff --git a/data/objects/summoned/generated/Water_Quality_Portal/webpage.jsonld b/data/objects/summoned/generated/Water_Quality_Portal/webpage.jsonld new file mode 100644 index 0000000..6b5873f --- /dev/null +++ b/data/objects/summoned/generated/Water_Quality_Portal/webpage.jsonld @@ -0,0 +1,44 @@ +{ + "@context": "https://schema.org/", + "@type": "WebPage", + "comment": "This dataset metadata was generated by AI.", + "@id": "https://www.waterqualitydata.us/", + "name": "Water Quality Portal (WQP)", + "description": "Webpage for the Water Quality Portal: integrated US water quality data from USGS NWIS and EPA WQX, with web interface and REST/API access. 
National Water Quality Monitoring Council, US EPA, USGS.", + "url": "https://www.waterqualitydata.us/", + "inLanguage": "en", + "isPartOf": { + "@type": "WebSite", + "name": "Water Quality Portal", + "url": "https://www.waterqualitydata.us/" + }, + "about": { + "@type": "Dataset", + "name": "Water Quality Portal (WQP)", + "url": "https://www.waterqualitydata.us/#dataset" + }, + "publisher": [ + { + "@type": "Organization", + "name": "National Water Quality Monitoring Council", + "url": "https://www.epa.gov/awma/national-water-quality-monitoring-council" + }, + { + "@type": "Organization", + "name": "U.S. Environmental Protection Agency (EPA)", + "url": "https://www.epa.gov/" + } + ], + "mainEntity": { + "@type": "Dataset", + "name": "Water Quality Portal (WQP)", + "url": "https://www.waterqualitydata.us/#dataset" + }, + "keywords": [ + "water quality", + "WQP", + "USGS", + "US EPA", + "hydrogeochemistry" + ] +} From 14a4df9c4c2547017d572b5a6b255adb2b8c15d2 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:01:33 -0600 Subject: [PATCH 38/58] Updated prompt --- .../Shale_Network/shale-network.jsonld | 2 +- prompts/dataset-detection-prompt.txt | 44 -- prompts/jsonld-generation-prompt.txt | 66 ++- scripts/README.md | 50 ++- scripts/generate_jsonld.py | 422 +++++++++++++++--- scripts/validate_jsonld.py | 218 --------- scripts/validate_jsonld_batch.py | 71 --- 7 files changed, 436 insertions(+), 437 deletions(-) delete mode 100644 prompts/dataset-detection-prompt.txt delete mode 100644 scripts/validate_jsonld.py delete mode 100644 scripts/validate_jsonld_batch.py diff --git a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld index 19bf22b..13fdc40 100644 --- a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld +++ b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld @@ -7,7 +7,7 @@ "@id": 
"https://doi.org/10.4211/his-data-shalenetwork#dataset", "name": "Shale Network (ShaleNetwork Database)", "url": "https://doi.org/10.4211/his-data-shalenetwork", - "description": "The Shale Network database is a collaborative water quality and quantity data resource for regions of hydrocarbon extraction (including shale gas development), acting as an "honest broker" that collates datasets from academic researchers, government agencies, industry, nonprofit entities, and watershed groups. The database is published through the CUAHSI Hydrologic Information System (HIS) for discovery and access via HydroClient and associated web services, and is used to establish background concentrations and assess environmental impacts across energy production regions.", + "description": "The Shale Network database is a collaborative water quality and quantity data resource for regions of hydrocarbon extraction (including shale gas development), acting as an \"honest broker\" that collates datasets from academic researchers, government agencies, industry, nonprofit entities, and watershed groups. The database is published through the CUAHSI Hydrologic Information System (HIS) for discovery and access via HydroClient and associated web services, and is used to establish background concentrations and assess environmental impacts across energy production regions.", "keywords": [ "Shale Network", "ShaleNetwork", diff --git a/prompts/dataset-detection-prompt.txt b/prompts/dataset-detection-prompt.txt deleted file mode 100644 index 3dd02b4..0000000 --- a/prompts/dataset-detection-prompt.txt +++ /dev/null @@ -1,44 +0,0 @@ -You are analyzing a scientific dataset webpage to identify available datasets and their metadata. - -**Task**: Analyze the following URL and identify: -1. What datasets are available at this URL? -2. 
For each dataset found, extract: - - Dataset name/title - - Description - - Creator(s) or author(s) - - Publisher or organization - - Publication date - - Download links or access URLs - - Spatial coverage (if mentioned) - - Temporal coverage (if mentioned) - - License information - - Keywords or topics - -**Webpage URL**: {URL} - -**Important**: Please analyze the content at this URL: {URL} - -If you have URL Context Tool access, fetch and analyze the webpage content directly. Otherwise, use your knowledge of the domain and URL structure to infer what datasets might be available. - -**Context from Google Sheet**: -- Expected Dataset Name: {DATASET_NAME} -- Group/Category: {GROUP} -- Description: {DESCRIPTION} - -**Instructions**: -- Browse the URL and explore the webpage structure -- If the page contains multiple datasets or files, identify if this is a data catalog -- For data catalogs (like MERIT DEM with multiple spatial regions), note the file naming conventions and structure -- Focus on structured data products (not just documentation) -- Look for download links, API endpoints, or data access points -- Extract any existing JSON-LD or structured metadata if present -- Note any errors or issues accessing the page - -**Output Format**: Provide a structured JSON response with the extracted information. - - - - - - - diff --git a/prompts/jsonld-generation-prompt.txt b/prompts/jsonld-generation-prompt.txt index 2a83b19..0e18755 100644 --- a/prompts/jsonld-generation-prompt.txt +++ b/prompts/jsonld-generation-prompt.txt @@ -2,7 +2,7 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi **Task**: Create a valid JSON-LD document for the following dataset. 
-**Dataset Information**: +**Dataset Information** (fill from spreadsheet; no fixed URLs): - Name: {DATASET_NAME} - URL: {URL} - Description: {DESCRIPTION} @@ -11,28 +11,56 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi - Provider: {PROVIDER} - Publisher: {PUBLISHER} - Keywords: {KEYWORDS} -- Spatial Coverage: {SPATIAL_COVERAGE} -- Extracted Metadata: {EXTRACTED_METADATA} +- Spatial Coverage: {SPATIAL_COVERAGE} (box format: "west,south east,north", e.g. "20,-40 50,10") -**Reference Example** (from existing JSON-LD in this project): +**Type-specific guidance** (choose the branch that matches the spreadsheet "Type" and subtype): + +• **Webpage + ld+json(figshare)** (Figshare article or collection): + Extracted Metadata: Hosted on Figshare. Infer creator, citation, temporal coverage, and variables from the Figshare page and any linked paper (e.g. DOI or journal article). Include distribution with the Figshare article or collection URL and DOI if available. Creator/publisher: infer from Figshare page (authors and Figshare as publisher). Use @id as dataset URL or DOI (e.g. Figshare article URL with #dataset or DOI). Figshare often uses CC-BY. + +• **Webpage + Listing** (download or listing page, not Figshare): + Extracted Metadata: Infer creator, publisher, temporal coverage, variables, and distribution from the download or listing page and any linked documentation. Include distribution with the main page URL and any data access or download URLs. Use @id as the dataset or download page URL with #dataset. + +• **Webpage + Catalog** (site that lists multiple datasets): + Extracted Metadata: The site is a catalog of datasets. You may describe the catalog as a single Dataset or as a DataCatalog; if describing one representative or aggregate dataset, infer creator, publisher, temporal coverage, variables, and distribution from the catalog page. 
Include distribution to the catalog URL and, if applicable, links to key datasets or download areas. + +• **Webpage + Dataset** (single dataset on a webpage): + Extracted Metadata: Infer creator, publisher, temporal coverage, variables, and distribution from the dataset page. Include distribution with the page URL and any direct download or data access links. + +• **Webpage + earthengine** (Google Earth Engine or similar): + Extracted Metadata: Infer creator, publisher, temporal coverage, and variables from the page. Include distribution with the landing page URL and any Earth Engine asset or download information. + +• **Webpage** (no subtype; generic webpage or DOI landing): + Extracted Metadata: Infer creator, publisher, temporal coverage, variables, and distribution from the webpage or DOI resolver (e.g. CUAHSI HIS, HydroShare, Zenodo). Include distribution with the landing page URL and any data access or download links. + +• **Sitemap** (sitemap or catalog API URL): + Extracted Metadata: Data are discoverable via a sitemap or catalog (e.g. GeoNetwork). If a record or catalog URL is available, include it. Infer creator (e.g. from the organization), publisher, temporal coverage, variables, and distribution (catalog URL and/or file server or API). Include distribution with the sitemap/catalog URL and, if known, the main data download or record URL. + +• **WebAPI** (data portal or API): + Extracted Metadata: The resource is a data portal or API. Describe as a Dataset (or DataCatalog if it aggregates many datasets). Include distribution with the portal URL and API endpoints if known; encodingFormat may include application/json, text/csv, application/xml. temporalCoverage may be "ongoing" or a range as appropriate for a live service. + +• **WebPage:Dataset or Dataset** (data hub item or standalone dataset): + Extracted Metadata: Hosted on a data hub or as a standalone dataset (e.g. EEA Data Hub, PANGAEA, Zenodo). 
Infer creator, publisher, temporal coverage, variables, and distribution from the hub item or landing page. Include distribution with the hub/dataset URL and any direct download or API links. + +**Additional extracted or inferred metadata** (optional; from page content or prior knowledge): +{EXTRACTED_METADATA} + +**Reference Example** (structure only; no fixed URLs): {EXAMPLE_JSONLD} -**Requirements**: +**Requirements** (apply to all types): 1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{{"@vocab": "https://schema.org/"}}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or identifier +2. Set @context to {{"@vocab": "https://schema.org/"}} +3. Set @type to "Dataset" (or "DataCatalog" only when describing a catalog of datasets as a whole) +4. Include @id with the dataset URL or a stable identifier (e.g. landing page URL with #dataset or DOI) 5. Include all available metadata fields -6. For creators, use Person or Organization types with proper structure -7. Include distribution information if download links are available -8. Add temporalCoverage if time period is known (format: "YYYY-MM-DD/YYYY-MM-DD") -9. Add spatialCoverage if geographic bounds are provided: - - Use Place with geo containing GeoShape - - The box format MUST be: "west,south east,north" (comma-separated pairs, space between pairs) - - Example: For coordinates 20,-40,50,10 use box: "20,-40 50,10" (NOT "20 -40 50 10") - - Format: {{"@type": "Place", "geo": {{"@type": "GeoShape", "box": "west,south east,north"}}}} -10. Include license and access information -11. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) +6. Include distribution with contentUrl(s); for each distribution use "encodingFormat" as a JSON array of strings (e.g. ["text/html"], ["application/json", "text/csv"]) +7. temporalCoverage: use "YYYY-MM-DD/YYYY-MM-DD" or "ongoing" / ".." as appropriate +8. 
spatialCoverage: Use Place with geo containing GeoShape. The box format MUST be: "west,south east,north" (e.g. "20,-40 50,10") +9. Include license and access information when known +10. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) +11. Set "keywords" as a JSON array of strings — never a single semicolon- or comma-separated string +12. Set "encodingFormat" (at dataset level and in each distribution) as a JSON array of strings — never a single semicolon- or comma-separated string +13. Add exactly: "comment": "This dataset metadata was generated by AI." **Output**: Provide ONLY valid JSON-LD, no additional text or explanation. - diff --git a/scripts/README.md b/scripts/README.md index 2889fa2..fdb4b2c 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -12,10 +12,18 @@ The script requires a `datasets.csv` file exported from the Google Sheet. To cre 3. File → Download → Comma Separated Values (.csv) 4. Save as `datasets.csv` in the project root directory -**Option 2: Use Python to download** +**Option 2: Use the fetch script (recommended)** ```bash -# Download the CSV export directly -python -c "import urllib.request; urllib.request.urlretrieve('https://docs.google.com/spreadsheets/d/1pqZpMWqQFwUrleHXPbvXqXX59Xcj1Yrtqt2nJTh1reM/export?format=csv&gid=1162616600', 'datasets.csv'); print('Downloaded datasets.csv')" +# Download the spreadsheet as datasets.csv (sheet must be shared so "Anyone with the link" can view) +python scripts/fetch_spreadsheet.py +# Or save to a different file: +python scripts/fetch_spreadsheet.py path/to/datasets.csv +``` + +**Option 3: Use the sheet URL directly when generating** +```bash +# Use the Google Sheets export URL as the CSV source (no local file needed) +python scripts/generate_jsonld.py --csv "https://docs.google.com/spreadsheets/d/1pqZpMWqQFwUrleHXPbvXqXX59Xcj1Yrtqt2nJTh1reM/export?format=csv&gid=1162616600" --next ``` **Note**: The `datasets.csv` file is gitignored and will not be 
committed to the repository. @@ -36,9 +44,12 @@ python -c "import urllib.request; urllib.request.urlretrieve('https://docs.googl Or install specific AI service: ```bash + # For NRP or OpenAI pip install openai requests beautifulsoup4 python-dotenv - # OR + # OR for Anthropic pip install anthropic requests beautifulsoup4 python-dotenv + # OR for Gemini + pip install google-generativeai requests beautifulsoup4 python-dotenv ``` 3. Set up your API key: @@ -55,49 +66,63 @@ python -c "import urllib.request; urllib.request.urlretrieve('https://docs.googl **Alternative: Set environment variable directly** ```bash # Linux/Mac + export OPENAI_API_KEY="your-key-here" export NRP_API_KEY="your-key-here" + export GEMINI_API_KEY="your-key-here" # Windows (PowerShell) + $env:OPENAI_API_KEY="your-key-here" $env:NRP_API_KEY="your-key-here" + $env:GEMINI_API_KEY="your-key-here" # Windows (CMD) + set OPENAI_API_KEY=your-key-here set NRP_API_KEY=your-key-here + set GEMINI_API_KEY=your-key-here ``` **For NRP**: Get your API key from https://nrp.ai/documentation/userdocs/ai/llm-managed/ + + **For OpenAI/ChatGPT**: Get your API key from https://platform.openai.com/api-keys + + **For Gemini**: Get your API key from https://aistudio.google.com/apikey (free tier available with .edu email) ## Usage ### Test with a single URL ```bash -# Using NRP (default, no --ai-service needed) +# Using Gemini (default, no --ai-service needed) python scripts/generate_jsonld.py --test-url "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" -# Or use OpenAI/Anthropic if you have their API keys +# Or use OpenAI/ChatGPT, NRP, or Anthropic if you have their API keys python scripts/generate_jsonld.py --ai-service openai --test-url "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" +python scripts/generate_jsonld.py --ai-service nrp --test-url "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" +python scripts/generate_jsonld.py --ai-service gemini --test-url 
"http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" ``` ### Process datasets from CSV ```bash -# Process all datasets that need JSON-LD (using NRP by default) +# Process all datasets that need JSON-LD (using Gemini by default) python scripts/generate_jsonld.py --csv datasets.csv # Process only first 5 datasets (for testing) python scripts/generate_jsonld.py --csv datasets.csv --limit 5 -# Alternative: Use OpenAI or Anthropic if you have their API keys +# Alternative: Use OpenAI, Anthropic, or Gemini if you have their API keys python scripts/generate_jsonld.py --ai-service openai --csv datasets.csv python scripts/generate_jsonld.py --ai-service anthropic --csv datasets.csv +python scripts/generate_jsonld.py --ai-service gemini --csv datasets.csv ``` ### Options - `--csv`: Path to CSV file (default: `datasets.csv`) - `--output-dir`: Output directory for JSON-LD files (default: `data/objects/summoned/generated`) -- `--ai-service`: Choose `nrp` (default), `openai`, or `anthropic` (optional - defaults to `nrp`) +- `--ai-service`: Choose `gemini` (default), `nrp`, `openai`, or `anthropic` (optional - defaults to `gemini`) - `--api-key`: API key (or use environment variable) - `--model`: Model name (optional, uses defaults) + - Gemini default: `gemini-2.0-flash` (other options: `gemini-2.5-flash`, `gemini-2.5-pro`) - NRP default: `qwen3` (other options: `llama3-sdsc`, `gpt-oss`, `gorilla`, `olmo`, `gemma3`, `kimi`, etc.) - - OpenAI default: `gpt-4` + - OpenAI default: `gpt-4o` - Anthropic default: `claude-3-5-sonnet-20241022` - `--limit`: Limit number of datasets to process - `--test-url`: Test with a single URL instead of CSV @@ -112,8 +137,11 @@ Generated JSON-LD files are saved to the output directory with filenames like: 1. Script reads the CSV file 2. Filters datasets where `hasJSONLD?` is `FALSE`, `#ERROR!`, or empty 3. 
For each dataset: - - Fetches the webpage + - **Gemini (default)**: Passes URL directly to AI, which uses URL Context Tool to browse/analyze the webpage + - **Other AI services (OpenAI, Anthropic, NRP)**: Fetches HTML, extracts text content using BeautifulSoup, then sends to AI - Uses AI to detect datasets and extract metadata - Generates JSON-LD using the extracted metadata - Saves the JSON-LD file +**Note**: Gemini is the default because it can browse URLs directly. Other services require HTML fetching and text extraction. + diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 754e395..0c9166d 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -9,6 +9,7 @@ import csv import hashlib +import io import json import os import re @@ -18,6 +19,7 @@ import threading from pathlib import Path from typing import Dict, List, Optional +from urllib.parse import urlparse # Try to load .env file if python-dotenv is available try: @@ -112,6 +114,9 @@ PROMPTS_DIR = PROJECT_ROOT / "prompts" DATA_DIR = PROJECT_ROOT / "data" / "objects" / "summoned" +# Added to every generated JSON-LD to disclose AI-generated metadata +AI_GENERATED_COMMENT = "This dataset metadata was generated by AI." 
+ class AIClient: """Abstract base class for AI clients.""" @@ -139,6 +144,21 @@ def _retry_with_timeout(self, func, *args, **kwargs): def _extract_json_from_response(self, response: str) -> str: """Extract and validate JSON from API response.""" try: + # Remove markdown code blocks if present + if '```json' in response: + # Extract content between ```json and ``` + start = response.find('```json') + 7 + end = response.find('```', start) + if end > start: + response = response[start:end].strip() + elif '```' in response: + # Extract content between ``` and ``` + start = response.find('```') + 3 + end = response.find('```', start) + if end > start: + response = response[start:end].strip() + + # Find JSON object or array if '{' in response: start = response.find('{') end = response.rfind('}') + 1 @@ -146,6 +166,17 @@ def _extract_json_from_response(self, response: str) -> str: json_data = json.loads(json_str) # Fix spatial coverage format if needed json_data = self._fix_spatial_coverage(json_data) + # Ensure keywords is a JSON array (not semicolon/comma-separated string) + json_data = self._fix_keywords(json_data) + json_data = self._fix_encoding_format(json_data) + json_data = self._add_ai_generated_comment(json_data) + return json.dumps(json_data, indent=2) + elif '[' in response: + # Handle JSON arrays + start = response.find('[') + end = response.rfind(']') + 1 + json_str = response[start:end] + json_data = json.loads(json_str) return json.dumps(json_data, indent=2) return response except (json.JSONDecodeError, ValueError): @@ -174,6 +205,77 @@ def _fix_spatial_coverage(self, data: Dict) -> Dict: pass # If conversion fails, leave as is return data + def _fix_keywords(self, data: Dict) -> Dict: + """Ensure keywords is a JSON array of strings. 
Schema.org expects an array.""" + if not isinstance(data, dict) or 'keywords' not in data: + return data + val = data['keywords'] + if isinstance(val, list): + data['keywords'] = [str(item).strip() for item in val if item] + return data + if isinstance(val, str): + val = val.strip() + if not val: + return data + # Split by semicolon or comma, strip each, filter empty + parts = re.split(r'[;,]', val) + data['keywords'] = [p.strip() for p in parts if p.strip()] + return data + return data + + def _fix_encoding_format(self, data: Dict) -> Dict: + """Ensure encodingFormat is a JSON array of strings; split any string (or list element) containing ; or ,. In-place, recursive.""" + if isinstance(data, dict): + if 'encodingFormat' in data: + val = data['encodingFormat'] + if isinstance(val, str): + val = val.strip() + if val and (';' in val or ',' in val): + data['encodingFormat'] = [p.strip() for p in re.split(r'\s*[;,]\s*', val) if p.strip()] + elif val: + data['encodingFormat'] = [val] + elif isinstance(val, list): + out = [] + for item in val: + if isinstance(item, str) and item.strip(): + if ';' in item or ',' in item: + out.extend(p.strip() for p in re.split(r'\s*[;,]\s*', item) if p.strip()) + else: + out.append(item.strip()) + elif item: + out.append(str(item).strip()) + data['encodingFormat'] = out + for v in data.values(): + self._fix_encoding_format(v) + elif isinstance(data, list): + for item in data: + self._fix_encoding_format(item) + return data + + def _add_ai_generated_comment(self, data: Dict) -> Dict: + """Add a top-level comment that this dataset metadata was generated by AI. 
Inserts after @type.""" + if not isinstance(data, dict): + return data + existing = data.get("comment") + if existing is None: + new_comment = AI_GENERATED_COMMENT + elif isinstance(existing, list): + new_comment = existing + [AI_GENERATED_COMMENT] if AI_GENERATED_COMMENT not in existing else existing + elif existing == AI_GENERATED_COMMENT: + return data + else: + new_comment = [existing, AI_GENERATED_COMMENT] + out = {} + for k, v in data.items(): + if k == "comment": + continue + out[k] = v + if k == "@type": + out["comment"] = new_comment + if "comment" not in out: + out["comment"] = new_comment + return out + def _is_server_error(self, error: Exception) -> bool: """Check if an exception represents a server error.""" error_str = str(error).lower() @@ -226,11 +328,33 @@ def _load_prompt_template(self, filename: str) -> str: with open(prompt_path, 'r', encoding='utf-8') as f: return f.read() - def _format_detection_prompt(self, url: str, context: Dict) -> str: - """Format the dataset detection prompt with URL only (no HTML content).""" + def _format_detection_prompt(self, url: str, context: Dict, content: str = None) -> str: + """Format the dataset detection prompt. + + Args: + url: The webpage URL + context: Context dictionary from CSV + content: Optional HTML text content (for non-Gemini services) + """ template = self._load_prompt_template("dataset-detection-prompt.txt") + + # Format content section based on whether content is provided + if content: + # For services that fetch HTML: include the extracted text content + content_limited = content[:CONTENT_LIMIT_DETECTION] if len(content) > CONTENT_LIMIT_DETECTION else content + content_section = f"""**Webpage Content** (extracted text): +{content_limited} + +**Note**: The above is the extracted text content from the webpage. 
Please analyze it to identify datasets and extract metadata.""" + else: + # For Gemini with URL Context Tool: instruct to browse the URL + content_section = f"""**Important**: Please analyze the content at this URL: {url} + +If you have URL Context Tool access, fetch and analyze the webpage content directly. Otherwise, use your knowledge of the domain and URL structure to infer what datasets might be available.""" + return template.format( URL=url, + CONTENT_SECTION=content_section, DATASET_NAME=context.get(CSV_FIELDS['NAME'], ''), GROUP=context.get(CSV_FIELDS['GROUP'], ''), DESCRIPTION=context.get(CSV_FIELDS['DESCRIPTION'], '') @@ -261,7 +385,7 @@ def _format_generation_prompt(self, metadata: Dict, example_jsonld: str) -> str: class OpenAIClient(AIClient): """OpenAI API client.""" - def __init__(self, api_key: str, model: str = "gpt-4", base_url: str = None): + def __init__(self, api_key: str, model: str = "gpt-4o", base_url: str = None): self.client = openai.OpenAI(api_key=api_key, base_url=base_url) self.model = model @@ -288,9 +412,26 @@ def api_call(): raise ValueError("Empty response from API") return response.choices[0].message.content - def detect_datasets(self, url: str, context: Dict) -> Dict: - """Detect datasets using OpenAI by analyzing the URL directly.""" - prompt = self._format_detection_prompt(url, context) + def detect_datasets(self, url: str, context: Dict, webpage_content: str = None) -> Dict: + """Detect datasets using OpenAI by analyzing webpage content. 
+ + Args: + url: The webpage URL + context: Context dictionary from CSV + webpage_content: Optional pre-fetched HTML text content (if None, will fetch) + """ + # Fetch and extract content if not provided + if webpage_content is None: + print(" Fetching webpage content...") + html = fetch_webpage(url) + if html: + webpage_content = extract_text_content(html) + print(f" Extracted {len(webpage_content)} characters of text content") + else: + webpage_content = "" + print(" Warning: Could not fetch webpage content") + + prompt = self._format_detection_prompt(url, context, content=webpage_content) # Debug: Log prompt size prompt_size = len(prompt) @@ -299,10 +440,26 @@ def detect_datasets(self, url: str, context: Dict) -> Dict: def call_detect(): response = self._call_api(prompt, operation="dataset detection") + # Extract JSON from response (handles responses with extra text) + json_str = self._extract_json_from_response(response) try: - return json.loads(response) - except json.JSONDecodeError: - return {"raw_response": response, "error": "Failed to parse JSON"} + parsed = json.loads(json_str) + # If the response is already a dict with 'datasets' key, return it + # Otherwise, wrap it in a dict with 'datasets' array + if isinstance(parsed, dict): + if 'datasets' not in parsed and ('name' in parsed or 'raw_response' in parsed): + # Old format: single dataset, wrap in array + parsed = {'datasets': [parsed]} + return parsed + elif isinstance(parsed, list): + # Response is already an array, wrap in dict + return {'datasets': parsed} + else: + return {"raw_response": response, "error": "Unexpected response format"} + except json.JSONDecodeError as e: + print(f" Debug: JSON parsing error: {e}") + print(f" Debug: Response preview: {response[:500]}") + return {"raw_response": response, "error": f"Failed to parse JSON: {e}"} return self._retry_with_timeout(call_detect) @@ -359,9 +516,26 @@ def api_call(): raise ValueError("Empty response from API") return response.content[0].text 
- def detect_datasets(self, url: str, context: Dict) -> Dict: - """Detect datasets using Anthropic by analyzing the URL directly.""" - prompt = self._format_detection_prompt(url, context) + def detect_datasets(self, url: str, context: Dict, webpage_content: str = None) -> Dict: + """Detect datasets using Anthropic by analyzing webpage content. + + Args: + url: The webpage URL + context: Context dictionary from CSV + webpage_content: Optional pre-fetched HTML text content (if None, will fetch) + """ + # Fetch and extract content if not provided + if webpage_content is None: + print(" Fetching webpage content...") + html = fetch_webpage(url) + if html: + webpage_content = extract_text_content(html) + print(f" Extracted {len(webpage_content)} characters of text content") + else: + webpage_content = "" + print(" Warning: Could not fetch webpage content") + + prompt = self._format_detection_prompt(url, context, content=webpage_content) # Debug: Log prompt size prompt_size = len(prompt) @@ -370,10 +544,26 @@ def detect_datasets(self, url: str, context: Dict) -> Dict: def call_detect(): response = self._call_api(prompt, operation="dataset detection") + # Extract JSON from response (handles responses with extra text) + json_str = self._extract_json_from_response(response) try: - return json.loads(response) - except json.JSONDecodeError: - return {"raw_response": response, "error": "Failed to parse JSON"} + parsed = json.loads(json_str) + # If the response is already a dict with 'datasets' key, return it + # Otherwise, wrap it in a dict with 'datasets' array + if isinstance(parsed, dict): + if 'datasets' not in parsed and ('name' in parsed or 'raw_response' in parsed): + # Old format: single dataset, wrap in array + parsed = {'datasets': [parsed]} + return parsed + elif isinstance(parsed, list): + # Response is already an array, wrap in dict + return {'datasets': parsed} + else: + return {"raw_response": response, "error": "Unexpected response format"} + except 
json.JSONDecodeError as e: + print(f" Debug: JSON parsing error: {e}") + print(f" Debug: Response preview: {response[:500]}") + return {"raw_response": response, "error": f"Failed to parse JSON: {e}"} return self._retry_with_timeout(call_detect) @@ -597,7 +787,19 @@ def load_example_jsonld() -> str: def read_csv(csv_path: str) -> List[Dict]: - """Read the datasets CSV file.""" + """Read the datasets CSV from a local file or from a URL (e.g. Google Sheets export).""" + parsed = urlparse(csv_path) + if parsed.scheme in ('http', 'https'): + try: + resp = requests.get(csv_path, timeout=30) + resp.raise_for_status() + text = resp.text + except Exception as e: + raise FileNotFoundError(f"Could not fetch CSV from URL: {e}") from e + f = io.StringIO(text) + reader = csv.DictReader(f) + datasets = list(reader) + return datasets if not os.path.exists(csv_path): raise FileNotFoundError(f"CSV file not found: {csv_path}") datasets = [] @@ -633,13 +835,17 @@ def save_jsonld(jsonld_str: str, output_dir: Path, dataset_name: str, url: str) def main(): parser = argparse.ArgumentParser(description='Generate JSON-LD for datasets') - parser.add_argument('--csv', default='datasets.csv', help='Path to CSV file') + parser.add_argument('--csv', default='datasets.csv', help='Path to CSV file or URL (e.g. 
Google Sheets export)') parser.add_argument('--output-dir', default='data/objects/summoned/generated', help='Output directory for JSON-LD files') parser.add_argument('--ai-service', choices=['openai', 'anthropic', 'nrp', 'gemini'], default='gemini', help='AI service to use (default: gemini)') parser.add_argument('--api-key', help='API key (or set environment variable)') parser.add_argument('--model', help='Model name (optional)') parser.add_argument('--limit', type=int, help='Limit number of datasets to process') parser.add_argument('--test-url', help='Test with a single URL instead of CSV') + parser.add_argument('--first-row', action='store_true', help='Process only the first row from CSV') + parser.add_argument('--next', dest='next_entry', action='store_true', help='Process only the next entry (one site that needs JSON-LD). Same as --first-row.') + parser.add_argument('--skip-existing', action='store_true', help='Skip sites that already have generated JSON-LD in the output folder') + parser.add_argument('--site-folder', action='store_true', default=True, help='Create folder per site and save each dataset as separate JSON-LD file') args = parser.parse_args() @@ -678,7 +884,7 @@ def main(): if not OPENAI_AVAILABLE: print("Error: openai package not installed. Run: pip install openai") sys.exit(1) - client = OpenAIClient(api_key, args.model or "gpt-4") + client = OpenAIClient(api_key, args.model or "gpt-4o") elif args.ai_service == 'nrp': if not OPENAI_AVAILABLE: print("Error: openai package not installed. 
Run: pip install openai") @@ -704,7 +910,10 @@ def main(): # Test mode with single URL if args.test_url: print(f"Testing with URL: {args.test_url}") - print(" Sending URL to AI for analysis (AI will browse/analyze the webpage)...") + if args.ai_service == 'gemini': + print(" Using Gemini with URL Context Tool (AI will browse/analyze the webpage directly)...") + else: + print(" Fetching webpage and extracting text content for AI analysis...") context = { CSV_FIELDS['NAME']: 'Test Dataset', CSV_FIELDS['GROUP']: 'test', @@ -734,10 +943,30 @@ def main(): and d.get(CSV_FIELDS['WEBPAGE_URL'], '').strip() ] - if args.limit: + # Optionally skip sites that already have generated output + if getattr(args, 'skip_existing', False): + still_to_process = [] + for d in to_process: + name = d.get(CSV_FIELDS['NAME'], 'Unknown') + safe_folder = "".join(c for c in name if c.isalnum() or c in (' ', '-', '_')).rstrip().replace(' ', '_') + site_dir = output_dir / safe_folder + if site_dir.exists(): + existing = list(site_dir.glob("*.jsonld")) + if existing: + print(f"Skipping (already generated): {name}") + continue + still_to_process.append(d) + to_process = still_to_process + print(f"After skipping existing: {len(to_process)} site(s) to process") + + # Process only first row / next entry if requested + if getattr(args, 'next_entry', False) or args.first_row: + to_process = to_process[:1] if to_process else [] + print("Processing only the next entry (one site) from CSV") + elif args.limit: to_process = to_process[:args.limit] - print(f"Processing {len(to_process)} datasets that need JSON-LD") + print(f"Processing {len(to_process)} site(s) that need JSON-LD") timed_out_urls = [] @@ -749,78 +978,125 @@ def main(): print(f"[{i}/{len(to_process)}] Skipping {name}: No URL") continue - print(f"\n[{i}/{len(to_process)}] Processing: {name}") + print(f"\n[{i}/{len(to_process)}] Processing Site: {name}") print(f" URL: {url}") - # Detect datasets (AI will browse/analyze the URL directly) - 
print(" Analyzing URL with AI (AI will browse/analyze the webpage)...") + # Create site folder if using folder structure + if args.site_folder: + # Create safe folder name from site name + safe_folder_name = "".join(c for c in name if c.isalnum() or c in (' ', '-', '_')).rstrip() + safe_folder_name = safe_folder_name.replace(' ', '_') + site_output_dir = output_dir / safe_folder_name + else: + site_output_dir = output_dir + + # Detect datasets + if args.ai_service == 'gemini': + print(" Using Gemini with URL Context Tool (AI will browse/analyze the webpage directly)...") + else: + print(" Fetching webpage and extracting text content for AI analysis...") try: detection_result = client.detect_datasets(url, dataset) print(f" Detection complete") except TimeoutError: - print(f" Error: Request timed out. Skipping this dataset.") + print(f" Error: Request timed out. Skipping this site.") timed_out_urls.append({'name': name, 'url': url, 'reason': 'timeout'}) continue except Exception as e: # Check if it's a server error if any(code in str(e).lower() for code in SERVER_ERROR_CODES): - print(f" Error: API server error during detection. Skipping this dataset.") + print(f" Error: API server error during detection. 
Skipping this site.") print(f" Details: {e}") timed_out_urls.append({'name': name, 'url': url, 'reason': 'server_error'}) else: - print(f" Error during detection: {e}") + print(f" Error during detection: {type(e).__name__}: {e}") + import traceback + print(f" Traceback: {traceback.format_exc()}") timed_out_urls.append({'name': name, 'url': url, 'reason': 'detection_error'}) continue - # Prepare metadata - metadata = { - 'name': name, - 'url': url, - 'description': dataset.get(CSV_FIELDS['DESCRIPTION'], ''), - 'group': dataset.get(CSV_FIELDS['GROUP'], ''), - 'creator': dataset.get(CSV_FIELDS['CREATOR'], ''), - 'provider': dataset.get(CSV_FIELDS['PROVIDER'], ''), - 'publisher': dataset.get(CSV_FIELDS['PUBLISHER'], ''), - 'keywords': dataset.get(CSV_FIELDS['KEYWORDS'], ''), - 'spatial_coverage': ( - f"{dataset.get(CSV_FIELDS['BOX_LON_MIN'], '')}," - f"{dataset.get(CSV_FIELDS['BOX_LAT_MIN'], '')}," - f"{dataset.get(CSV_FIELDS['BOX_LON_MAX'], '')}," - f"{dataset.get(CSV_FIELDS['BOX_LAT_MAX'], '')}" - if dataset.get(CSV_FIELDS['BOX_LON_MIN']) else '' - ), - 'extracted': detection_result - } + # Extract datasets from detection result + # Handle both old format (single dataset) and new format (datasets array) + detected_datasets = [] + if isinstance(detection_result, dict): + if 'datasets' in detection_result and isinstance(detection_result['datasets'], list): + # New format: array of datasets + detected_datasets = detection_result['datasets'] + elif 'name' in detection_result or 'raw_response' in detection_result: + # Old format: single dataset or error + if 'raw_response' not in detection_result: + detected_datasets = [detection_result] + else: + print(f" Warning: Detection returned raw response, may need manual review") + detected_datasets = [detection_result] + else: + # Try to treat the whole result as a single dataset + detected_datasets = [detection_result] + elif isinstance(detection_result, list): + detected_datasets = detection_result + else: + print(f" Warning: 
Unexpected detection result format") + detected_datasets = [{'name': name, 'raw_response': str(detection_result)}] - # Generate JSON-LD - print(" Generating JSON-LD...") - try: - jsonld = client.generate_jsonld(metadata, example_jsonld) + print(f" Found {len(detected_datasets)} dataset(s) on this webpage") + + # Process each detected dataset + for j, detected_dataset in enumerate(detected_datasets, 1): + dataset_name = detected_dataset.get('name', f'{name}_dataset_{j}') + dataset_url = detected_dataset.get('url', url) - # Validate JSON - try: - json.loads(jsonld) - print(" Valid JSON") - except json.JSONDecodeError as e: - print(f" Warning: Generated JSON may be invalid: {e}") + print(f"\n [{j}/{len(detected_datasets)}] Processing dataset: {dataset_name}") - # Save - output_path = save_jsonld(jsonld, output_dir, name, url) - print(f" Saved to: {output_path}") - except TimeoutError: - print(f" Error: Request timed out. Skipping this dataset.") - timed_out_urls.append({'name': name, 'url': url, 'reason': 'timeout'}) - continue - except Exception as e: - # Check if it's a server error - if any(code in str(e).lower() for code in SERVER_ERROR_CODES): - print(f" Error: API server error. Skipping this dataset.") - print(f" Details: {e}") - timed_out_urls.append({'name': name, 'url': url, 'reason': 'server_error'}) - else: - print(f" Error: {e}. 
Skipping this dataset.") - timed_out_urls.append({'name': name, 'url': url, 'reason': 'other_error'}) - continue + # Prepare metadata for this specific dataset + metadata = { + 'name': dataset_name, + 'url': dataset_url, + 'description': detected_dataset.get('description', dataset.get(CSV_FIELDS['DESCRIPTION'], '')), + 'group': dataset.get(CSV_FIELDS['GROUP'], ''), + 'creator': detected_dataset.get('creator', dataset.get(CSV_FIELDS['CREATOR'], '')), + 'provider': dataset.get(CSV_FIELDS['PROVIDER'], ''), + 'publisher': detected_dataset.get('publisher', dataset.get(CSV_FIELDS['PUBLISHER'], '')), + 'keywords': detected_dataset.get('keywords', dataset.get(CSV_FIELDS['KEYWORDS'], '')), + 'spatial_coverage': ( + detected_dataset.get('spatialCoverage', {}) if 'spatialCoverage' in detected_dataset + else ( + f"{dataset.get(CSV_FIELDS['BOX_LON_MIN'], '')},{dataset.get(CSV_FIELDS['BOX_LAT_MIN'], '')} " + f"{dataset.get(CSV_FIELDS['BOX_LON_MAX'], '')},{dataset.get(CSV_FIELDS['BOX_LAT_MAX'], '')}" + if dataset.get(CSV_FIELDS['BOX_LON_MIN']) else '' + ) + ), + 'extracted': detected_dataset + } + + # Generate JSON-LD + print(f" Generating JSON-LD...") + try: + jsonld = client.generate_jsonld(metadata, example_jsonld) + + # Validate JSON + try: + json.loads(jsonld) + print(f" Valid JSON") + except json.JSONDecodeError as e: + print(f" Warning: Generated JSON may be invalid: {e}") + + # Save + output_path = save_jsonld(jsonld, site_output_dir, dataset_name, dataset_url) + print(f" Saved to: {output_path}") + except TimeoutError: + print(f" Error: Request timed out. Skipping this dataset.") + timed_out_urls.append({'name': dataset_name, 'url': dataset_url, 'reason': 'timeout'}) + continue + except Exception as e: + # Check if it's a server error + if any(code in str(e).lower() for code in SERVER_ERROR_CODES): + print(f" Error: API server error. 
Skipping this dataset.") + print(f" Details: {e}") + timed_out_urls.append({'name': dataset_name, 'url': dataset_url, 'reason': 'server_error'}) + else: + print(f" Error: {e}. Skipping this dataset.") + timed_out_urls.append({'name': dataset_name, 'url': dataset_url, 'reason': 'other_error'}) + continue # Print summary of failed URLs if timed_out_urls: diff --git a/scripts/validate_jsonld.py b/scripts/validate_jsonld.py deleted file mode 100644 index 0d4fbe3..0000000 --- a/scripts/validate_jsonld.py +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/env python3 -""" -Validate JSON-LD files for Schema.org Dataset compliance. - -This script validates: -1. JSON syntax -2. Schema.org structure -3. Required fields -4. Bounding box format -5. Data types -""" - -import json -import sys -from pathlib import Path -from typing import Dict, List, Optional - - -def validate_json_syntax(file_path: Path) -> tuple[bool, Optional[str]]: - """Validate JSON syntax.""" - try: - with open(file_path, 'r', encoding='utf-8') as f: - data = json.load(f) - return True, None - except json.JSONDecodeError as e: - return False, f"Invalid JSON: {e}" - except Exception as e: - return False, f"Error reading file: {e}" - - -def validate_schema_structure(data: Dict) -> List[str]: - """Validate Schema.org Dataset structure.""" - errors = [] - warnings = [] - - # Check required fields - required_fields = ['@context', '@type', '@id', 'name'] - for field in required_fields: - if field not in data: - errors.append(f"Missing required field: {field}") - - # Check @type - if '@type' in data and data['@type'] != 'Dataset': - warnings.append(f"@type is '{data['@type']}', expected 'Dataset'") - - # Check @context - if '@context' in data: - context = data['@context'] - if isinstance(context, str): - if not context.startswith('https://schema.org'): - warnings.append(f"@context should point to schema.org: {context}") - elif isinstance(context, dict): - if '@vocab' in context: - vocab = context['@vocab'] - if not 
vocab.startswith('https://schema.org'): - warnings.append(f"@vocab should point to schema.org: {vocab}") - - # Check spatialCoverage format - if 'spatialCoverage' in data: - spatial = data['spatialCoverage'] - if isinstance(spatial, dict): - if 'geo' in spatial: - geo = spatial['geo'] - if isinstance(geo, dict) and 'box' in geo: - box = geo['box'] - if isinstance(box, str): - # Validate box format: "west,south east,north" - parts = box.split() - if len(parts) != 2: - errors.append(f"Invalid box format: '{box}'. Expected 'west,south east,north'") - else: - try: - west_south = parts[0].split(',') - east_north = parts[1].split(',') - if len(west_south) != 2 or len(east_north) != 2: - errors.append(f"Invalid box format: '{box}'. Coordinates must be comma-separated pairs") - else: - west, south = float(west_south[0]), float(west_south[1]) - east, north = float(east_north[0]), float(east_north[1]) - - # Validate ranges - if not (-180 <= west <= 180): - errors.append(f"Invalid west longitude: {west} (must be -180 to 180)") - if not (-180 <= east <= 180): - errors.append(f"Invalid east longitude: {east} (must be -180 to 180)") - if not (-90 <= south <= 90): - errors.append(f"Invalid south latitude: {south} (must be -90 to 90)") - if not (-90 <= north <= 90): - errors.append(f"Invalid north latitude: {north} (must be -90 to 90)") - if west >= east: - errors.append(f"West ({west}) must be less than East ({east})") - if south >= north: - errors.append(f"South ({south}) must be less than North ({north})") - except ValueError as e: - errors.append(f"Invalid box format: '{box}'. 
{e}") - - # Check distribution format - if 'distribution' in data: - dist = data['distribution'] - if isinstance(dist, list): - for i, item in enumerate(dist): - if not isinstance(item, dict): - errors.append(f"Distribution[{i}] must be an object") - elif '@type' not in item: - warnings.append(f"Distribution[{i}] missing @type (should be 'DataDownload')") - elif isinstance(dist, dict): - if '@type' not in dist: - warnings.append("Distribution missing @type (should be 'DataDownload')") - - return errors, warnings - - -def validate_data_types(data: Dict) -> List[str]: - """Validate data types for common fields.""" - warnings = [] - - # Check datePublished format - if 'datePublished' in data: - date = data['datePublished'] - if isinstance(date, str): - # Should be ISO 8601 format (YYYY-MM-DD) - if len(date) < 10 or date[4] != '-' or date[7] != '-': - warnings.append(f"datePublished format may be incorrect: '{date}' (expected YYYY-MM-DD)") - - # Check version - if 'version' in data: - version = data['version'] - if not isinstance(version, str): - warnings.append(f"version should be a string, got {type(version)}") - - # Check license - if 'license' in data: - license_val = data['license'] - if isinstance(license_val, str): - if not license_val.startswith('http'): - warnings.append(f"license should be a URL: '{license_val}'") - elif isinstance(license_val, list): - for i, lic in enumerate(license_val): - if isinstance(lic, str) and not lic.startswith('http'): - warnings.append(f"license[{i}] should be a URL: '{lic}'") - elif isinstance(lic, dict) and 'url' in lic: - url = lic['url'] - if not url.startswith('http'): - warnings.append(f"license[{i}].url should be a URL: '{url}'") - - return warnings - - -def main(): - if len(sys.argv) < 2: - print("Usage: python validate_jsonld.py ") - sys.exit(1) - - file_path = Path(sys.argv[1]) - - if not file_path.exists(): - print(f"[ERROR] File not found: {file_path}") - sys.exit(1) - - print(f"Validating: {file_path}") - print("=" * 
60) - - # Validate JSON syntax - is_valid, error = validate_json_syntax(file_path) - if not is_valid: - print(f"[ERROR] JSON Syntax Error: {error}") - sys.exit(1) - - print("[OK] Valid JSON syntax") - - # Load data - with open(file_path, 'r', encoding='utf-8') as f: - data = json.load(f) - - # Validate Schema.org structure - errors, warnings = validate_schema_structure(data) - - # Validate data types - type_warnings = validate_data_types(data) - warnings.extend(type_warnings) - - # Print results - if errors: - print("\n[ERROR] Errors found:") - for error in errors: - print(f" - {error}") - - if warnings: - print("\n[WARNING] Warnings:") - for warning in warnings: - print(f" - {warning}") - - if not errors and not warnings: - print("\n[SUCCESS] All validations passed!") - print("\nSummary:") - print(f" - Type: {data.get('@type', 'N/A')}") - print(f" - Name: {data.get('name', 'N/A')[:60]}...") - if 'spatialCoverage' in data: - spatial = data['spatialCoverage'] - if isinstance(spatial, dict) and 'geo' in spatial: - geo = spatial['geo'] - if isinstance(geo, dict) and 'box' in geo: - print(f" - Bounding Box: {geo['box']}") - if 'distribution' in data: - dist = data['distribution'] - count = len(dist) if isinstance(dist, list) else 1 - print(f" - Distribution entries: {count}") - sys.exit(0) - elif errors: - print(f"\n[FAILED] Validation failed with {len(errors)} error(s)") - sys.exit(1) - else: - print(f"\n[PASSED] Validation passed with {len(warnings)} warning(s)") - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/scripts/validate_jsonld_batch.py b/scripts/validate_jsonld_batch.py deleted file mode 100644 index 0d55d12..0000000 --- a/scripts/validate_jsonld_batch.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -""" -Batch validate JSON-LD files in a directory. 
- -Usage: - python scripts/validate_jsonld_batch.py [--exclude ] -""" - -import sys -import subprocess -from pathlib import Path - -# Resolve path to validate_jsonld.py (same directory as this script) -SCRIPT_DIR = Path(__file__).resolve().parent -VALIDATE_SCRIPT = SCRIPT_DIR / "validate_jsonld.py" - - -def main(): - if len(sys.argv) < 2: - print("Usage: python scripts/validate_jsonld_batch.py [--exclude ]") - sys.exit(1) - - directory = Path(sys.argv[1]) - exclude_pattern = None - if len(sys.argv) >= 4 and sys.argv[2] == "--exclude": - exclude_pattern = sys.argv[3] - - if not directory.exists(): - print(f"Directory {directory} does not exist. Skipping validation.") - sys.exit(0) - - if not directory.is_dir(): - print(f"{directory} is not a directory.") - sys.exit(1) - - # Find all JSON-LD files - jsonld_files = list(directory.rglob("*.jsonld")) - - # Filter out excluded paths - if exclude_pattern: - jsonld_files = [f for f in jsonld_files if exclude_pattern not in str(f)] - - if not jsonld_files: - print(f"No JSON-LD files found in {directory}") - sys.exit(0) - - print(f"Found {len(jsonld_files)} JSON-LD file(s) to validate") - print("=" * 60) - - failed = False - for file_path in sorted(jsonld_files): - print(f"\nValidating: {file_path}") - result = subprocess.run( - [sys.executable, str(VALIDATE_SCRIPT), str(file_path)], - capture_output=False, - cwd=SCRIPT_DIR.parent # run from repo root so paths resolve - ) - if result.returncode != 0: - failed = True - - print("\n" + "=" * 60) - if failed: - print("Some files failed validation") - sys.exit(1) - else: - print(f"All {len(jsonld_files)} file(s) validated successfully") - sys.exit(0) - - -if __name__ == "__main__": - main() From be48884d2020716cd6b85607c4469b749df80d62 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:08:29 -0600 Subject: [PATCH 39/58] added the validation script --- scripts/validate_jsonld_batch.py | 182 +++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) 
create mode 100644 scripts/validate_jsonld_batch.py diff --git a/scripts/validate_jsonld_batch.py b/scripts/validate_jsonld_batch.py new file mode 100644 index 0000000..2768c0e --- /dev/null +++ b/scripts/validate_jsonld_batch.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +""" +Validate JSON-LD files under a directory (e.g. data/objects/summoned/generated). +Checks: valid JSON, @context, @type, name; spatialCoverage box format; distribution encodingFormat as array. +WebPage and DataCatalog are accepted with a warning (expected Dataset for dataset files). +Exits 0 if all pass, 1 if any file fails. +""" +import json +import sys +from pathlib import Path + + +def get_box_string(data): + """Extract spatialCoverage box string if present.""" + sc = data.get("spatialCoverage") + if not isinstance(sc, dict): + return None + geo = sc.get("geo") + if not isinstance(geo, dict): + return None + box = geo.get("box") + if isinstance(box, str): + return box.strip() + return None + + +def validate_box(box_str): + """Validate Schema.org box format: 'west,south east,north'. 
Returns (True, None) or (False, error_msg).""" + if not box_str: + return True, None + parts = box_str.split() + if len(parts) == 2: + try: + ws = parts[0].split(",") + en = parts[1].split(",") + if len(ws) == 2 and len(en) == 2: + west, south = float(ws[0]), float(ws[1]) + east, north = float(en[0]), float(en[1]) + if -90 <= south <= 90 and -90 <= north <= 90 and -180 <= west <= 180 and -180 <= east <= 180: + return True, None + return False, "box out of range" + except ValueError: + return False, "invalid box format" + elif len(parts) == 4: + try: + a, b, c, d = float(parts[0]), float(parts[1]), float(parts[2]), float(parts[3]) + if -90 <= b <= 90 and -90 <= d <= 90: + west, south, east, north = a, b, c, d + else: + south, west, north, east = a, b, c, d + if -90 <= south <= 90 and -90 <= north <= 90 and -180 <= west <= 180 and -180 <= east <= 180: + return True, None + return False, "box out of range" + except ValueError: + return False, "invalid box numbers" + return False, "box expected 2 or 4 numbers" + + +def check_distribution_encoding_format(data): + """Check that each distribution has encodingFormat as array. Returns list of error strings (empty if ok).""" + errs = [] + dist = data.get("distribution") + if not isinstance(dist, list): + return errs + for i, item in enumerate(dist): + if not isinstance(item, dict): + continue + ef = item.get("encodingFormat") + if ef is None: + continue + if isinstance(ef, str): + errs.append(f"distribution[{i}].encodingFormat must be array, got string") + elif not isinstance(ef, list): + errs.append(f"distribution[{i}].encodingFormat must be array, got {type(ef).__name__}") + return errs + + +def validate_file(path: Path) -> tuple[bool, list]: + """ + Validate one JSON-LD file. Returns (success: bool, list of warning/error messages). + success=False means hard failure (invalid JSON or missing required fields). 
+ """ + errors = [] + warnings = [] + try: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + except json.JSONDecodeError as e: + return False, [f"JSON Syntax Error: Invalid JSON: {e}"] + except Exception as e: + return False, [str(e)] + + # Required keys + for key in ["@context", "@type", "name"]: + if key not in data: + errors.append(f"missing '{key}'") + if errors: + return False, errors + + # @type: Dataset expected; WebPage and DataCatalog allowed with warning + dtype = data.get("@type", "") + if dtype not in ("Dataset", "DataCatalog", "WebPage"): + if dtype: + warnings.append(f"@type is '{dtype}', expected 'Dataset'") + elif dtype in ("WebPage", "DataCatalog"): + warnings.append(f"@type is '{dtype}', expected 'Dataset'") + + # spatialCoverage box + box_str = get_box_string(data) + if box_str: + ok, msg = validate_box(box_str) + if not ok: + errors.append(f"spatialCoverage box: {msg}") + + # distribution encodingFormat must be array + ef_errs = check_distribution_encoding_format(data) + errors.extend(ef_errs) + + all_msgs = errors + warnings + return len(errors) == 0, all_msgs + + +def main(): + if len(sys.argv) < 2: + print("Usage: python validate_jsonld_batch.py ", file=sys.stderr) + sys.exit(2) + root = Path(sys.argv[1]) + if not root.is_dir(): + print(f"Error: not a directory: {root}", file=sys.stderr) + sys.exit(2) + + files = sorted(root.rglob("*.jsonld")) + print(f"Found {len(files)} JSON-LD file(s) to validate") + print("=" * 60) + + failed = [] + for path in files: + rel = path.as_posix() + print(f"\nValidating: {rel}") + print("=" * 60) + success, messages = validate_file(path) + if not success: + for m in messages: + print(f"Error: {m}") + failed.append(rel) + continue + print("[OK] Valid JSON syntax") + if messages: + print("Warning: Warnings:") + for m in messages: + print(f" - {m}") + print("[PASSED] Validation passed with warning(s)") + else: + print("[SUCCESS] All validations passed!") + # Summary + try: + with open(path, 
"r", encoding="utf-8") as f: + data = json.load(f) + name = data.get("name", "") + if len(name) > 50: + name = name[:47] + "..." + print("Summary:") + print(f" - Type: {data.get('@type', '')}") + print(f" - Name: {name}...") + box_str = get_box_string(data) + if box_str: + print(f" - Bounding Box: {box_str}") + dist = data.get("distribution") + if isinstance(dist, list): + print(f" - Distribution entries: {len(dist)}") + except Exception: + pass + print("\n" + "=" * 60) + if failed: + print("Some files failed validation") + sys.exit(1) + print("All validations passed.") + sys.exit(0) + + +if __name__ == "__main__": + main() From 7cbafe10a2a7e0e9592cd7cffcdc018734414072 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:16:42 -0600 Subject: [PATCH 40/58] updated encodingFormat to string array --- .../CHELSA/chelsa_cerra_daily.jsonld | 4 ++- .../chelsa_ch_highres_climatologies.jsonld | 4 ++- .../CHELSA/chelsa_ch_highres_daily.jsonld | 4 ++- .../generated/CHELSA/chelsa_daily.jsonld | 4 ++- .../generated/CHELSA/chelsa_monthly.jsonld | 4 ++- .../generated/CHELSA/chelsa_w5e5_daily.jsonld | 4 ++- .../consensus-land-cover.jsonld | 8 +++-- .../objects/summoned/generated/GFC/gfc.jsonld | 36 ++++++++++++++----- .../global-tree-density.jsonld | 12 +++++-- .../generated/MERIT_DEM/merit-dem.jsonld | 12 +++++-- .../summoned/generated/MRLC_NLCD/rcmap.jsonld | 12 +++++-- .../TerraClimate/terraclimate.jsonld | 16 ++++++--- 12 files changed, 90 insertions(+), 30 deletions(-) diff --git a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld index e3390bb..a4ab667 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld @@ -45,7 +45,9 @@ "name": "CHELSAcerra-daily downloads (portal)", "description": "Download portal linked from the CHELSA catalog entry for CHELSAcerra-daily.", "contentUrl": 
"https://envicloud.wsl.ch/", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] } ], "citation": "Karger, D. N. and Janzing, J. (2025). CHELSAcerra-daily. EnviDat. https://doi.org/10.16904/envidat.703", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld index 9bb3d97..cf623e8 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld @@ -65,7 +65,9 @@ "name": "CHELSAch-highres-climatologies downloads (NetCDF)", "description": "Download portal for CHELSAch-highres-climatologies.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "application/x-netcdf" + "encodingFormat": [ + "application/x-netcdf" + ] } ], "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld index 6375014..c07d6db 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld @@ -65,7 +65,9 @@ "name": "CHELSAch-highres-daily downloads (NetCDF)", "description": "Download portal for CHELSAch-highres-daily.", "contentUrl": "https://envicloud.wsl.ch/", - "encodingFormat": "application/x-netcdf" + "encodingFormat": [ + "application/x-netcdf" + ] } ], "citation": "Zilker, F., Karger, D. N. (2025). CHELSAch-highres-daily climate data at high resolution. EnviDat. 
https://www.doi.org/10.16904/envidat.688", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld index 95d782c..c24e080 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld @@ -119,7 +119,9 @@ "name": "Catalog landing page (downloads)", "description": "Alternative CHELSA downloads landing page.", "contentUrl": "https://chelsa-climate.org/downloads/", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] } ], "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld index 9114404..fb9d384 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld @@ -82,7 +82,9 @@ "name": "Catalog landing page (downloads)", "description": "Alternative CHELSA downloads landing page.", "contentUrl": "https://chelsa-climate.org/downloads/", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] } ], "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. 
https://doi.org/10.1038/sdata.2017.122", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld index 560ec9e..c214eb7 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld @@ -51,7 +51,9 @@ "name": "CHELSA-W5E5-daily downloads (ISIMIP portal)", "description": "The catalog links the W5E5-based CHELSA daily product to the ISIMIP data portal.", "contentUrl": "https://data.isimip.org/", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] } ], "encodingFormat": [ diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld index 91045fa..8c0099b 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld @@ -135,14 +135,18 @@ "name": "Full Version 1.0 downloads (with DISCover) – directory", "description": "Directory containing GeoTIFF layers (one per class) for Full Version 1.0 (with DISCover/GLCC).", "contentUrl": "https://data.earthenv.org/consensus_landcover/with_DISCover/", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Reduced Version 1.0 downloads (without DISCover) – directory", "description": "Directory containing GeoTIFF layers (one per class) for Reduced Version 1.0 (without DISCover/GLCC).", "contentUrl": "https://data.earthenv.org/consensus_landcover/without_DISCover/", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", diff --git a/data/objects/summoned/generated/GFC/gfc.jsonld b/data/objects/summoned/generated/GFC/gfc.jsonld index 43a5ba5..0f14af1 100644 --- a/data/objects/summoned/generated/GFC/gfc.jsonld +++ 
b/data/objects/summoned/generated/GFC/gfc.jsonld @@ -165,7 +165,9 @@ "name": "Direct download page (tile selection map and links)", "description": "Interactive download instructions and example granule URLs for 10×10 degree GeoTIFF tiles.", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", @@ -227,51 +229,67 @@ "name": "Layer-wide URL list: treecover2000", "description": "Text file listing URLs for all granules for the treecover2000 layer.", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/treecover2000.txt", - "encodingFormat": "text/plain" + "encodingFormat": [ + "text/plain" + ] }, { "@type": "DataDownload", "name": "Layer-wide URL list: gain", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/gain.txt", - "encodingFormat": "text/plain" + "encodingFormat": [ + "text/plain" + ] }, { "@type": "DataDownload", "name": "Layer-wide URL list: lossyear", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/lossyear.txt", - "encodingFormat": "text/plain" + "encodingFormat": [ + "text/plain" + ] }, { "@type": "DataDownload", "name": "Layer-wide URL list: datamask", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/datamask.txt", - "encodingFormat": "text/plain" + "encodingFormat": [ + "text/plain" + ] }, { "@type": "DataDownload", "name": "Layer-wide URL list: first", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/first.txt", - "encodingFormat": "text/plain" + "encodingFormat": [ + "text/plain" + ] }, { "@type": "DataDownload", "name": "Layer-wide URL list: last", "contentUrl": "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/last.txt", - "encodingFormat": "text/plain" + "encodingFormat": [ + "text/plain" + 
] }, { "@type": "DataDownload", "name": "Google Earth Engine access (asset)", "description": "Analyze the dataset directly in Google Earth Engine using asset ID UMD/hansen/global_forest_change_2023_v1_11.", "contentUrl": "https://developers.google.com/earth-engine/datasets/catalog/UMD_hansen_global_forest_change_2023_v1_11", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Web visualization (recommended linking URL)", "description": "Interactive visualization site recommended by the dataset providers for linking/citation.", "contentUrl": "https://glad.earthengine.app/view/global-forest-change", - "encodingFormat": "text/html" + "encodingFormat": [ + "text/html" + ] } ], "citation": [ diff --git a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld index c5eebcc..0d386d3 100644 --- a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld +++ b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld @@ -131,21 +131,27 @@ "name": "Primary download (ZIP; ArcGIS File Geodatabase + supporting ArcGIS files)", "description": "Primary dataset package (zipped) containing an ArcGIS File Geodatabase (.gdb) with two rasters (biome-level and ecoregion-level), plus supporting ArcGIS layer files (.lyr) and a map document (.mxd).", "contentUrl": "https://elischolar.library.yale.edu/context/yale_fes_data/article/1000/type/native/viewcontent", - "encodingFormat": "application/zip" + "encodingFormat": [ + "application/zip" + ] }, { "@type": "DataDownload", "name": "Revision 01 (small islands) ZIP", "description": "Revision_01 adds tree density predictions for small islands not included in the primary download; follows the original file structure.", "contentUrl": 
"https://elischolar.library.yale.edu/cgi/viewcontent.cgi?article=1000&context=yale_fes_data&filename=0&type=additional", - "encodingFormat": "application/zip" + "encodingFormat": [ + "application/zip" + ] }, { "@type": "DataDownload", "name": "Revision 01 WGS84 GeoTIFF ZIP", "description": "Revision_01 biome-level model stored in WGS84 GeoTIFF format (reprojected from Goode Homolosine using nearest-neighbor resampling); includes the primary .tif and visualization support files.", "contentUrl": "https://elischolar.library.yale.edu/cgi/viewcontent.cgi?article=1000&context=yale_fes_data&filename=1&type=additional", - "encodingFormat": "application/zip" + "encodingFormat": [ + "application/zip" + ] } ], "citation": [ diff --git a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld index ad90a53..11b4972 100644 --- a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld @@ -90,7 +90,9 @@ "@type": "DataDownload", "name": "MERIT DEM – ESRI EHdr (FLT) packages (30°×30° tar.gz)", "description": "Elevation tiles distributed as 30°×30° packages containing 5°×5° tiles (6000×6000 pixels). ESRI FLT rasters with HDR (Fortran Direct Access / ESRI FLT style; 4-byte float, little endian). Package names encode the lower-left corner (e.g., dem_flt_n30w120.tar.gz contains tiles for N30–N60, W120–W090). Filenames encode the center of the lower-left pixel (e.g., n30w120_dem.* covers N30–N35, W120–W115). 
Access may require registration/password per the provider instructions.", - "encodingFormat": "application/octet-stream", + "encodingFormat": [ + "application/octet-stream" + ], "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/dem_flt_n30w120.tar.gz", "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" }, @@ -109,7 +111,9 @@ "@type": "DataDownload", "name": "MERIT DEM – MRR (single merged raster)", "description": "Single merged MRR raster created by merging all MERIT DEM source tiles; vertical resolution converted to 0.01 meters by rounding to the nearest centimeter. MRR rasters can be displayed in the MapInfo Pro GIS platform. Prepared by Sam Roberts (Roberts Geospatial).", - "encodingFormat": "application/octet-stream", + "encodingFormat": [ + "application/octet-stream" + ], "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/MERIT_DEM.mrr", "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" }, @@ -117,7 +121,9 @@ "@type": "DataDownload", "name": "MERIT DEM – 5°×5° tile download page (GeoTIFF tiles)", "description": "Alternative download page for individual 5°×5° tiles (useful when large package downloads are difficult). Tile filenames encode the center of the lower-left pixel (e.g., n30w120_dem.tif covers N30–N35, W120–W115). 
Access may require registration/password per the provider instructions.", - "encodingFormat": "text/html", + "encodingFormat": [ + "text/html" + ], "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/list_5deg.html", "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/list_5deg.html" } diff --git a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld index 4d5209a..98908a8 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld @@ -82,21 +82,27 @@ "@type": "DataDownload", "name": "Example component download: Bare Ground (2011–2024 ZIP)", "contentUrl": "https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/data-bundles/Bare_Ground_2011_2024.zip", - "encodingFormat": "application/zip", + "encodingFormat": [ + "application/zip" + ], "description": "Example direct ZIP download link for an RCMAP component time-series." }, { "@type": "DataDownload", "name": "Example component download: Herbaceous (2011–2024 ZIP)", "contentUrl": "https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/data-bundles/Herbaceous_2011_2024.zip", - "encodingFormat": "application/zip", + "encodingFormat": [ + "application/zip" + ], "description": "Example direct ZIP download link for an RCMAP component time-series." }, { "@type": "DataDownload", "name": "RCMAP FGDC metadata (XML)", "contentUrl": "https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/metadata/RCMAP_V7_FGDC_Metadata.xml", - "encodingFormat": "application/xml", + "encodingFormat": [ + "application/xml" + ], "description": "FGDC metadata for RCMAP products." 
}, { diff --git a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld index 1fc7d14..d6619f6 100644 --- a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +++ b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld @@ -179,28 +179,36 @@ "@type": "DataDownload", "name": "TerraClimate THREDDS catalog (all data, summaries, climatologies, and +2°C/+4°C scenarios)", "description": "Browsable THREDDS catalog containing annual/monthly NetCDF files and aggregated products, including summaries, climatologies, and climate futures (+2°C and +4°C).", - "encodingFormat": "text/html", + "encodingFormat": [ + "text/html" + ], "contentUrl": "https://tds-proxy.nkn.uidaho.edu/thredds/catalog/TERRACLIMATE_ALL/catalog.html" }, { "@type": "DataDownload", "name": "TerraClimate THREDDS aggregated catalog (monthly aggregations by variable)", "description": "THREDDS aggregated catalogs providing service endpoints (OPeNDAP/NetCDF Subset/NCSS/WMS/WCS) for 1958–current-year monthly aggregations by variable.", - "encodingFormat": "text/html", + "encodingFormat": [ + "text/html" + ], "contentUrl": "https://tds-proxy.nkn.uidaho.edu/thredds/terraclimate_aggregated.html" }, { "@type": "DataDownload", "name": "Example OP(e)NDAP access endpoint (aggregated monthly variable file)", "description": "Example OPeNDAP endpoint for an aggregated monthly variable NetCDF. 
Replace the variable/file identifier as needed for other variables and products available in the THREDDS aggregated catalog.", - "encodingFormat": "application/x-netcdf", + "encodingFormat": [ + "application/x-netcdf" + ], "contentUrl": "https://tds-proxy.nkn.uidaho.edu/thredds/dodsC/agg_terraclimate_tmax_1958_CurrentYear_GLOBE.nc" }, { "@type": "DataDownload", "name": "Google Earth Engine ImageCollection: IDAHO_EPSCOR/TERRACLIMATE", "description": "TerraClimate is also available as a Google Earth Engine ImageCollection for cloud-based analysis and visualization.", - "encodingFormat": "text/html", + "encodingFormat": [ + "text/html" + ], "contentUrl": "https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_TERRACLIMATE" } ], From f921268fd9fe95112781c5a768d01d9cacd3f7b7 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:20:53 -0600 Subject: [PATCH 41/58] updated .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c569d3d..35eb694 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .idea +.vscode # Python __pycache__/ From d9f688db294fca8795c7f260ef9b7802ec11c977 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:24:07 -0600 Subject: [PATCH 42/58] removed unnecessary files --- .gitignore | 1 - README.md | 4 ---- mkdocs.yaml | 5 ----- 3 files changed, 10 deletions(-) delete mode 100644 mkdocs.yaml diff --git a/.gitignore b/.gitignore index 35eb694..c569d3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ .idea -.vscode # Python __pycache__/ diff --git a/README.md b/README.md index fe7c0a3..44c6c6b 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,3 @@ Documentation, files and code related to the exposure of resource on the web for indexing. 
-Sitemaps: -All in data/object/summoned : https://earthcube.github.io/communityCollections/data/objects/summoned/sitemap.xml - -The AI Generated JSON-LD sitemap: https://earthcube.github.io/communityCollections/data/objects/summoned/generated/sitemap.xml diff --git a/mkdocs.yaml b/mkdocs.yaml deleted file mode 100644 index 7c4eec1..0000000 --- a/mkdocs.yaml +++ /dev/null @@ -1,5 +0,0 @@ -site_name: Geocodes Metadata -site_url: https://earthcube.github.io/communityCollections/site/ -#theme: 'material' -#theme: 'mkdocs' - From d1c76e55a8aa9f1a165153b4656365108ee8377d Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:26:06 -0600 Subject: [PATCH 43/58] retreieved delete file --- collection/stac-broswer/sitemap.xml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 collection/stac-broswer/sitemap.xml diff --git a/collection/stac-broswer/sitemap.xml b/collection/stac-broswer/sitemap.xml new file mode 100644 index 0000000..3d82292 --- /dev/null +++ b/collection/stac-broswer/sitemap.xml @@ -0,0 +1,9 @@ + + + + https://radiantearth.github.io/stac-browser/#/external/raw.githubusercontent.com/addelany/neon4cast-catalog/main/stac/phenology/collection.json?.language=en&.asset=asset-thumbnail + 2005-01-01 + monthly + 0.8 + + \ No newline at end of file From 7a92a6d904adad4f24bb01c0ed988a41213ea6d4 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:27:53 -0600 Subject: [PATCH 44/58] Retrieved mkdocs.yaml --- mkdocs.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 mkdocs.yaml diff --git a/mkdocs.yaml b/mkdocs.yaml new file mode 100644 index 0000000..7c4eec1 --- /dev/null +++ b/mkdocs.yaml @@ -0,0 +1,5 @@ +site_name: Geocodes Metadata +site_url: https://earthcube.github.io/communityCollections/site/ +#theme: 'material' +#theme: 'mkdocs' + From 6e340f5f597ca9c26d5acebceb4aa467ae530154 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:36:00 -0600 Subject: [PATCH 45/58] updated README --- 
README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 44c6c6b..6a24593 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,8 @@ Documentation, files and code related to the exposure of resource on the web for indexing. +Sitemaps: +All in data/object/summoned : https://earthcube.github.io/communityCollections/data/objects/summoned/sitemap.xml + + +The AI Generated JSON-LD sitemap: https://earthcube.github.io/communityCollections/data/objects/summoned/generated/sitemap.xml \ No newline at end of file From 0e0a91243efa8a96d3f06177188b42cfe033e98b Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:39:27 -0600 Subject: [PATCH 46/58] test sitemap generation --- .github/workflows/sitemap_resources.yaml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index 6033238..e20e98f 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -6,6 +6,9 @@ on: - main - 3-generate-jsonld-datasets-from-websites +permissions: + contents: write + jobs: sitemap_job: runs-on: ubuntu-latest @@ -19,7 +22,7 @@ jobs: - name: Checkout the repo uses: actions/checkout@v6 with: - fetch-depth: 1 + fetch-depth: 0 # Generate single sitemap for all JSON-LD files in data and collection directories - name: Generate sitemap for all JSON-LD resources @@ -50,6 +53,14 @@ jobs: additional-extensions: jsonld json xml exclude-paths: .git .github docs scripts crawler prompts .vscode + + - name: Commit and push sitemaps to branch + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add data/objects/summoned/sitemap.xml data/objects/summoned/generated/sitemap.xml + git diff --staged --quiet || (git commit -m "chore: update JSON-LD sitemaps" && git push) + ### WE MIGHT WANT TO DO INDIVIDUAL SITEMAPS # - name: 
Generate sitemap for all JSON-LD resources # id: sitemap_glim From 952b2938643dbeff1c2a88309d361c8b368d40f7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 16 Feb 2026 17:40:23 +0000 Subject: [PATCH 47/58] chore: update JSON-LD sitemaps --- data/objects/summoned/generated/sitemap.xml | 223 +++++++++++++++ data/objects/summoned/sitemap.xml | 287 ++++++++++++++++++++ 2 files changed, 510 insertions(+) create mode 100644 data/objects/summoned/generated/sitemap.xml create mode 100644 data/objects/summoned/sitemap.xml diff --git a/data/objects/summoned/generated/sitemap.xml b/data/objects/summoned/generated/sitemap.xml new file mode 100644 index 0000000..42cb5a6 --- /dev/null +++ b/data/objects/summoned/generated/sitemap.xml @@ -0,0 +1,223 @@ + + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld +2026-02-16T11:16:42-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/datacatalog.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/webpage.jsonld +2026-02-16T09:22:43-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/FLO1K/flo1k.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/FLO1K/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/G-RUN/g-run.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/G-RUN/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GFC/gfc.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GFC/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GHSL/ghsl.jsonld +2026-02-16T10:46:20-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GHSL/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GPP_MOD17/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GRACE-REC/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld +2026-02-16T09:22:43-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/HydroSHEDS/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Hydrography90m/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld +2026-02-16T09:22:43-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Shale_Network/shale-network.jsonld +2026-02-16T11:01:33-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Shale_Network/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/SoilGrids2/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +2026-02-16T11:16:42-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/TerraClimate/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/WATERBASE/waterbase.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/WATERBASE/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Water_Quality_Portal/webpage.jsonld +2026-02-16T10:46:20-06:00 + + diff --git a/data/objects/summoned/sitemap.xml b/data/objects/summoned/sitemap.xml new file mode 100644 index 0000000..7205d29 --- /dev/null +++ b/data/objects/summoned/sitemap.xml @@ -0,0 +1,287 @@ + + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/glim/42cc768d19310666e11275e0ef420914c5d10868.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/glim/5cbde8720e62b2e74ea38d7cdfdafd747405093f.jsonld +2023-11-09T11:27:19-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/glim/b828dd7f22d629058b801d915af937420b4de1ab.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/glim/fc3fbb167c441f59021612f8261b51694b49904b.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/gpp/2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/gpp/73b23fc49e03311c4a4abf85208ae8a0fd114aa7.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/gpp/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/gpp/bc4d190917216d7d0db70cd10d64f3e7a1d54d03.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/gpp/ef65272b4c4da100943c1142f85457f41f9f03c5.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/nitrogen/4ef2e6a6cdec4cb04e059b588a790f2817aa9959.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/nitrogen2/5588a6fb892f4d5b0a21b503b63dc0586c256de8.jsonld 
+2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/nitrogen2/70eeed8a265bb642c401e0e51c98103a875b6fb1.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/nitrogen2/a1e2f82711848e68b9e20dbd8019b3360e4e2ad5.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/nitrogen2/eba7bc7d2f6dc818108833457f776e065ae9521c.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/stac/62431785c83ca0c826bdb5cca1dd9eeb3cc0655b.jsonld +2023-11-09T11:27:19-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld +2026-02-16T11:16:42-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/datacatalog.jsonld +2026-02-16T09:22:43-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Consensus_Land_Cover/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/FLO1K/flo1k.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/FLO1K/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/G-RUN/g-run.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/G-RUN/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GFC/gfc.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GFC/webpage.jsonld +2026-02-16T09:22:43-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GHSL/ghsl.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GHSL/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GPP_MOD17/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/GRACE-REC/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld +2026-02-16T11:16:42-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Global_Tree_Density/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/HydroSHEDS/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Hydrography90m/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/datacatalog.jsonld +2026-02-16T09:22:43-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/MRLC_NLCD/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Shale_Network/shale-network.jsonld +2026-02-16T11:01:33-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Shale_Network/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/SoilGrids2/webpage.jsonld +2026-02-16T10:46:20-06:00 + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +2026-02-16T11:16:42-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/TerraClimate/webpage.jsonld +2026-02-16T09:22:43-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/WATERBASE/waterbase.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/WATERBASE/webpage.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld +2026-02-16T10:46:20-06:00 + + +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/Water_Quality_Portal/webpage.jsonld +2026-02-16T10:46:20-06:00 + + From 0b6faf5d99d42795d731caac848abdb74e276272 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:42:33 -0600 Subject: [PATCH 48/58] site map test try 2 --- .github/workflows/sitemap_resources.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index e20e98f..3de3577 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -58,8 +58,13 @@ jobs: run: | git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + ls -la data/objects/summoned/sitemap.xml 
data/objects/summoned/generated/sitemap.xml 2>/dev/null || true git add data/objects/summoned/sitemap.xml data/objects/summoned/generated/sitemap.xml - git diff --staged --quiet || (git commit -m "chore: update JSON-LD sitemaps" && git push) + git status + if ! git diff --staged --quiet; then + git commit -m "chore: update JSON-LD sitemaps" + git push origin HEAD:${{ github.ref_name }} + fi ### WE MIGHT WANT TO DO INDIVIDUAL SITEMAPS # - name: Generate sitemap for all JSON-LD resources From 71ad6f34281050e0192feab0417274ba447f284e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 16 Feb 2026 17:44:51 +0000 Subject: [PATCH 49/58] chore: update JSON-LD sitemaps --- data/objects/summoned/generated/sitemap.xml | 4 ++++ data/objects/summoned/sitemap.xml | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/data/objects/summoned/generated/sitemap.xml b/data/objects/summoned/generated/sitemap.xml index 42cb5a6..7f51946 100644 --- a/data/objects/summoned/generated/sitemap.xml +++ b/data/objects/summoned/generated/sitemap.xml @@ -1,6 +1,10 @@ +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/sitemap.xml +2026-02-16T17:40:23Z + + https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld 2026-02-16T09:22:43-06:00 diff --git a/data/objects/summoned/sitemap.xml b/data/objects/summoned/sitemap.xml index 7205d29..bf2bb30 100644 --- a/data/objects/summoned/sitemap.xml +++ b/data/objects/summoned/sitemap.xml @@ -1,6 +1,14 @@ +https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/sitemap.xml +2026-02-16T17:40:23Z + + 
+https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/sitemap.xml +2026-02-16T17:40:23Z + + https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld 2023-11-09T11:27:19-06:00 From a1cfd039eec2ecde59a178ede72a598d37da72c7 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:45:17 -0600 Subject: [PATCH 50/58] Update sitemap workflow Co-authored-by: Cursor --- .github/workflows/sitemap_resources.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index 3de3577..ec7a57c 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -63,6 +63,8 @@ jobs: git status if ! git diff --staged --quiet; then git commit -m "chore: update JSON-LD sitemaps" + git fetch origin ${{ github.ref_name }} + git rebase origin/${{ github.ref_name }} git push origin HEAD:${{ github.ref_name }} fi From 389962de9af82b4790ef63620f88e761eff93f30 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 16 Feb 2026 17:46:19 +0000 Subject: [PATCH 51/58] chore: update JSON-LD sitemaps --- data/objects/summoned/generated/sitemap.xml | 2 +- data/objects/summoned/sitemap.xml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data/objects/summoned/generated/sitemap.xml b/data/objects/summoned/generated/sitemap.xml index 7f51946..b6b3d10 100644 --- a/data/objects/summoned/generated/sitemap.xml +++ b/data/objects/summoned/generated/sitemap.xml @@ -2,7 +2,7 @@ https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/sitemap.xml -2026-02-16T17:40:23Z 
+2026-02-16T17:44:51Z https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld diff --git a/data/objects/summoned/sitemap.xml b/data/objects/summoned/sitemap.xml index bf2bb30..fac96ce 100644 --- a/data/objects/summoned/sitemap.xml +++ b/data/objects/summoned/sitemap.xml @@ -2,11 +2,11 @@ https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/sitemap.xml -2026-02-16T17:40:23Z +2026-02-16T17:44:51Z https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/sitemap.xml -2026-02-16T17:40:23Z +2026-02-16T17:44:51Z https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld From d51bdb183def3a5e18c706287b38b1c31dc02db9 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 11:51:18 -0600 Subject: [PATCH 52/58] changed main to master --- .github/workflows/sitemap_resources.yaml | 1 + README.md | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index ec7a57c..0c8b704 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -4,6 +4,7 @@ on: push: branches: - main + - master - 3-generate-jsonld-datasets-from-websites permissions: diff --git a/README.md b/README.md index 6a24593..28729ce 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,7 @@ Documentation, files and code related to the exposure of resource on the web for indexing. 
-Sitemaps: -All in data/object/summoned : https://earthcube.github.io/communityCollections/data/objects/summoned/sitemap.xml +Sitemaps (generated by [sitemap_resources.yaml](.github/workflows/sitemap_resources.yaml) on push to `master` / `main` / feature branches): - -The AI Generated JSON-LD sitemap: https://earthcube.github.io/communityCollections/data/objects/summoned/generated/sitemap.xml \ No newline at end of file +- **All JSON-LD under data/objects/summoned:** [GitHub Pages](https://earthcube.github.io/communityCollections/data/objects/summoned/sitemap.xml) · [Raw (e.g. master)](https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/sitemap.xml) +- **AI-generated JSON-LD only:** [GitHub Pages](https://earthcube.github.io/communityCollections/data/objects/summoned/generated/sitemap.xml) · [Raw (e.g. master)](https://raw.githubusercontent.com/earthcube/communityCollections/master/data/objects/summoned/generated/sitemap.xml) \ No newline at end of file From 3c94014bf844c3dcd690158aa9166ad0b76e4549 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 16 Feb 2026 17:55:58 +0000 Subject: [PATCH 53/58] chore: update JSON-LD sitemaps --- data/objects/summoned/generated/sitemap.xml | 2 +- data/objects/summoned/sitemap.xml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data/objects/summoned/generated/sitemap.xml b/data/objects/summoned/generated/sitemap.xml index b6b3d10..bdd3a6b 100644 --- a/data/objects/summoned/generated/sitemap.xml +++ b/data/objects/summoned/generated/sitemap.xml @@ -2,7 +2,7 @@ https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/sitemap.xml -2026-02-16T17:44:51Z +2026-02-16T17:46:19Z 
https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld diff --git a/data/objects/summoned/sitemap.xml b/data/objects/summoned/sitemap.xml index fac96ce..d47df06 100644 --- a/data/objects/summoned/sitemap.xml +++ b/data/objects/summoned/sitemap.xml @@ -2,11 +2,11 @@ https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/sitemap.xml -2026-02-16T17:44:51Z +2026-02-16T17:46:19Z https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/generated/sitemap.xml -2026-02-16T17:44:51Z +2026-02-16T17:46:19Z https://raw.githubusercontent.com/earthcube/communityCollections/refs/heads/3-generate-jsonld-datasets-from-websites/data/objects/summoned/glim/1560991f9071fd8c168bf8bff8d5abcdaf7d3370.jsonld From c46ea20c1b365e2e43f61cd6ec59c1bcac3c7908 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 12:07:20 -0600 Subject: [PATCH 54/58] Removed test branch from the github action --- .github/workflows/sitemap_resources.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/sitemap_resources.yaml b/.github/workflows/sitemap_resources.yaml index 0c8b704..8c13d79 100644 --- a/.github/workflows/sitemap_resources.yaml +++ b/.github/workflows/sitemap_resources.yaml @@ -5,7 +5,6 @@ on: branches: - main - master - - 3-generate-jsonld-datasets-from-websites permissions: contents: write From 8bedbd42ae872296466c5320a9a6a4ca0313f863 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 16 Feb 2026 12:15:38 -0600 Subject: [PATCH 55/58] Remove prompt.txt files from generated folders --- .../summoned/generated/CHELSA/prompt.txt | 75 ----------- .../generated/Consensus_Land_Cover/prompt.txt | 117 ------------------ .../summoned/generated/FLO1K/prompt.txt | 53 -------- 
.../summoned/generated/G-RUN/prompt.txt | 53 -------- .../objects/summoned/generated/GFC/prompt.txt | 80 ------------ .../summoned/generated/GHSL/prompt.txt | 53 -------- .../summoned/generated/GPP_MOD17/prompt.txt | 98 --------------- .../summoned/generated/GRACE-REC/prompt.txt | 53 -------- .../prompt.txt | 53 -------- .../generated/Global_Tree_Density/prompt.txt | 99 --------------- .../summoned/generated/HydroSHEDS/prompt.txt | 52 -------- .../generated/Hydrography90m/prompt.txt | 53 -------- .../summoned/generated/MERIT_DEM/prompt.txt | 59 --------- .../summoned/generated/MRLC_NLCD/prompt.txt | 97 --------------- .../generated/Shale_Network/prompt.txt | 53 -------- .../summoned/generated/SoilGrids2/prompt.txt | 53 -------- .../generated/TerraClimate/prompt.txt | 98 --------------- .../summoned/generated/WATERBASE/prompt.txt | 53 -------- .../generated/Water_Quality_Portal/prompt.txt | 51 -------- 19 files changed, 1303 deletions(-) delete mode 100644 data/objects/summoned/generated/CHELSA/prompt.txt delete mode 100644 data/objects/summoned/generated/Consensus_Land_Cover/prompt.txt delete mode 100644 data/objects/summoned/generated/FLO1K/prompt.txt delete mode 100644 data/objects/summoned/generated/G-RUN/prompt.txt delete mode 100644 data/objects/summoned/generated/GFC/prompt.txt delete mode 100644 data/objects/summoned/generated/GHSL/prompt.txt delete mode 100644 data/objects/summoned/generated/GPP_MOD17/prompt.txt delete mode 100644 data/objects/summoned/generated/GRACE-REC/prompt.txt delete mode 100644 data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/prompt.txt delete mode 100644 data/objects/summoned/generated/Global_Tree_Density/prompt.txt delete mode 100644 data/objects/summoned/generated/HydroSHEDS/prompt.txt delete mode 100644 data/objects/summoned/generated/Hydrography90m/prompt.txt delete mode 100644 data/objects/summoned/generated/MERIT_DEM/prompt.txt delete mode 100644 data/objects/summoned/generated/MRLC_NLCD/prompt.txt delete mode 
100644 data/objects/summoned/generated/Shale_Network/prompt.txt delete mode 100644 data/objects/summoned/generated/SoilGrids2/prompt.txt delete mode 100644 data/objects/summoned/generated/TerraClimate/prompt.txt delete mode 100644 data/objects/summoned/generated/WATERBASE/prompt.txt delete mode 100644 data/objects/summoned/generated/Water_Quality_Portal/prompt.txt diff --git a/data/objects/summoned/generated/CHELSA/prompt.txt b/data/objects/summoned/generated/CHELSA/prompt.txt deleted file mode 100644 index 72795ed..0000000 --- a/data/objects/summoned/generated/CHELSA/prompt.txt +++ /dev/null @@ -1,75 +0,0 @@ -I need to create Schema.org JSON-LD descriptions for a scientific climate data catalog and its datasets. - -**Website URL**: https://www.chelsa-climate.org/datasets -**Alternative URL**: https://chelsa-climate.org/downloads/ - -**Catalog Information**: -- Name: CHELSA (Climatologies at high resolution for the earth's land surface areas) -- Group/Category: climate -- Description: CHELSA is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model. It provides high-resolution climatological data for the earth's land surface areas, including daily, monthly, and climatological means, as well as bioclimatic variables and drought indices. - -**Key Details**: -- Publisher: WSL (Swiss Federal Institute for Forest, Snow and Landscape Research) -- Website: https://www.chelsa-climate.org/ -- Coverage: Global, Europe, Switzerland, Canary Islands -- Temporal Coverage: Past (paleoclimate), Present (1979-present), Future (climate scenarios) -- Resolution: Kilometer-scale (~1km) -- Multiple datasets available with different temporal frequencies and variables - -**Available Datasets** (from the catalog): -1. CHELSA-daily (V2.1) - Daily surface variables, 1979-01-01 to 2025-08-29 -2. CHELSA-monthly (V2.1) - Monthly aggregated variables, 1979-01-15 to 2021-12-15 -3. CHELSA-drought-indices (V2.1) - SPI and SPEI indices, 1980-06-15 to 2018-07-15 -4. 
CHELSA-climatologies (V2.1) - Long-term climatological means, 1981-2010 to 2071-2100 -5. CHELSA-bioclim (V2.1) - Bioclimatic variables, 1981-2010 to 1971-2100 -6. CHELSAch-highres (Switzerland) - High resolution for Switzerland -7. CHELSACanaryClim (Canary Islands) - Very high resolution for Canary Islands -8. CHELSA-TraCE21k - Paleoclimate data, 21k BP to 0 BP -9. CHELSA-W5E5-daily (V2.0) - Daily data, 1979-2016 -10. CHELSAcerra-daily (Europe) - High resolution for Europe, 1985-2015 - -**Variables Available**: -- Temperature (mean, min, max, diurnal range, seasonality) -- Precipitation (annual, monthly, seasonal) -- Bioclimatic variables -- Drought indices (SPI, SPEI) -- Cloud cover, humidity, wind speed -- Growing degree days, growing season metrics -- Köppen-Geiger climate classification -- And many more... - -**Instructions for Step 1.0 (WebPage Description)**: -Please create a Schema.org WebPage JSON-LD that includes: -- @context: "https://schema.org/" -- @type: "WebPage" -- @id: The catalog webpage URL -- name: Full catalog name -- description: Comprehensive description of the CHELSA climate data catalog -- url: Main catalog webpage URL -- publisher: WSL (Swiss Federal Institute for Forest, Snow and Landscape Research) -- inLanguage: "en" -- isPartOf: WebSite information -- about: Reference to the catalog as a DataCatalog -- keywords: Relevant keywords (climate, climatology, CHELSA, high resolution, downscaling, etc.) -- mainEntity: Reference to the DataCatalog - -**Instructions for Dataset JSON-LD (to be created separately)**: -For each major dataset, create a Schema.org Dataset JSON-LD that includes: -- @context, @type, @id, name, description, url -- creator/publisher: WSL -- temporalCoverage: Time period covered -- spatialCoverage: Geographic coverage (global, regional, etc.) -- variableMeasured: List of climate variables -- distribution: Download links and formats -- encodingFormat: Data formats (likely GeoTIFF, NetCDF, etc.) 
-- version: Dataset version -- license: License information (if available) - -**Output Format**: -Provide the complete JSON-LD in a code block, properly formatted and valid JSON. - -**Important Notes**: -- This is a data catalog with multiple datasets -- Each dataset should be described separately -- The catalog page describes the overall project and provides access to multiple datasets -- Include all relevant metadata from the website diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/prompt.txt b/data/objects/summoned/generated/Consensus_Land_Cover/prompt.txt deleted file mode 100644 index bc0d88f..0000000 --- a/data/objects/summoned/generated/Consensus_Land_Cover/prompt.txt +++ /dev/null @@ -1,117 +0,0 @@ -I need to create Schema.org JSON-LD descriptions for a scientific land cover dataset. - -**Website URL**: https://www.earthenv.org/landcover - -**Dataset Information**: -- Name: Consensus Land Cover (Global 1-km Consensus Land Cover) -- Group/Category: land_cover -- Description: A global 1-km consensus land-cover product for biodiversity and ecosystem modelling. The dataset integrates multiple global remote sensing-derived land-cover products and provides consensus information on the prevalence of 12 land-cover classes at 1-km resolution. -- Website: https://www.earthenv.org/landcover -- Coverage: Global (90°N to 56°S, 180°W to 180°E) -- Spatial Resolution: 30 arc-second (~1 km per pixel at the equator) -- License: Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) - -**Key Details**: -- Publisher: EarthEnv (Yale University, University of Florida, University of Buffalo) -- Creator: Mao-Ning Tuanmu, Walter Jetz -- Website: https://www.earthenv.org/landcover -- Coverage: Global (90°N to 56°S, 180°W to 180°E) -- Spatial Resolution: 30 arc-second (~1 km at equator) -- Temporal Coverage: Based on source products (2005-2006 for GlobCover, 1992-1993 for DISCover) -- License: CC BY-NC 4.0 - -**Dataset Versions**: -1. 
**Full Version 1.0** (with DISCover) - - Integrates: GlobCover (2005-06; v2.2), MODIS land-cover product (MCD12Q1; v051), GLC2000 (global product; v1.1), and DISCover (GLCC; v2) - - Recommended for most applications - -2. **Reduced Version 1.0** (without DISCover) - - Integrates: GlobCover, MODIS, GLC2000 (first three products only) - - Alternative for applications in regions with large land cover change in the past two decades - -**12 Land Cover Classes**: -1. Evergreen/Deciduous Needleleaf Trees -2. Evergreen Broadleaf Trees -3. Deciduous Broadleaf Trees -4. Mixed/Other Trees -5. Shrubs -6. Herbaceous Vegetation -7. Cultivated and Managed Vegetation -8. Regularly Flooded Vegetation -9. Urban/Built-up -10. Snow/Ice -11. Barren -12. Open Water - -**Data Format and Structure**: -- Each dataset version contains 12 data layers (one per land-cover class) -- All data layers contain unsigned 8-bit values (0-100, representing consensus prevalence in percentage) -- Format: GeoTIFF (20-100MB per class file) -- Spatial extent: 90°N to 56°S, 180°W to 180°E -- Spatial resolution: 30 arc-second per pixel (~1 km per pixel at the equator) - -**Methods**: -- Integrates multiple global remote sensing-derived land-cover products -- Provides consensus information on land-cover class prevalence -- Uses percentage values (0-100) to represent consensus prevalence - -**Data Access**: -- GeoTIFF format downloads for individual land-cover classes -- Full Version downloads: https://data.earthenv.org/consensus_landcover/with_DISCover/ -- Reduced Version downloads: https://data.earthenv.org/consensus_landcover/without_DISCover/ -- Each class available as separate GeoTIFF file (20-100MB each) - -**Citation**: -Tuanmu, M.-N. and W. Jetz. 2014. A global 1-km consensus land-cover product for biodiversity and ecosystem modeling. Global Ecology and Biogeography 23(9): 1031-1045. Data available on-line at http://www.earthenv.org/. 
- -**Instructions for Step 1.0 (WebPage Description)**: -Please create a Schema.org WebPage JSON-LD that describes the webpage itself. This should include: - -Required fields: -- @context: "https://schema.org/" -- @type: "WebPage" -- @id: The webpage URL with fragment identifier (e.g., "https://www.earthenv.org/landcover#webpage") -- name: "Global 1-km Consensus Land Cover" or similar webpage title -- description: Comprehensive description of what the webpage is about (the dataset and its purpose) -- url: Main webpage URL (https://www.earthenv.org/landcover) -- inLanguage: "en" - -Recommended fields: -- isPartOf: WebSite object with name "EarthEnv" and url "https://www.earthenv.org/" -- publisher: Organization object for EarthEnv, including member organizations (Yale University, University of Florida, University at Buffalo) -- about: Reference to the Dataset using @id (e.g., {"@type": "Dataset", "@id": "https://www.earthenv.org/landcover#dataset"}) -- mainEntity: Reference to the Dataset (same as about) -- keywords: Array of relevant keywords (land cover, consensus, remote sensing, biodiversity, ecosystem modeling, GeoTIFF, GlobCover, MODIS, GLC2000, DISCover, etc.) -- breadcrumb: BreadcrumbList with EarthEnv as parent and the landcover page as current - -The WebPage JSON-LD should describe the webpage that hosts information about the dataset, not the dataset itself. It should reference the dataset via @id links. 
- -**Instructions for Dataset JSON-LD**: -Please create a Schema.org Dataset JSON-LD that includes: -- @context, @type, @id, name, description, url -- creator: Mao-Ning Tuanmu, Walter Jetz -- publisher: EarthEnv / Yale University -- temporalCoverage: Based on source products (mention the range) -- spatialCoverage: Global (90°N to 56°S, 180°W to 180°E) - use bounding box format "west,south east,north" -- variableMeasured: List all 12 land cover classes -- distribution: Multiple DataDownload entries for: - - Full Version downloads (with DISCover) - - Reduced Version downloads (without DISCover) - - Individual class downloads -- encodingFormat: GeoTIFF -- version: Version 1.0 (mention both Full and Reduced versions) -- license: CC BY-NC 4.0 -- citation: Scientific publication citation -- measurementTechnique: Integration of multiple global remote sensing-derived land-cover products -- about: Land cover, biodiversity, ecosystem modeling -- spatialResolution: 30 arc-second (~1 km at equator) - -**Output Format**: -Provide the complete JSON-LD in a code block, properly formatted and valid JSON. - -**Important Notes**: -- This is a single comprehensive dataset (not a catalog) -- Two versions (Full and Reduced) are variants of the same product, not separate datasets -- 12 land cover classes are components of the dataset, not separate datasets -- All classes are available as separate GeoTIFF files -- Bounding box format: "-180,-56 180,90" (west,south east,north) diff --git a/data/objects/summoned/generated/FLO1K/prompt.txt b/data/objects/summoned/generated/FLO1K/prompt.txt deleted file mode 100644 index 17cf7b4..0000000 --- a/data/objects/summoned/generated/FLO1K/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset. 
- -**Dataset Information**: -- Name: FLO1K -- URL: https://figshare.com/collections/FLO1K_global_maps_of_mean_maximum_and_minimum_annual_streamflow_at_1_km_resolution_from_1960_through_2015/3890224 -- Description: Global mean, maximum and minimum annual streamflow at 1 km resolution (1960–2015). -- Group/Category: hydrology -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: Hosted on Figshare (collection). FLO1K provides global maps of mean, maximum, and minimum annual streamflow at 1 km resolution from 1960 through 2015. Infer creator, citation, temporal coverage, and variables from the Figshare collection page and any linked paper (e.g. DOI or journal article). Include distribution with the Figshare collection URL and DOI if available. - -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", - "name": "Global Multi-layer Soil Moisture Products", - "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", - "description": "Global multi-layer soil moisture products covering 1970–2016.", - "keywords": ["soil moisture", "global", "Figshare", "hydrology"], - "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Person", "name": "Jiafu Mao"}], - "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], - "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], - "encodingFormat": ["application/zip", "application/x-netcdf"], - "license": 
"https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or DOI (e.g. Figshare collection/article URL with #dataset or DOI) -5. Include all available metadata fields -6. Creator/publisher: infer from Figshare page (authors and Figshare as publisher) -7. Include distribution with contentUrl to the Figshare dataset/collection/DOI -8. temporalCoverage: use 1960–2015 from the collection title unless the page states otherwise -9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" -10. Include license and access information (Figshare often CC-BY) -11. Use proper JSON-LD structure (arrays for multiple values) -12. "keywords" as a JSON array of strings — never semicolon/comma-separated string -13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -14. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/G-RUN/prompt.txt b/data/objects/summoned/generated/G-RUN/prompt.txt deleted file mode 100644 index a649df3..0000000 --- a/data/objects/summoned/generated/G-RUN/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: G-RUN -- URL: https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176 -- Description: Global runoff reconstruction (GRUN: Global Runoff Reconstruction). 
-- Group/Category: hydrology -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: Hosted on Figshare. G-RUN / GRUN is a global runoff reconstruction. Infer creator, citation, temporal coverage, and variables from the Figshare page and any linked paper (e.g. DOI or journal article). Include distribution with the Figshare article URL and DOI if available. - -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", - "name": "Global Multi-layer Soil Moisture Products", - "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", - "description": "Global multi-layer soil moisture products covering 1970–2016.", - "keywords": ["soil moisture", "global", "Figshare", "hydrology"], - "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Person", "name": "Jiafu Mao"}], - "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], - "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], - "encodingFormat": ["application/zip", "application/x-netcdf"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or DOI (e.g. 
Figshare article URL with #dataset or DOI) -5. Include all available metadata fields -6. Creator/publisher: infer from Figshare page (authors and Figshare as publisher) -7. Include distribution with contentUrl to the Figshare dataset/DOI -8. temporalCoverage: infer from title/description (e.g. reconstruction period) -9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" -10. Include license and access information (Figshare often CC-BY) -11. Use proper JSON-LD structure (arrays for multiple values) -12. "keywords" as a JSON array of strings — never semicolon/comma-separated string -13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -14. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/GFC/prompt.txt b/data/objects/summoned/generated/GFC/prompt.txt deleted file mode 100644 index 4c679e4..0000000 --- a/data/objects/summoned/generated/GFC/prompt.txt +++ /dev/null @@ -1,80 +0,0 @@ -I need to create Schema.org JSON-LD descriptions for a scientific forest monitoring dataset. - -**Website URL**: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html - -**Dataset Information**: -- Name: GFC (Global Forest Change) -- Group/Category: land_cover -- Description: Global forest extent and change -- Website: https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html -- Version: GFC-2023-v1.11 (suggests 2023 data, version 1.11) -- Organization: Hansen Global Forest Change (Google Earth Engine Partners) - -**Expected Details** (Hansen Global Forest Change typically includes): -- Creator: Matthew C. 
Hansen and collaborators (University of Maryland, Google) -- Publisher: Google Earth Engine Partners / University of Maryland -- Coverage: Global -- Temporal Coverage: Typically 2000-2023 (or based on version number) -- Variables: - - Tree cover extent (baseline, typically year 2000) - - Forest loss (yearly from 2000 onward) - - Forest gain (2000-2012 or similar period) - - Tree cover loss year -- Format: GeoTIFF, typically available via Google Earth Engine or direct downloads -- Spatial Resolution: Typically 30m (Landsat-based) - -**Note**: The URL appears to be a Google Cloud Storage download page. Please browse/analyze the actual webpage to verify: -- Available data products/layers -- Temporal coverage -- Spatial coverage and resolution -- File formats -- License information -- Download methods -- Citation information - -**Instructions for Step 1.0 (WebPage Description)**: -Please create a Schema.org WebPage JSON-LD that describes the webpage itself. This should include: - -Required fields: -- @context: "https://schema.org/" -- @type: "WebPage" -- @id: The webpage URL with fragment identifier (e.g., "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/download.html#webpage") -- name: "Global Forest Change" or similar webpage title -- description: Comprehensive description of what the webpage is about (the dataset and its purpose) -- url: Main webpage URL -- inLanguage: "en" - -Recommended fields: -- isPartOf: WebSite object if applicable -- publisher: Organization object (Google Earth Engine Partners, University of Maryland, etc.) -- about: Reference to the Dataset using @id -- mainEntity: Reference to the Dataset (same as about) -- keywords: Array of relevant keywords (forest, deforestation, forest loss, tree cover, global, remote sensing, Landsat, etc.) - -The WebPage JSON-LD should describe the webpage that hosts information about the dataset, not the dataset itself. It should reference the dataset via @id links. 
- -**Instructions for Dataset JSON-LD**: -Please create a Schema.org Dataset JSON-LD that includes: -- @context, @type, @id, name, description, url -- creator: Matthew C. Hansen and collaborators (verify from website) -- publisher: Google Earth Engine Partners / University of Maryland (verify from website) -- temporalCoverage: Based on dataset version (likely 2000-2023 or similar) -- spatialCoverage: Global - use bounding box format "west,south east,north" (likely "-180,-90 180,90" for global) -- variableMeasured: List data layers/variables (tree cover, forest loss, forest gain, etc.) -- distribution: Multiple DataDownload entries for available download options -- encodingFormat: GeoTIFF or other formats available -- version: GFC-2023-v1.11 (or verify from website) -- license: Verify from website -- citation: Scientific publication citation (Hansen et al., verify from website) -- measurementTechnique: Remote sensing using Landsat imagery, change detection methods -- about: Forest monitoring, deforestation, forest change, global mapping -- spatialResolution: Typically 30m (Landsat-based, verify from website) - -**Output Format**: -Provide the complete JSON-LD in a code block, properly formatted and valid JSON. - -**Important Notes**: -- This appears to be a single comprehensive dataset (not a catalog) -- Multiple data layers (tree cover, loss, gain) are components of the same dataset -- Bounding box format: "-180,-90 180,90" (west,south east,north) for global coverage -- Verify all details from the actual website as this is a Google Cloud Storage page diff --git a/data/objects/summoned/generated/GHSL/prompt.txt b/data/objects/summoned/generated/GHSL/prompt.txt deleted file mode 100644 index ad6fc45..0000000 --- a/data/objects/summoned/generated/GHSL/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. 
- -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: GHSL (Global Human Settlement Layer) -- URL: https://human-settlement.emergency.copernicus.eu/download.php -- Description: Built-up areas, population and settlements (global human settlement layer products). -- Group/Category: human -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: Hosted on Copernicus Emergency Management Service (human settlement). GHSL provides global data on built-up areas, population distribution, and human settlements at multiple resolutions and time periods. Infer creator (e.g. European Commission Joint Research Centre), publisher, temporal coverage, variables, and distribution from the download page and any linked documentation. Include distribution with the download page URL and any DOI or data access URLs if available. - -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", - "name": "Global Multi-layer Soil Moisture Products", - "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", - "description": "Global multi-layer soil moisture products covering 1970–2016.", - "keywords": ["soil moisture", "global", "Figshare", "hydrology"], - "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Organization", "name": "Oak Ridge National Laboratory"}], - "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], - "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", 
"description": "Multi-layer soil moisture"}], - "encodingFormat": ["application/zip", "application/x-netcdf"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or a stable identifier (e.g. the download page URL with #dataset) -5. Include all available metadata fields -6. Creator/publisher: infer from the Copernicus/JRC site (e.g. European Commission Joint Research Centre, Copernicus) -7. Include distribution with contentUrl to the download page or data access URL; use encodingFormat as a JSON array (e.g. ["text/html"] or list file formats if known) -8. temporalCoverage: infer from the site (GHSL has multi-epoch data, e.g. 1975, 1990, 2000, 2015) -9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" -10. Include license if stated (Copernicus data often free use with attribution) -11. Use proper JSON-LD structure (arrays for multiple values) -12. "keywords" as a JSON array of strings — never semicolon/comma-separated string -13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -14. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/GPP_MOD17/prompt.txt b/data/objects/summoned/generated/GPP_MOD17/prompt.txt deleted file mode 100644 index 79be060..0000000 --- a/data/objects/summoned/generated/GPP_MOD17/prompt.txt +++ /dev/null @@ -1,98 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. 
- -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: GPP MOD17 -- URL: http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/ -- Description: Global gross primary production and net primary production derived from MOD17 -- Group/Category: ecosystem -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: NTSG (Numerical Terradynamic Simulation Group), University of Montana. MOD17 products: GPP, NPP. Data available via file listing at the URL above. Infer distribution and variable info from typical MOD17 product structure if needed. - -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": { - "@vocab": "https://schema.org/" - }, - "@id": "https://doi.org/10.1594/PANGAEA.879543", - "@type": "Dataset", - "identifier": "https://doi.org/10.1594/PANGAEA.879543", - "url": "https://doi.pangaea.de/10.1594/PANGAEA.879543", - "creator": [ - { - "@id": "https://orcid.org/0000-0002-7468-2409", - "@type": "Person", - "name": "Yao Zhang", - "familyName": "Zhang", - "givenName": "Yao", - "identifier": "https://orcid.org/0000-0002-7468-2409", - "email": "yaozhang@lbl.gov" - }, - { - "@type": "Person", - "name": "Xiangming Xiao", - "familyName": "Xiao", - "givenName": "Xiangming", - "email": "xiangming.xiao@ou.edu" - } - ], - "name": "(Table 3) Continental and global total gross primary production of carbon for the years 2000-2016", - "publisher": { - "@type": "Organization", - "name": "PANGAEA", - "disambiguatingDescription": "Data Publisher for Earth & Environmental Science", - "url": "https://www.pangaea.de/" - }, - "includedInDataCatalog": { - "@type": "DataCatalog", - "name": "PANGAEA", - "url": "https://www.pangaea.de/" - }, - "datePublished": "2017-08-11", - "description": "Continental and global total gross primary production of carbon for the years 2000-2016.", - 
"keywords": ["GPP", "gross primary production", "NPP", "MODIS", "carbon"], - "encodingFormat": ["application/netcdf", "GeoTIFF"], - "spatialCoverage": { - "@type": "Place", - "geo": { - "@type": "GeoShape", - "box": "-180,-60 180,90" - } - }, - "temporalCoverage": "2000-01-01/2016-12-31", - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [ - { - "@type": "DataDownload", - "encodingFormat": "application/netcdf", - "contentUrl": "https://example.org/data" - } - ] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or identifier (e.g. http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/#dataset) -5. Include all available metadata fields -6. For creators, use Person or Organization types with proper structure (e.g. NTSG / University of Montana if known) -7. Include distribution information if download links are available (the URL is a directory listing; you may reference the base URL or typical MOD17 product formats) -8. Add temporalCoverage if time period is known (format: "YYYY-MM-DD/YYYY-MM-DD") — MOD17 has multi-year products -9. Add spatialCoverage if geographic bounds are provided: - - Use Place with geo containing GeoShape - - The box format MUST be: "west,south east,north" (comma-separated pairs, space between pairs) - - For this dataset use: "20,-40 50,10" (NOT "20 -40 50 10") - - Format: {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}} -10. Include license and access information -11. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) -12. Set "keywords" as a JSON array of strings, e.g. "keywords": ["keyword1", "keyword2", "keyword3"] — never a single semicolon- or comma-separated string -13. Set "encodingFormat" as a JSON array of strings when listing multiple formats, e.g. 
"encodingFormat": ["image/tiff", "application/geotiff"] — never a single semicolon- or comma-separated string -14. Add this exact comment (for AI-generated disclosure): "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/GRACE-REC/prompt.txt b/data/objects/summoned/generated/GRACE-REC/prompt.txt deleted file mode 100644 index be945c6..0000000 --- a/data/objects/summoned/generated/GRACE-REC/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: GRACE-REC -- URL: https://figshare.com/articles/dataset/GRACE-REC_A_reconstruction_of_climate-driven_water_storage_changes_over_the_last_century/7670849 -- Description: Terrestrial water storage (reconstruction of climate-driven water storage changes over the last century; GRACE-REC). -- Group/Category: hydrology -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: Hosted on Figshare. GRACE-REC is a reconstruction of terrestrial water storage changes (e.g. from GRACE and related data) over the last century. Infer creator, citation, temporal coverage, and variables from the Figshare page and any linked paper (e.g. DOI or journal article). Include distribution with the Figshare article URL and DOI if available. 
- -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", - "name": "Global Multi-layer Soil Moisture Products", - "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", - "description": "Global multi-layer soil moisture products covering 1970–2016.", - "keywords": ["soil moisture", "global", "Figshare", "hydrology"], - "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Person", "name": "Jiafu Mao"}], - "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], - "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], - "encodingFormat": ["application/zip", "application/x-netcdf"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or DOI (e.g. Figshare article URL with #dataset or DOI) -5. Include all available metadata fields -6. Creator/publisher: infer from Figshare page (authors and Figshare as publisher) -7. Include distribution with contentUrl to the Figshare dataset/DOI -8. temporalCoverage: infer from title/description (e.g. "last century" → ~1900–2000 or as stated) -9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" -10. 
Include license and access information (Figshare often CC-BY) -11. Use proper JSON-LD structure (arrays for multiple values) -12. "keywords" as a JSON array of strings — never semicolon/comma-separated string -13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -14. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/prompt.txt b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/prompt.txt deleted file mode 100644 index d7277b6..0000000 --- a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: Global Multi-layer Soil Moisture -- URL: https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312/1?file=26220602 -- Description: Soil moisture datasets that cover the globe and the time period 1970–2016, at a spatial resolution of 0.5 degrees, time step of monthly, and vertical resolution of four soil layers (0-10cm, 10-30cm, 30-50cm, 50-100cm). -- Group/Category: soil -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: Hosted on Figshare. Global multi-layer soil moisture products; 0.5° resolution, monthly, four depth layers. Infer creator/publisher from Figshare and any paper or project cited on the Figshare page (e.g. GLDAS, reanalysis, or research group). Include variableMeasured for the four soil layers and temporalCoverage 1970–2016. 
- -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://soilgrids.org/#dataset", - "name": "SoilGrids2 (SoilGrids 2.0 global soil property maps)", - "url": "https://soilgrids.org/", - "description": "SoilGrids2 provides global gridded soil property maps at approximately 250 m spatial resolution.", - "keywords": ["SoilGrids2", "ISRIC", "global soil maps", "soil moisture", "soil properties"], - "creator": [{"@type": "Organization", "name": "ISRIC", "url": "https://www.isric.org/"}], - "publisher": [{"@type": "Organization", "name": "ISRIC", "url": "https://www.isric.org/"}], - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "temporalCoverage": "1970-01-01/2016-12-31", - "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], - "encodingFormat": ["image/tiff", "application/zip"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://figshare.com/", "encodingFormat": ["application/zip"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or identifier (e.g. the Figshare article URL with #dataset) -5. Include all available metadata fields -6. For creator/publisher use Organization (Figshare and/or the data producers if known from the page) -7. Include distribution with contentUrl pointing to the Figshare dataset/download -8. temporalCoverage: use "1970-01-01/2016-12-31" (or as stated on Figshare) -9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" -10. Include license and access information (Figshare often uses CC-BY) -11. 
Use proper JSON-LD structure (arrays for multiple values) -12. "keywords" as a JSON array of strings — never semicolon/comma-separated string -13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -14. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/Global_Tree_Density/prompt.txt b/data/objects/summoned/generated/Global_Tree_Density/prompt.txt deleted file mode 100644 index afb1ffa..0000000 --- a/data/objects/summoned/generated/Global_Tree_Density/prompt.txt +++ /dev/null @@ -1,99 +0,0 @@ -I need to create Schema.org JSON-LD descriptions for a scientific tree density dataset. - -**Website URL**: https://elischolar.library.yale.edu/yale_fes_data/1/ - -**Dataset Information**: -- Name: Global Tree Density (Global tree density map) -- Group/Category: land_cover -- Description: Tree density at a global scale. Two global maps (raster files) of tree density highlighting how the number of trees varies across the world. One map generated using biome-level models and applied at the biome scale. The other map generated using ecoregion-level models and applied at the ecoregion scale. -- Website: https://elischolar.library.yale.edu/yale_fes_data/1/ -- Coverage: Global -- License: Creative Commons Attribution-No Derivative Works 4.0 International (CC BY-ND 4.0) - -**Key Details**: -- Publisher: EliScholar / Yale School of the Environment -- Creator: T. W. Crowther (lead author) and many co-authors from Yale University and other institutions -- Publication Date: 2015-09-02 -- Coverage: Global dataset -- License: CC BY-ND 4.0 - -**Two Models**: -1. **Biome-level model** - tree density estimates applied at the biome scale (featured more prominently in publication) -2. 
**Ecoregion-level model** - tree density estimates applied at the ecoregion scale - -**Methodology**: -- Collected over 420,000 ground-source estimates of tree density from around the world -- Constructed linear regression models using vegetative, climatic, topographic, and anthropogenic variables -- Modeling done in R, mapping done in R and ArcGIS 10.1 -- Estimates are more robust at country-scale (or larger) than individual pixel-level -- Transitions between biomes/ecoregions may be unrealistically harsh, but large-scale estimates are robust - -**File Formats**: -- Primary: ArcGIS File Geodatabase (.gdb) containing both models -- Additional: Revision 01 (small islands), GeoTIFF versions in WGS84 -- Coordinate system: Goode Homolosine interrupted projected coordinate system (original) -- For visualization: Needs reprojection to Eckert III projected coordinate system - -**Distribution**: -- Primary download: ZIP file containing ArcGIS .gdb files, layer files (.lyr), and map document (.mxd) -- Revision 01: Contains tree density predictions for small islands not in primary download -- GeoTIFF version: WGS84 GeoTIFF format (reprojected from Goode Homolosine) - -**Citation**: -Crowther, T. W., Glick, H. B., Covey, K. R., et al. (2015). Mapping tree density at a global scale. Nature, 525(7568), 201-205. DOI: 10.1038/nature14967 - -**Funding**: -Yale Climate and Energy Institute; British Ecological Society - -**Instructions for Step 1.0 (WebPage Description)**: -Please create a Schema.org WebPage JSON-LD that describes the webpage itself. 
This should include: - -Required fields: -- @context: "https://schema.org/" -- @type: "WebPage" -- @id: The webpage URL with fragment identifier (e.g., "https://elischolar.library.yale.edu/yale_fes_data/1/#webpage") -- name: "Global tree density map" or similar webpage title -- description: Comprehensive description of what the webpage is about (the dataset and its purpose) -- url: Main webpage URL (https://elischolar.library.yale.edu/yale_fes_data/1/) -- inLanguage: "en" - -Recommended fields: -- isPartOf: WebSite object with name "EliScholar" and information about Yale digital repository -- publisher: Organization object for Yale School of the Environment / Yale University -- about: Reference to the Dataset using @id (e.g., {"@type": "Dataset", "@id": "https://elischolar.library.yale.edu/yale_fes_data/1/#dataset"}) -- mainEntity: Reference to the Dataset (same as about) -- keywords: Array of relevant keywords (tree density, forest, global, remote sensing, biodiversity, GIS, ArcGIS, GeoTIFF, etc.) - -The WebPage JSON-LD should describe the webpage that hosts information about the dataset, not the dataset itself. It should reference the dataset via @id links. - -**Instructions for Dataset JSON-LD**: -Please create a Schema.org Dataset JSON-LD that includes: -- @context, @type, @id, name, description, url -- creator: T. W. 
Crowther (lead) and list of co-authors (can include major contributors or reference the full list) -- publisher: EliScholar / Yale School of the Environment / Yale University -- datePublished: "2015-09-02" -- temporalCoverage: Single snapshot (2015 or based on data collection period) -- spatialCoverage: Global - use bounding box format "west,south east,north" (likely "-180,-90 180,90" for global) -- variableMeasured: Tree density (biome-level model), Tree density (ecoregion-level model) -- distribution: Multiple DataDownload entries for: - - Primary download (ArcGIS .gdb files) - - Revision 01 (small islands) - - GeoTIFF versions (WGS84) -- encodingFormat: ArcGIS File Geodatabase, GeoTIFF -- version: Mention both models (biome-level and ecoregion-level) and Revision 01 -- license: CC BY-ND 4.0 -- citation: Scientific publication citation (Nature paper) -- measurementTechnique: Linear regression modeling using ground-source estimates and environmental variables -- about: Tree density, forest, global mapping, biodiversity -- spatialResolution: Mention that estimates are more reliable at country-scale or larger -- funding: Yale Climate and Energy Institute, British Ecological Society - -**Output Format**: -Provide the complete JSON-LD in a code block, properly formatted and valid JSON. 
- -**Important Notes**: -- This is a single comprehensive dataset (not a catalog) -- Two models (biome-level and ecoregion-level) are variants of the same product, not separate datasets -- Additional files (Revision 01, GeoTIFF) are alternative formats/revisions, not separate datasets -- Bounding box format: "-180,-90 180,90" (west,south east,north) for global coverage -- Multiple creators - can list key authors or reference the full list from the publication diff --git a/data/objects/summoned/generated/HydroSHEDS/prompt.txt b/data/objects/summoned/generated/HydroSHEDS/prompt.txt deleted file mode 100644 index 90850c7..0000000 --- a/data/objects/summoned/generated/HydroSHEDS/prompt.txt +++ /dev/null @@ -1,52 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: HydroSHEDS -- URL: https://www.hydrosheds.org -- Description: Various hydrographic data products include catchment boundaries, river networks, and lakes at multiple resolutions and scales. -- Group/Category: hydrology -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: HydroSHEDS (Hydrological data and maps based on SHuttle Elevation Derivatives at multiple Scales) is a product of the World Wildlife Fund (WWF) and partners. Global hydrographic data: drainage basins, river networks, stream order, lakes, at multiple resolutions (e.g. 30 arc-second, 15 arc-second, 3 arc-second). Data derived from SRTM and other DEMs. Infer distribution and variables from typical HydroSHEDS products (catchment boundaries, flow direction, flow accumulation, river networks, etc.). 
- -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://soilgrids.org/#dataset", - "name": "SoilGrids2 (SoilGrids 2.0 global soil property maps)", - "url": "https://soilgrids.org/", - "description": "SoilGrids2 provides global gridded soil property maps at approximately 250 m spatial resolution.", - "keywords": ["SoilGrids2", "ISRIC", "global soil maps", "hydrology", "soil properties"], - "creator": [{"@type": "Organization", "name": "ISRIC", "url": "https://www.isric.org/"}], - "publisher": [{"@type": "Organization", "name": "ISRIC", "url": "https://www.isric.org/"}], - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "Bulk density", "description": "Soil bulk density"}], - "encodingFormat": ["image/tiff", "application/zip"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://example.org/data", "encodingFormat": ["image/tiff"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or identifier (e.g. https://www.hydrosheds.org/#dataset) -5. Include all available metadata fields -6. Creator/publisher: World Wildlife Fund (WWF) and/or HydroSHEDS project partners (https://www.hydrosheds.org or https://www.worldwildlife.org) -7. Include distribution (e.g. link to hydrosheds.org download or data page) -8. Add temporalCoverage if known -9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" -10. Include license and access information (HydroSHEDS is often free for non-commercial / research; state as known) -11. 
Use proper JSON-LD structure (arrays for multiple values) -12. "keywords" as a JSON array of strings — never semicolon/comma-separated string -13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -14. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/Hydrography90m/prompt.txt b/data/objects/summoned/generated/Hydrography90m/prompt.txt deleted file mode 100644 index 02e0d86..0000000 --- a/data/objects/summoned/generated/Hydrography90m/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: Hydrography90m -- URL: https://projects.gitlab.io/auth?domain=https://hydrography.org&state=SkAZJPAM4Sq5vq1JjCPPZA== -- Alternative/canonical URL: https://hydrography.org (the auth URL redirects to this domain; use hydrography.org for @id/url if appropriate for a stable identifier) -- Description: Hydrographic dataset describing topographic and topological properties of drainage basins and streams. -- Group/Category: hydrology -- Type: Sitemap (discovery via sitemap; dataset is hydrographic products at ~90 m resolution) -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: No box given in source; use global extent, e.g. "west,south east,north" = "-180,-60 180,90", or leave as global land/drainage coverage. -- Note: The project website has been reported as "website down" in the source spreadsheet; still create full metadata for discovery and for when the site is available again. -- Extracted Metadata: Hydrography90m provides hydrographic data (drainage basins, streams, topographic and topological properties) at approximately 90 m resolution. 
The project is associated with hydrography.org (and possibly GitLab). Infer creator/publisher if you know them (e.g. research group or institution); otherwise describe the dataset and use the dataset URL. - -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://www.hydrosheds.org/#dataset", - "name": "HydroSHEDS", - "url": "https://www.hydrosheds.org/", - "description": "Global hydrographic and hydrological baseline data.", - "keywords": ["HydroSHEDS", "hydrography", "hydrology", "watersheds", "river networks"], - "creator": [{"@type": "Organization", "name": "World Wildlife Fund (WWF)", "url": "https://www.worldwildlife.org/"}], - "publisher": [{"@type": "Organization", "name": "World Wildlife Fund (WWF)", "url": "https://www.worldwildlife.org/"}], - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "-180,-60 180,90"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "River networks", "description": "Stream network"}], - "encodingFormat": ["image/tiff", "application/geotiff"], - "distribution": [{"@type": "DataDownload", "contentUrl": "https://example.org/data", "encodingFormat": ["image/tiff"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with a stable URL (e.g. https://hydrography.org/#dataset if that is the canonical site; otherwise the provided URL with #dataset) -5. Include all available metadata fields -6. Creator/publisher: infer from hydrography.org / Hydrography90m project if known; otherwise use a generic description -7. Include distribution (e.g. link to hydrography.org or the sitemap/auth URL for when the site is up) -8. spatialCoverage: Use Place with geo GeoShape. Box format MUST be "west,south east,north". 
Use global extent "-180,-60 180,90" since no specific box was provided. -9. Include license and access information if known -10. Use proper JSON-LD structure (arrays for multiple values) -11. "keywords" as a JSON array of strings — never semicolon/comma-separated string -12. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -13. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/MERIT_DEM/prompt.txt b/data/objects/summoned/generated/MERIT_DEM/prompt.txt deleted file mode 100644 index 0a180f7..0000000 --- a/data/objects/summoned/generated/MERIT_DEM/prompt.txt +++ /dev/null @@ -1,59 +0,0 @@ -I need to create a Schema.org Dataset JSON-LD description for a scientific dataset. - -**Website URL**: http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/ - -**Dataset Information**: -- Name: MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model) -- Group/Category: topography -- Description: A high accuracy global DEM at 3 arcsecond resolution (~90 m at the equator) developed by removing multiple error components (absolute bias, stripe noise, speckle noise, and tree height bias) from existing spaceborne DEMs (NASA SRTM3 DEM v2.1, JAXA AW3D-30m DEM v1, Viewfinder Panoramas' DEM). After error removal, land areas mapped with 2 m or better vertical accuracy were increased from 39% to 58%. 
- -**Key Details**: -- Publisher: University of Tokyo, Institute of Industrial Science -- Creator: Dai Yamazaki (yamadai@iis.u-tokyo.ac.jp) -- Date Published: October 15, 2018 (v1.0.3) -- License: Creative Commons CC-BY-NC 4.0 or Open Database License (ODbL 1.0) - dual license -- Spatial Coverage: Land areas between 90°N-60°S -- Resolution: 3 arcsecond (~90m at the equator) -- Data Format: Elevation in meters, referenced to WGS84 and EGM96 geoid -- Data Organization: 5 degree × 5 degree tiles, packaged into 30 degree × 30 degree packages -- Available Formats: ESRI EHdr (FLT), GeoTIFF, MRR - -**File Naming Convention**: -- Individual tiles: Filename represents the center of the lower left pixel (e.g., "n30w120_dem.tif" covers N30-N35, W120-W115) -- Packages: Package name represents the lower left corner (e.g., "dem_tif_n30w120.tar" contains files in domain N30-N60, W120-W090) - -**Citation**: -Yamazaki D., D. Ikeshima, R. Tawatari, T. Yamaguchi, F. O'Loughlin, J.C. Neal, C.C. Sampson, S. Kanae & P.D. Bates (2017). A high accuracy map of global terrain elevations. 
Geophysical Research Letters, vol.44, pp.5844-5853, doi: 10.1002/2017GL072874 - -**Instructions**: -Please create a complete Schema.org Dataset JSON-LD that includes: -- @context: "https://schema.org/" -- @type: "Dataset" -- @id: The dataset URL -- name: Full dataset name -- description: Comprehensive description of the dataset -- url: Main dataset webpage URL -- creator: Organization and/or person (University of Tokyo, Institute of Industrial Science; Dai Yamazaki) -- publisher: Organization (University of Tokyo) -- datePublished: Publication date -- version: Current version (v1.0.3) -- license: Include both license options (CC-BY-NC 4.0 and ODbL 1.0) -- keywords: Relevant keywords (e.g., topography, Digital Elevation Model, terrain elevation, geoscience, hydrology, SRTM, AW3D) -- spatialCoverage: Geographic coverage (90°N to 60°S) -- distribution: Include multiple DataDownload entries for: - - Different formats (ESRI EHdr, GeoTIFF, MRR) - - Note the file naming convention and spatial organization - - Include the main download page URL -- citation: Scientific publication citation -- encodingFormat: List available formats (ESRI FLT, GeoTIFF, MRR) -- temporalCoverage: If applicable -- about: What the dataset is about (terrain elevation, hydrology, geoscience applications) - -**Output Format**: -Provide the complete JSON-LD in a code block, properly formatted and valid JSON. 
- -**Important Notes**: -- This is a single dataset with multiple distribution options (formats and spatial regions) -- The dataset is organized as spatial tiles, but it represents one cohesive dataset product -- Include all relevant metadata from the website -- Ensure the JSON-LD is valid and follows Schema.org Dataset schema diff --git a/data/objects/summoned/generated/MRLC_NLCD/prompt.txt b/data/objects/summoned/generated/MRLC_NLCD/prompt.txt deleted file mode 100644 index 11495fa..0000000 --- a/data/objects/summoned/generated/MRLC_NLCD/prompt.txt +++ /dev/null @@ -1,97 +0,0 @@ -I need to create Schema.org JSON-LD descriptions for a scientific land cover data catalog and its datasets. - -**Website URL**: https://www.mrlc.gov/data - -**Catalog Information**: -- Name: Multi-Resolution Land Characteristics (MRLC) Consortium -- Group/Category: land_cover -- Description: Nationwide (US) data on land cover and tree canopy cover at a 30m resolution. The MRLC Consortium is a partnership of federal agencies that produces land cover and land change data products. -- Website: https://www.mrlc.gov/data -- Publisher: MRLC Consortium (USGS, EPA, USDA, NOAA, USFS, and others) -- Coverage: United States (CONUS, Alaska, Hawaii) and North America - -**Available Products/Datasets** (based on website structure): -1. **Annual NLCD** - Annual National Land Cover Database (Conterminous U.S.) - - Collection 1.1 (current version) - - Temporal coverage: 1985-2024 - - Products: Land Cover, Land Cover Change, Land Cover Confidence, Fractional Impervious Surface, Impervious Descriptor, Spectral Change Day of Year - - Spatial resolution: 30m - - Coverage: Conterminous United States (CONUS) - -2. **RCMAP** - Rangeland Condition Monitoring Assessment and Projection - - Includes Ecological Potential (EP) data - - Components: bare ground, herbaceous, litter, shrub, sagebrush, tree - - Temporal coverage: 1985-present - - Coverage: Western North America - -3. 
**Exotic Annual Grass** - Exotic annual grass data products - -4. **NALCMS** - North American Land Change Monitoring System - - Coverage: North America (US, Canada, Mexico) - -5. **Legacy NLCD** - Legacy National Land Cover Database products - - Older versions of NLCD data - -**Key Details**: -- Organization: Multi-Resolution Land Characteristics (MRLC) Consortium -- Consortium Members: USGS, EPA, USDA, NOAA, USFS, and other federal agencies -- Website: https://www.mrlc.gov/data -- Tools: MRLC NLCD Viewer, MRLC NLCD EVA Tool, MRLC Rangeland Viewer -- Services: Download interface, web services -- License: Public domain (USGS data) - -**Instructions for Step 1.0 (WebPage Description)**: -Please create a Schema.org WebPage JSON-LD that includes: -- @context: "https://schema.org/" -- @type: "WebPage" -- @id: The webpage URL -- name: "MRLC Data" or "Multi-Resolution Land Characteristics Consortium Data" -- description: Comprehensive description of the MRLC data catalog -- url: Main webpage URL -- publisher: MRLC Consortium / USGS (if identifiable) -- inLanguage: "en" -- isPartOf: WebSite information -- about: Reference to the DataCatalog -- keywords: Relevant keywords (land cover, NLCD, MRLC, land use, tree canopy, etc.) -- mainEntity: Reference to the DataCatalog - -**Instructions for Step 2.2 (DataCatalog JSON-LD)**: -Please create a Schema.org DataCatalog JSON-LD that includes: -- @context, @type, @id, name, description, url -- publisher: MRLC Consortium / USGS -- creator: MRLC Consortium members -- dataset: List of datasets in the catalog (use @id references) -- keywords: land cover, NLCD, MRLC, land use, tree canopy, rangeland, etc. 
-- about: Land cover, land use, land change, remote sensing -- distribution: Access methods (download interface, viewer tools, web services) - -**Instructions for Step 2.3 (Individual Dataset JSON-LD)**: -For each dataset (Annual NLCD, RCMAP, etc.), create a Schema.org Dataset JSON-LD that includes: -- @context, @type, @id, name, description, url -- creator: MRLC Consortium / specific agencies -- publisher: MRLC Consortium / USGS -- temporalCoverage: Specific date ranges (e.g., 1985-2024 for Annual NLCD) -- spatialCoverage: CONUS, Alaska, Hawaii, North America, etc. -- variableMeasured: Land cover classes, tree canopy, impervious surface, etc. -- distribution: Multiple DataDownload entries for: - - Direct download links - - Viewer tools - - Web services -- encodingFormat: GeoTIFF, raster formats -- spatialResolution: 30m, 250m, etc. -- version: Collection version (e.g., "Collection 1.1") -- license: Public domain -- citation: Relevant publications -- measurementTechnique: Remote sensing, image classification -- about: Land cover, land use, land change, remote sensing - -**Output Format**: -Provide the complete JSON-LD in a code block, properly formatted and valid JSON. - -**Important Notes**: -- This is a DATA CATALOG with multiple distinct datasets (similar to CHELSA) -- Each dataset should have its own JSON-LD file -- The catalog should reference all datasets using @id references -- Spatial coverage is primarily United States (CONUS, Alaska, Hawaii) and North America -- Most products use 30m spatial resolution -- Data is in the public domain (USGS data) diff --git a/data/objects/summoned/generated/Shale_Network/prompt.txt b/data/objects/summoned/generated/Shale_Network/prompt.txt deleted file mode 100644 index 03058c7..0000000 --- a/data/objects/summoned/generated/Shale_Network/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. 
- -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: Shale Network -- URL: https://doi.org/10.4211/his-data-shalenetwork -- Description: Water quality data in the regions of oil and gas production. -- Group/Category: hydrogeochemistry -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: Hosted via DOI (CUAHSI HydroShare / HIS). The Shale Network provides water quality data from regions of oil and gas production (e.g. shale gas development). Infer creator (e.g. CUAHSI, university or consortium partners), publisher, temporal coverage, variables (e.g. water chemistry, contaminants), distribution (DOI landing page, download or API links), and license/terms from the DOI resolution page and any linked documentation. - -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", - "name": "Global Multi-layer Soil Moisture Products", - "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", - "description": "Global multi-layer soil moisture products covering 1970–2016.", - "keywords": ["soil moisture", "global", "Figshare", "hydrology"], - "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Organization", "name": "Oak Ridge National Laboratory"}], - "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], - "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], - "encodingFormat": ["application/zip", 
"application/x-netcdf"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or DOI (e.g. https://doi.org/10.4211/his-data-shalenetwork#dataset) -5. Include all available metadata fields -6. Creator/publisher: infer from the DOI landing page (e.g. CUAHSI, HydroShare, Shale Network project partners) -7. Include distribution with contentUrl to the DOI and/or data access URL; use encodingFormat as a JSON array (e.g. ["text/html"]) -8. temporalCoverage: infer from the site if possible -9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" -10. Include license/terms if stated -11. Use proper JSON-LD structure (arrays for multiple values) -12. "keywords" as a JSON array of strings — never semicolon/comma-separated string -13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -14. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/SoilGrids2/prompt.txt b/data/objects/summoned/generated/SoilGrids2/prompt.txt deleted file mode 100644 index 7de4156..0000000 --- a/data/objects/summoned/generated/SoilGrids2/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset. 
- -**Dataset Information**: -- Name: SoilGrids2 -- URL: https://data.isric.org/geonetwork/srv/api/sitemap -- Catalog/record URL: https://data.isric.org/geonetwork/srv/api/records/41cb0ae9-1604-4807-96e6-0dc8c94c5d22?language=all -- Description: Soil bulk density, organic carbon content, pH, soil texture fractions and coarse fragments etc. (ISRIC global soil property maps.) -- Group/Category: soil -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: ISRIC – International Soil Reference and Information Centre. SoilGrids provides global soil property maps at about 250 m resolution. Data are discoverable via the GeoNetwork sitemap/catalog URL; distribution is typically via https://files.isric.org/soilgrids/ or similar. Include variables such as bulk density, organic carbon, pH, texture, coarse fragments where appropriate. - -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://www.chelsa-climate.org/datasets/chelsa_bioclim#dataset", - "name": "CHELSA-bioclim (V2.1)", - "url": "https://www.chelsa-climate.org/datasets/chelsa_bioclim", - "description": "CHELSA-bioclim is a global, kilometer-scale climate dataset generated with the CHELSA downscaling model.", - "keywords": ["CHELSA", "bioclim", "bioclimatic variables", "ecology", "species distribution modeling", "climate predictors"], - "creator": {"@type": "Organization", "name": "WSL", "url": "https://www.wsl.ch/"}, - "publisher": {"@type": "Organization", "name": "WSL", "url": "https://www.wsl.ch/"}, - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "-180.0,-60.0 180.0,90.0"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "BIO1–BIO19", "description": "Standard bioclimatic variables"}], - "encodingFormat": 
["image/tiff", "application/geotiff"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "encodingFormat": ["image/tiff"], "contentUrl": "https://example.org/data"}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or identifier (e.g. https://data.isric.org/geonetwork/srv/api/records/41cb0ae9-1604-4807-96e6-0dc8c94c5d22#dataset or the main SoilGrids landing page if preferred) -5. Include all available metadata fields -6. For creator/publisher use Organization: ISRIC – International Soil Reference and Information Centre (https://www.isric.org) -7. Include distribution information (e.g. GeoNetwork catalog URL and/or https://files.isric.org/soilgrids/ if applicable) -8. Add temporalCoverage if time period is known -9. Add spatialCoverage: Use Place with geo containing GeoShape. The box format MUST be: "west,south east,north". For this dataset use: "20,-40 50,10" -10. Include license and access information (SoilGrids is typically CC-BY or similar; state if unknown) -11. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) -12. Set "keywords" as a JSON array of strings, e.g. "keywords": ["soil", "bulk density", "organic carbon", "pH", "SoilGrids", "ISRIC"] — never a single semicolon- or comma-separated string -13. Set "encodingFormat" as a JSON array of strings when listing multiple formats — never a single semicolon- or comma-separated string -14. Add this exact comment for AI-generated disclosure: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. 
diff --git a/data/objects/summoned/generated/TerraClimate/prompt.txt b/data/objects/summoned/generated/TerraClimate/prompt.txt deleted file mode 100644 index edb3de7..0000000 --- a/data/objects/summoned/generated/TerraClimate/prompt.txt +++ /dev/null @@ -1,98 +0,0 @@ -I need to create Schema.org JSON-LD descriptions for a scientific climate dataset. - -**Website URL**: https://www.climatologylab.org/terraclimate.html - -**Dataset Information**: -- Name: TerraClimate -- Group/Category: climate -- Description: Monthly climate and climatic water balance for global terrestrial surfaces from 1958-2019 (extended to 2020). High spatial resolution (~4-km, 1/24th degree) monthly climate data with temporal resolution from 1958-2020, with plans for periodic annual updates. - -**Key Details**: -- Publisher: Climatology Lab (University of Idaho, based on context) -- Creator: John Abatzoglou (lead author) -- Website: https://www.climatologylab.org/terraclimate.html -- Coverage: Global terrestrial surfaces -- Temporal Coverage: 1958-2020 (historical), plus future projections (+2C and +4C scenarios) -- Spatial Resolution: ~4-km (1/24th degree) -- Temporal Resolution: Monthly -- License: CC0 (Public Domain Dedication) - -**Primary Climate Variables**: -- Maximum temperature -- Minimum temperature -- Vapor pressure -- Precipitation accumulation -- Downward surface shortwave radiation -- Wind-speed - -**Derived Variables**: -- Reference evapotranspiration (ASCE Penman-Montieth) -- Runoff -- Actual Evapotranspiration -- Climate Water Deficit -- Soil Moisture -- Snow Water Equivalent -- Palmer Drought Severity Index (PDSI) -- Vapor pressure deficit (VPD) - -**Methods**: -- Uses climatically aided interpolation -- Combines high-spatial resolution climatological normals from WorldClim dataset -- With coarser spatial resolution, but time-varying data from CRU Ts4.0 and Japanese 55-year Reanalysis (JRA55) -- Applies interpolated time-varying anomalies from CRU Ts4.0/JRA55 to high-spatial 
resolution climatology of WorldClim -- Uses modified Thornthwaite-Mather climatic water-balance model - -**Data Access**: -- NetCDF files from THREDDS web server -- Individual years (1958-present) -- Aggregated years (1958-present) -- Future climate projections (+2C and +4C scenarios) -- Climatologies (1961-1990, 1981-2010, and future scenarios) -- Google Earth Engine: IDAHO_EPSCOR/TERRACLIMATE -- Download via THREDDS OPeNDAP and NCSS services - -**Citation**: -Abatzoglou, J.T., S.Z. Dobrowski, S.A. Parks, K.C. Hegewisch, 2018, Terraclimate, a high-resolution global dataset of monthly climate and climatic water balance from 1958-2015, Scientific Data - -**Instructions for Step 1.0 (WebPage Description)**: -Please create a Schema.org WebPage JSON-LD that includes: -- @context: "https://schema.org/" -- @type: "WebPage" -- @id: The webpage URL -- name: Full dataset name -- description: Comprehensive description of TerraClimate -- url: Main webpage URL -- publisher: Climatology Lab / University of Idaho (if identifiable) -- inLanguage: "en" -- isPartOf: WebSite information -- about: Reference to the dataset -- keywords: Relevant keywords (climate, TerraClimate, monthly climate, water balance, global, high resolution, etc.) 
-- mainEntity: Reference to the Dataset - -**Instructions for Dataset JSON-LD**: -Please create a Schema.org Dataset JSON-LD that includes: -- @context, @type, @id, name, description, url -- creator: John Abatzoglou and co-authors -- publisher: Climatology Lab / University of Idaho -- temporalCoverage: 1958-2020 (and future scenarios) -- spatialCoverage: Global terrestrial surfaces -- variableMeasured: List all primary and derived climate variables -- distribution: Multiple DataDownload entries for: - - THREDDS web server access - - Google Earth Engine - - Direct download options -- encodingFormat: NetCDF4 -- version: Current version information -- license: CC0 (Public Domain) -- citation: Scientific publication citation -- measurementTechnique: Climatically aided interpolation method -- about: Climate, water balance, ecological and hydrological studies - -**Output Format**: -Provide the complete JSON-LD in a code block, properly formatted and valid JSON. - -**Important Notes**: -- This is a single comprehensive dataset (not a catalog with multiple datasets) -- Includes both historical data (1958-2020) and future climate projections -- Multiple access methods available (THREDDS, Google Earth Engine, direct download) -- All data in NetCDF4 format diff --git a/data/objects/summoned/generated/WATERBASE/prompt.txt b/data/objects/summoned/generated/WATERBASE/prompt.txt deleted file mode 100644 index 07f51a1..0000000 --- a/data/objects/summoned/generated/WATERBASE/prompt.txt +++ /dev/null @@ -1,53 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset. - -**Dataset Information**: -- Name: WATERBASE -- URL: https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1 -- Description: The status and quality of Europe's rivers, lakes, groundwater bodies and transitional, coastal and marine waters. 
-- Group/Category: hydrogeochemistry -- Creator: -- Provider: -- Publisher: -- Keywords: -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: Hosted on European Environment Agency (EEA) Data Hub. WATERBASE is a European water quality/status dataset covering rivers, lakes, groundwater, transitional, coastal and marine waters. Infer creator (e.g. EEA, European Commission), publisher, temporal coverage, variables (e.g. water quality parameters, ecological status), distribution (data hub item URL, download links), and license/terms from the EEA data hub page and any linked documentation. - -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", - "name": "Global Multi-layer Soil Moisture Products", - "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", - "description": "Global multi-layer soil moisture products covering 1970–2016.", - "keywords": ["soil moisture", "global", "Figshare", "hydrology"], - "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Organization", "name": "Oak Ridge National Laboratory"}], - "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], - "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], - "encodingFormat": ["application/zip", "application/x-netcdf"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] -} - 
-**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" -4. Include @id with the dataset URL or stable identifier (e.g. the EEA data hub URL with #dataset) -5. Include all available metadata fields -6. Creator/publisher: infer from EEA site (e.g. European Environment Agency, European Commission) -7. Include distribution with contentUrl to the data hub page and/or download URL; use encodingFormat as a JSON array (e.g. ["text/html"] or list file formats if known) -8. temporalCoverage: infer from the page (European water reporting often has multi-year or periodic updates) -9. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" (note: WATERBASE is European; you may state coverage in description; keep box as specified for consistency) -10. Include license/terms if stated (EEA data often free reuse with attribution) -11. Use proper JSON-LD structure (arrays for multiple values) -12. "keywords" as a JSON array of strings — never semicolon/comma-separated string -13. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -14. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/data/objects/summoned/generated/Water_Quality_Portal/prompt.txt b/data/objects/summoned/generated/Water_Quality_Portal/prompt.txt deleted file mode 100644 index 232bc0d..0000000 --- a/data/objects/summoned/generated/Water_Quality_Portal/prompt.txt +++ /dev/null @@ -1,51 +0,0 @@ -You are generating a JSON-LD (JSON for Linking Data) description for a scientific dataset following Schema.org vocabulary. - -**Task**: Create a valid JSON-LD document for the following dataset/service. 
- -**Dataset Information**: -- Name: Water Quality Portal (WQP) -- URL: https://www.waterqualitydata.us -- Description: The Water Quality Portal (WQP) is a cooperative service sponsored by the United States Geological Survey (USGS) and the Environmental Protection Agency (EPA). The WQP integrates publicly available water quality data from the USGS National Water Information System (NWIS) and the EPA Water Quality Exchange (WQX) Data Warehouse. -- Group/Category: hydrogeochemistry -- Creator: National Water Quality Monitoring Council | US EPA -- Provider: National Water Quality Monitoring Council | US EPA -- Publisher: National Water Quality Monitoring Council | US EPA -- Keywords: water quality; USGS; US EPA (use as array: ["water quality", "USGS", "US EPA"]) -- Spatial Coverage: Geographic box: west=20, south=-40, east=50, north=10 (format for box: "20,-40 50,10") -- Extracted Metadata: WebAPI/portal. The WQP provides access to water quality monitoring data (physical, chemical, biological) from US federal and state/tribal sources. Infer temporal coverage (ongoing/historical), variableMeasured (e.g. nutrients, contaminants, physical parameters), distribution (portal URL, API endpoints if known), and license/terms of use from the website. 
- -**Reference Example** (from existing JSON-LD in this project): -{ - "@context": "https://schema.org/", - "@type": "Dataset", - "comment": "This dataset metadata was generated by AI.", - "@id": "https://doi.org/10.6084/m9.figshare.13661312.v1#dataset", - "name": "Global Multi-layer Soil Moisture Products", - "url": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312", - "description": "Global multi-layer soil moisture products covering 1970–2016.", - "keywords": ["soil moisture", "global", "Figshare", "hydrology"], - "creator": [{"@type": "Person", "name": "Yaoping Wang"}, {"@type": "Organization", "name": "Oak Ridge National Laboratory"}], - "publisher": [{"@type": "Organization", "name": "Figshare", "url": "https://figshare.com/"}], - "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": {"@type": "Place", "geo": {"@type": "GeoShape", "box": "20,-40 50,10"}}, - "variableMeasured": [{"@type": "PropertyValue", "name": "Soil moisture", "description": "Multi-layer soil moisture"}], - "encodingFormat": ["application/zip", "application/x-netcdf"], - "license": "https://creativecommons.org/licenses/by/4.0/", - "distribution": [{"@type": "DataDownload", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", "encodingFormat": ["text/html"]}] -} - -**Requirements**: -1. Use Schema.org vocabulary (https://schema.org/) -2. Set @context to `{"@vocab": "https://schema.org/"}` -3. Set @type to "Dataset" (or "DataCatalog" if describing the portal as a catalog of datasets; if unsure, use "Dataset" with description of the WQP as an integrated data resource) -4. Include @id with the portal URL (e.g. https://www.waterqualitydata.us#dataset or #datacatalog) -5. Include creator, provider, publisher from the CSV (National Water Quality Monitoring Council; can add US EPA, USGS as related organizations) -6. Include distribution with contentUrl to the portal and/or API; use encodingFormat as a JSON array (e.g. 
["text/html", "application/json"] for API) -7. spatialCoverage: Place with geo GeoShape, box MUST be "west,south east,north" — use "20,-40 50,10" -8. temporalCoverage: use range or "ongoing" as appropriate for a live data portal -9. Use proper JSON-LD structure (arrays for multiple values) -10. "keywords" as a JSON array of strings — e.g. ["water quality", "USGS", "US EPA", "WQP", "NWIS", "WQX"] -11. "encodingFormat" as a JSON array of strings — never semicolon/comma-separated string -12. Add exactly: "comment": "This dataset metadata was generated by AI." - -**Output**: Provide ONLY valid JSON-LD, no additional text or explanation. From 4eb7e2708c382db17298f39b3a47427cc4cd94fa Mon Sep 17 00:00:00 2001 From: jaywt Date: Wed, 20 May 2026 14:51:45 -0400 Subject: [PATCH 56/58] Validate generated JSON-LD metadata --- .../generated/CHELSA/chelsa_bioclim.jsonld | 375 +++++++++++++++++- .../chelsa_canaryclim_climatologies.jsonld | 126 +++++- .../CHELSA/chelsa_cerra_daily.jsonld | 97 ++++- .../chelsa_ch_highres_climatologies.jsonld | 111 +++++- .../CHELSA/chelsa_ch_highres_daily.jsonld | 111 +++++- .../CHELSA/chelsa_climatologies.jsonld | 185 ++++++++- .../generated/CHELSA/chelsa_daily.jsonld | 149 +++++-- .../CHELSA/chelsa_drought_indices.jsonld | 145 ++++++- .../generated/CHELSA/chelsa_monthly.jsonld | 178 ++++++++- .../CHELSA/chelsa_trace21k_centennial.jsonld | 107 ++++- .../chelsa_trace21k_centennial_bioclim.jsonld | 219 +++++++++- .../generated/CHELSA/chelsa_w5e5_daily.jsonld | 86 +++- .../consensus-land-cover.jsonld | 10 +- .../summoned/generated/FLO1K/flo1k.jsonld | 3 +- .../summoned/generated/G-RUN/g-run.jsonld | 3 +- .../objects/summoned/generated/GFC/gfc.jsonld | 43 +- .../summoned/generated/GHSL/ghsl.jsonld | 5 +- .../generated/GPP_MOD17/gpp_mod17.jsonld | 65 ++- .../generated/GRACE-REC/grace-rec.jsonld | 3 +- .../global-multi-layer-soil-moisture.jsonld | 3 +- .../generated/HydroSHEDS/hydrosheds.jsonld | 7 +- .../generated/MERIT_DEM/merit-dem.jsonld | 43 +- 
.../generated/MERIT_DEM/webpage.jsonld | 24 +- .../generated/MRLC_NLCD/annual-nlcd.jsonld | 15 +- .../MRLC_NLCD/exotic-annual-grass.jsonld | 15 +- .../generated/MRLC_NLCD/legacy-nlcd.jsonld | 13 +- .../generated/MRLC_NLCD/nalcms.jsonld | 14 +- .../summoned/generated/MRLC_NLCD/rcmap.jsonld | 19 +- .../Shale_Network/shale-network.jsonld | 6 +- .../generated/SoilGrids2/soilgrids2.jsonld | 93 ++++- .../TerraClimate/terraclimate.jsonld | 20 +- .../generated/WATERBASE/waterbase.jsonld | 59 ++- .../water-quality-portal.jsonld | 7 +- docs/jsonld-validation-plan.md | 23 ++ prompts/jsonld-generation-prompt.txt | 11 +- scripts/generate_jsonld.py | 136 ++++++- 36 files changed, 2260 insertions(+), 269 deletions(-) create mode 100644 docs/jsonld-validation-plan.md diff --git a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld index d4bf33e..13347f4 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld @@ -39,18 +39,297 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "BIO1–BIO19", - "description": "Standard bioclimatic variables (temperature and precipitation derivatives)" + "name": "Mean Annual Near-Surface Air Temperature", + "alternateName": "bio01", + "unitText": "°C", + "description": "Mean annual temperature calculated as the average of mean monthly temperatures over the year" }, { "@type": "PropertyValue", - "name": "gdd", - "description": "Growing degree days (and related growing-season metrics where provided)" + "name": "Mean Diurnal Near-Surface Air Temperature Range", + "alternateName": "bio02", + "unitText": "°C", + "description": "Mean diurnal temperature range computed as the average of monthly daily maximum minus daily minimum near-surface air temperature" }, { "@type": "PropertyValue", - "name": "koppen", - "description": "Köppen–Geiger climate classification (where provided in CHELSA 
bioclim products)" + "name": "Isothermality", + "alternateName": "bio03", + "unitText": "°C", + "description": "Isothermality: 100 × bio02 ÷ bio07; compares day-night variability to annual temperature range" + }, + { + "@type": "PropertyValue", + "name": "Temperature Seasonality", + "alternateName": "bio04", + "unitText": "°C/100", + "description": "Temperature seasonality given by the standard deviation of mean monthly temperatures" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Maximum Near-Surface Air Temperature of the Warmest Month", + "alternateName": "bio05", + "unitText": "°C", + "description": "Highest monthly mean of daily maximum temperatures across the year; indicates peak thermal conditions" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Minimum Near-Surface Air Temperature of the Coldest Month", + "alternateName": "bio06", + "unitText": "°C", + "description": "Lowest monthly mean of daily minimum temperatures across the year; characterizes winter cold intensity" + }, + { + "@type": "PropertyValue", + "name": "Annual Daily Mean Near-Surface Air Temperature Range", + "alternateName": "bio07", + "unitText": "°C", + "description": "Annual temperature range calculated as bio05 minus bio06; measures amplitude between warmest and coldest months" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Near-Surface Air Temperature of the Wettest Quarter", + "alternateName": "bio08", + "unitText": "°C", + "description": "Average monthly mean temperature over the wettest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Near-Surface Air Temperature of the Driest Quarter", + "alternateName": "bio09", + "unitText": "°C", + "description": "Average monthly mean temperature over the driest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Mean Near-Surface Air Temperature of the Warmest Quarter", + "alternateName": "bio10", + "unitText": "°C", + "description": 
"Average monthly mean temperature over the warmest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Mean Near-Surface Air Temperature of the Coldest Quarter", + "alternateName": "bio11", + "unitText": "°C", + "description": "Average monthly mean temperature over the coldest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Annual Precipitation", + "alternateName": "bio12", + "unitText": "kg m-2 year-1", + "description": "Sum of monthly precipitation totals across the year" + }, + { + "@type": "PropertyValue", + "name": "Precipitation of the Wettest Month", + "alternateName": "bio13", + "unitText": "kg m-2 month-1", + "description": "Maximum monthly precipitation total" + }, + { + "@type": "PropertyValue", + "name": "Precipitation of the Driest Month", + "alternateName": "bio14", + "unitText": "kg m-2 month-1", + "description": "Minimum monthly precipitation total" + }, + { + "@type": "PropertyValue", + "name": "Precipitation Seasonality", + "alternateName": "bio15", + "unitText": "kg m-2", + "description": "Coefficient of variation: 100 × standard deviation ÷ mean of monthly precipitation totals" + }, + { + "@type": "PropertyValue", + "name": "Mean Monthly Precipitation of the Wettest Quarter", + "alternateName": "bio16", + "unitText": "kg m-2 month-1", + "description": "Average monthly precipitation during the wettest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Monthly Precipitation of the Driest Quarter", + "alternateName": "bio17", + "unitText": "kg m-2 month-1", + "description": "Average monthly precipitation during the driest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Monthly Precipitation of the Warmest Quarter", + "alternateName": "bio18", + "unitText": "kg m-2 month-1", + "description": "Average monthly precipitation during the warmest three-month period of the year" + }, + { + "@type": "PropertyValue", + 
"name": "Mean Monthly Precipitation of the Coldest Quarter", + "alternateName": "bio19", + "unitText": "kg m-2 month-1", + "description": "Average monthly precipitation during the coldest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Frost Change Frequency", + "alternateName": "fcf", + "unitText": "count", + "description": "Number of freeze-thaw transitions per year." + }, + { + "@type": "PropertyValue", + "name": "First Day of the Growing Season TREELIM", + "alternateName": "fgd", + "unitText": "julian day", + "description": "Julian day marking the first occurrence of growing season conditions." + }, + { + "@type": "PropertyValue", + "name": "Growing Degree Days Heat Sum above 0 °C", + "alternateName": "gdd0", + "unitText": "°C", + "description": "Sum of daily mean temperatures above 0 °C accumulated over the year." + }, + { + "@type": "PropertyValue", + "name": "Growing Degree Days Heat Sum above 10 °C", + "alternateName": "gdd10", + "unitText": "°C", + "description": "Sum of daily mean temperatures above 10 °C accumulated over the year." + }, + { + "@type": "PropertyValue", + "name": "Growing Degree Days Heat Sum above 5 °C", + "alternateName": "gdd5", + "unitText": "°C", + "description": "Sum of daily mean temperatures above 5 °C accumulated over the year." + }, + { + "@type": "PropertyValue", + "name": "First Growing Degree Day above 10 °C", + "alternateName": "gdgfgd10", + "unitText": "julian day", + "description": "Julian day of the first occurrence of a daily mean temperature above 10 °C." + }, + { + "@type": "PropertyValue", + "name": "First Growing Degree Day above 5 °C", + "alternateName": "gdgfgd5", + "unitText": "julian day", + "description": "Julian day of the first occurrence of a daily mean temperature above 5 °C." 
+ }, + { + "@type": "PropertyValue", + "name": "Growing Season Length", + "alternateName": "gsl", + "unitText": "days", + "description": "Number of days between the first and last occurrence of growing season conditions." + }, + { + "@type": "PropertyValue", + "name": "Accumulated Precipitation Amount on Growing Season Days", + "alternateName": "gsp", + "unitText": "kg m-2 gsl-1", + "description": "Total precipitation accumulated during the growing season period." + }, + { + "@type": "PropertyValue", + "name": "Mean Temperature of Growing Season Days", + "alternateName": "gst", + "unitText": "°C", + "description": "Average daily mean temperature over all growing season days." + }, + { + "@type": "PropertyValue", + "name": "Köppen-Geiger Climate Classification", + "alternateName": "kg0", + "unitText": "category", + "description": "Köppen-Geiger climate classification." + }, + { + "@type": "PropertyValue", + "name": "Köppen-Geiger Climate Classification without As and Aw Differentiation", + "alternateName": "kg1", + "unitText": "category", + "description": "Köppen-Geiger climate classification without As and Aw differentiation." + }, + { + "@type": "PropertyValue", + "name": "Köppen-Geiger Climate Classification after Peel et al. 2007", + "alternateName": "kg2", + "unitText": "category", + "description": "Köppen-Geiger climate classification after Peel et al. 2007." + }, + { + "@type": "PropertyValue", + "name": "Climate Classification after Wissmann 1939", + "alternateName": "kg3", + "unitText": "category", + "description": "Climate classification after Wissmann 1939." + }, + { + "@type": "PropertyValue", + "name": "Climate Classification after Thornthwaite 1931", + "alternateName": "kg4", + "unitText": "category", + "description": "Climate classification after Thornthwaite 1931." 
+ }, + { + "@type": "PropertyValue", + "name": "Climate Classification after Troll-Pfaffen", + "alternateName": "kg5", + "unitText": "category", + "description": "Climate classification after Troll-Pfaffen." + }, + { + "@type": "PropertyValue", + "name": "Last Day of the Growing Season TREELIM", + "alternateName": "lgd", + "unitText": "julian day", + "description": "Julian day of the last occurrence of growing season conditions." + }, + { + "@type": "PropertyValue", + "name": "Number of Growing Degree Days above 0 °C", + "alternateName": "ngd0", + "unitText": "number of days", + "description": "Total number of days in a year with mean daily temperature above 0 °C." + }, + { + "@type": "PropertyValue", + "name": "Number of Growing Degree Days above 10 °C", + "alternateName": "ngd10", + "unitText": "number of days", + "description": "Total number of days in a year with mean daily temperature above 10 °C." + }, + { + "@type": "PropertyValue", + "name": "Number of Growing Degree Days above 5 °C", + "alternateName": "ngd5", + "unitText": "number of days", + "description": "Total number of days in a year with mean daily temperature above 5 °C." + }, + { + "@type": "PropertyValue", + "name": "Net Primary Production on Land as Carbon Mass Flux", + "alternateName": "npp", + "unitText": "g C m-2 yr-1", + "description": "Net primary production on land expressed as carbon mass flux." + }, + { + "@type": "PropertyValue", + "name": "Snow Cover Days", + "alternateName": "scd", + "unitText": "days", + "description": "Number of days per year with snow cover present at the surface." + }, + { + "@type": "PropertyValue", + "name": "Snow Water Equivalent", + "alternateName": "swe", + "unitText": "kg m-2 year-1", + "description": "Total water equivalent of snowpack accumulated over the year." 
} ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -65,7 +344,7 @@ "@type": "DataDownload", "name": "CHELSA-bioclim downloads (COG)", "description": "Download portal for CHELSA bioclimatic variables.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.unil.cloud.switch.ch%2Fchelsa02%2F&prefix=chelsa%2Fglobal%2Fbioclim%2F", "encodingFormat": [ "image/tiff", "application=geotiff", @@ -73,7 +352,85 @@ ] } ], - "citation": "Brun, P., Zimmermann, N. E., Hari, C., Pellissier, L., & Karger, D. N. (2022). Global climate-related predictors at kilometer resolution for the past and future. Earth System Science Data, 14(12), 5573–5603. https://doi.org/10.5194/essd-14-5573-2022", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Global climate-related predictors at kilometer resolution for the past and future", + "author": [ + { + "@type": "Person", + "name": "P. Brun" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "C. Hari" + }, + { + "@type": "Person", + "name": "L. Pellissier" + }, + { + "@type": "Person", + "name": "D. N. Karger" + } + ], + "isPartOf": { + "@type": "Periodical", + "name": "Earth System Science Data" + }, + "datePublished": "2022", + "volumeNumber": "14", + "issueNumber": "12", + "pagination": "5573-5603", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/essd-14-5573-2022" + }, + "sameAs": "https://doi.org/10.5194/essd-14-5573-2022" + }, + { + "@type": "Dataset", + "name": "CHELSA-BIOCLIM+ A novel set of global climate-related predictors at kilometre-resolution", + "author": [ + { + "@type": "Person", + "name": "P. Brun" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "C. Hari" + }, + { + "@type": "Person", + "name": "L. Pellissier" + }, + { + "@type": "Person", + "name": "D. N. 
Karger" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2022", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.332" + }, + "sameAs": "https://doi.org/10.16904/envidat.332" + } + ], "about": [ { "@type": "Thing", @@ -84,4 +441,4 @@ "name": "Biodiversity and ecology" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld index 04a6da8..e5c3982 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld @@ -35,13 +35,31 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "tas", - "description": "Air temperature climatologies" + "name": "Precipitation", + "alternateName": "pr", + "unitText": "kg m-2 day-1", + "description": "Precipitation including liquid and solid phases." }, { "@type": "PropertyValue", - "name": "pr", - "description": "Precipitation climatologies" + "name": "Daily Mean Near-Surface Air Temperature", + "alternateName": "tas", + "unitText": "K", + "description": "Near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Daily Maximum Near-Surface Air Temperature", + "alternateName": "tasmax", + "unitText": "K", + "description": "Maximum near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Daily Minimum Near-Surface Air Temperature", + "alternateName": "tasmin", + "unitText": "K", + "description": "Minimum near-surface, usually 2 meter, air temperature." 
} ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -56,7 +74,7 @@ "@type": "DataDownload", "name": "CHELSACanaryClim-climatologies downloads (COG)", "description": "Download portal for CanaryClim climatologies.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.zhdk.cloud.switch.ch%2Fchelsa01%2F&prefix=chelsa_canaryclim%2Fcanaries%2Fclimatologies%2F", "encodingFormat": [ "image/tiff", "application=geotiff", @@ -64,7 +82,101 @@ ] } ], - "citation": "Patiño, J., Collart, F., Vanderpoorten, A., Martin-Esquivel, J. L., Naranjo-Cigala, A., Mirolo, S., Karger, D. N. (2023). Spatial resolution impacts projected plant responses to climate change on topographically complex islands. Diversity and Distributions, 29(10), 1245–1262.", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Spatial resolution impacts projected plant responses to climate change on topographically complex islands", + "author": [ + { + "@type": "Person", + "name": "J. Patiño" + }, + { + "@type": "Person", + "name": "F. Collart" + }, + { + "@type": "Person", + "name": "A. Vanderpoorten" + }, + { + "@type": "Person", + "name": "J. L. Martin-Esquivel" + }, + { + "@type": "Person", + "name": "A. Naranjo-Cigala" + }, + { + "@type": "Person", + "name": "S. Mirolo" + }, + { + "@type": "Person", + "name": "D. N. Karger" + } + ], + "datePublished": "2023", + "isPartOf": { + "@type": "Periodical", + "name": "Diversity and Distributions" + }, + "volumeNumber": "29", + "issueNumber": "10", + "pagination": "1245-1262", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1111/ddi.13757" + }, + "sameAs": "https://doi.org/10.1111/ddi.13757" + }, + { + "@type": "Dataset", + "name": "CHELSACanaryClim-climatologies", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "F. Collart" + }, + { + "@type": "Person", + "name": "A. 
Vanderpoorten" + }, + { + "@type": "Person", + "name": "J. L. Martin-Esquivel" + }, + { + "@type": "Person", + "name": "A. Naranjo-Cigala" + }, + { + "@type": "Person", + "name": "S. Mirolo" + }, + { + "@type": "Person", + "name": "J. Patiño" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2025", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.692" + }, + "sameAs": "https://doi.org/10.16904/envidat.692" + } + ], "about": [ { "@type": "Thing", @@ -75,4 +187,4 @@ "name": "Downscaled regional climate" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld index a4ab667..8078985 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld @@ -2,10 +2,10 @@ "@context": "https://schema.org/", "@type": "Dataset", "comment": "This dataset metadata was generated by AI.", - "@id": "https://www.chelsa-climate.org/datasets#chelsacerra-daily-dataset", + "@id": "https://www.chelsa-climate.org/datasets/chelsacerra-daily#dataset", "name": "CHELSAcerra-daily (Europe) (V1.0)", "description": "CHELSAcerra-daily is a high-resolution climate dataset for air temperatures generated with the CHELSA downscaling model using the Copernicus European Regional ReAnalysis (CERRA) for Europe.", - "url": "https://www.chelsa-climate.org/datasets", + "url": "https://www.chelsa-climate.org/datasets/chelsacerra-daily", "version": "1.0", "creator": { "@type": "Organization", @@ -35,8 +35,10 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "tas", - "description": "Daily mean near-surface air temperature (downscaled from CERRA)" + "name": "Daily Mean Near-Surface Air Temperature", + "alternateName": "tas", + "unitText": "K", + "description": "Near-surface, usually 2 meter, air 
temperature." } ], "distribution": [ @@ -44,13 +46,94 @@ "@type": "DataDownload", "name": "CHELSAcerra-daily downloads (portal)", "description": "Download portal linked from the CHELSA catalog entry for CHELSAcerra-daily.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.unil.cloud.switch.ch%2Fchelsa02%2F&prefix=chelsa%2Feurope%2Fdaily%2F", "encodingFormat": [ "text/html" ] } ], - "citation": "Karger, D. N. and Janzing, J. (2025). CHELSAcerra-daily. EnviDat. https://doi.org/10.16904/envidat.703", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Climatologies at high resolution for the earth's land surface areas", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "O. Conrad" + }, + { + "@type": "Person", + "name": "J. Böhner" + }, + { + "@type": "Person", + "name": "T. Kawohl" + }, + { + "@type": "Person", + "name": "H. Kreft" + }, + { + "@type": "Person", + "name": "R. W. Soria-Auza" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "H. P. Linder" + }, + { + "@type": "Person", + "name": "M. Kessler" + } + ], + "datePublished": "2017", + "isPartOf": { + "@type": "Periodical", + "name": "Scientific Data" + }, + "volumeNumber": "4", + "pagination": "170122", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/sdata.2017.122" + }, + "sameAs": "https://doi.org/10.1038/sdata.2017.122" + }, + { + "@type": "Dataset", + "name": "CHELSAcerra-daily", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "J. 
Janzing" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2025", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.703" + }, + "sameAs": "https://doi.org/10.16904/envidat.703" + } + ], "about": [ { "@type": "Thing", @@ -61,4 +144,4 @@ "name": "Daily air temperature" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld index cf623e8..c939405 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld @@ -35,23 +35,31 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "tas", - "description": "Near-surface air temperature (aggregated)" + "name": "Precipitation", + "alternateName": "pr", + "unitText": "kg m-2 day-1", + "description": "Precipitation including liquid and solid phases." }, { "@type": "PropertyValue", - "name": "tasmax", - "description": "Maximum near-surface air temperature (aggregated)" + "name": "Daily Mean Near-Surface Air Temperature", + "alternateName": "tas", + "unitText": "K", + "description": "Near-surface, usually 2 meter, air temperature." }, { "@type": "PropertyValue", - "name": "tasmin", - "description": "Minimum near-surface air temperature (aggregated)" + "name": "Daily Maximum Near-Surface Air Temperature", + "alternateName": "tasmax", + "unitText": "K", + "description": "Maximum near-surface, usually 2 meter, air temperature." }, { "@type": "PropertyValue", - "name": "pr", - "description": "Precipitation (aggregated)" + "name": "Daily Minimum Near-Surface Air Temperature", + "alternateName": "tasmin", + "unitText": "K", + "description": "Minimum near-surface, usually 2 meter, air temperature." 
} ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -64,13 +72,94 @@ "@type": "DataDownload", "name": "CHELSAch-highres-climatologies downloads (NetCDF)", "description": "Download portal for CHELSAch-highres-climatologies.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.unil.cloud.switch.ch%2Fchelsa02%2F&prefix=chelsa%2Fch%2Fclimatologies%2F", "encodingFormat": [ "application/x-netcdf" ] } ], - "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Climatologies at high resolution for the earth's land surface areas", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "O. Conrad" + }, + { + "@type": "Person", + "name": "J. Böhner" + }, + { + "@type": "Person", + "name": "T. Kawohl" + }, + { + "@type": "Person", + "name": "H. Kreft" + }, + { + "@type": "Person", + "name": "R. W. Soria-Auza" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "H. P. Linder" + }, + { + "@type": "Person", + "name": "M. Kessler" + } + ], + "datePublished": "2017", + "isPartOf": { + "@type": "Periodical", + "name": "Scientific Data" + }, + "volumeNumber": "4", + "pagination": "170122", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/sdata.2017.122" + }, + "sameAs": "https://doi.org/10.1038/sdata.2017.122" + }, + { + "@type": "Dataset", + "name": "CHELSAch-highres-climatologies at high resolution", + "author": [ + { + "@type": "Person", + "name": "F. Zilker" + }, + { + "@type": "Person", + "name": "D. N. 
Karger" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2025", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.689" + }, + "sameAs": "https://doi.org/10.16904/envidat.689" + } + ], "about": [ { "@type": "Thing", @@ -81,4 +170,4 @@ "name": "Switzerland climate normals" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld index c07d6db..7c3378a 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld @@ -35,23 +35,31 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "tas", - "description": "Daily mean near-surface air temperature" + "name": "Precipitation", + "alternateName": "pr", + "unitText": "kg m-2 day-1", + "description": "Precipitation including liquid and solid phases." }, { "@type": "PropertyValue", - "name": "tasmax", - "description": "Daily maximum near-surface air temperature" + "name": "Daily Mean Near-Surface Air Temperature", + "alternateName": "tas", + "unitText": "K", + "description": "Near-surface, usually 2 meter, air temperature." }, { "@type": "PropertyValue", - "name": "tasmin", - "description": "Daily minimum near-surface air temperature" + "name": "Daily Maximum Near-Surface Air Temperature", + "alternateName": "tasmax", + "unitText": "K", + "description": "Maximum near-surface, usually 2 meter, air temperature." }, { "@type": "PropertyValue", - "name": "pr", - "description": "Precipitation" + "name": "Daily Minimum Near-Surface Air Temperature", + "alternateName": "tasmin", + "unitText": "K", + "description": "Minimum near-surface, usually 2 meter, air temperature." 
} ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -64,13 +72,94 @@ "@type": "DataDownload", "name": "CHELSAch-highres-daily downloads (NetCDF)", "description": "Download portal for CHELSAch-highres-daily.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.unil.cloud.switch.ch%2Fchelsa02%2F&prefix=chelsa%2Fch%2Fdaily%2F", "encodingFormat": [ "application/x-netcdf" ] } ], - "citation": "Zilker, F., Karger, D. N. (2025). CHELSAch-highres-daily climate data at high resolution. EnviDat. https://www.doi.org/10.16904/envidat.688", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Climatologies at high resolution for the earth's land surface areas", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "O. Conrad" + }, + { + "@type": "Person", + "name": "J. Böhner" + }, + { + "@type": "Person", + "name": "T. Kawohl" + }, + { + "@type": "Person", + "name": "H. Kreft" + }, + { + "@type": "Person", + "name": "R. W. Soria-Auza" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "H. P. Linder" + }, + { + "@type": "Person", + "name": "M. Kessler" + } + ], + "datePublished": "2017", + "isPartOf": { + "@type": "Periodical", + "name": "Scientific Data" + }, + "volumeNumber": "4", + "pagination": "170122", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/sdata.2017.122" + }, + "sameAs": "https://doi.org/10.1038/sdata.2017.122" + }, + { + "@type": "Dataset", + "name": "CHELSAch-highres-daily climate data at high resolution", + "author": [ + { + "@type": "Person", + "name": "F. Zilker" + }, + { + "@type": "Person", + "name": "D. N. 
Karger" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2025", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.688" + }, + "sameAs": "https://doi.org/10.16904/envidat.688" + } + ], "about": [ { "@type": "Thing", @@ -81,4 +170,4 @@ "name": "Mountain climatology" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld index 3671eb8..73d2a34 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld @@ -39,38 +39,80 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "tas", - "description": "Near-surface air temperature (climatological means)" + "name": "Total Cloud Cover Percentage", + "alternateName": "clt", + "unitText": "percent", + "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column." }, { "@type": "PropertyValue", - "name": "tasmax", - "description": "Maximum near-surface air temperature (climatological means)" + "name": "Monthly Climate Moisture Index", + "alternateName": "cmi", + "unitText": "kg m-2 month-1", + "description": "Monthly ratio of precipitation to potential evapotranspiration; indicator of climatic water availability." }, { "@type": "PropertyValue", - "name": "tasmin", - "description": "Minimum near-surface air temperature (climatological means)" + "name": "Near-Surface Relative Humidity", + "alternateName": "hurs", + "unitText": "percent", + "description": "Relative humidity near the surface." 
}, { "@type": "PropertyValue", - "name": "pr", - "description": "Precipitation (climatological means/accumulations)" + "name": "Monthly Potential Evapotranspiration", + "alternateName": "pet", + "unitText": "kg m-2 month-1", + "description": "Total potential evapotranspiration for the month assuming unlimited water availability, calculated using Penman-Monteith." }, { "@type": "PropertyValue", - "name": "hurs", - "description": "Near-surface relative humidity" + "name": "Precipitation", + "alternateName": "pr", + "unitText": "kg m-2 day-1", + "description": "Precipitation including liquid and solid phases." }, { "@type": "PropertyValue", - "name": "clt", - "description": "Total cloud cover percentage" + "name": "Surface Downwelling Shortwave Flux in Air", + "alternateName": "rsds", + "unitText": "W m-2", + "description": "Surface solar irradiance for UV calculations." }, { "@type": "PropertyValue", - "name": "sfcWind", - "description": "Near-surface wind speed" + "name": "Near-Surface Wind Speed", + "alternateName": "sfcWind", + "unitText": "m s-1", + "description": "Near-surface, usually 10 meter, wind speed." + }, + { + "@type": "PropertyValue", + "name": "Daily Mean Near-Surface Air Temperature", + "alternateName": "tas", + "unitText": "K", + "description": "Near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Daily Maximum Near-Surface Air Temperature", + "alternateName": "tasmax", + "unitText": "K", + "description": "Maximum near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Daily Minimum Near-Surface Air Temperature", + "alternateName": "tasmin", + "unitText": "K", + "description": "Minimum near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Vapor Pressure Deficit", + "alternateName": "vpd", + "unitText": "Pa", + "description": "Difference between saturation vapor pressure and actual vapor pressure." 
} ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -85,7 +127,7 @@ "@type": "DataDownload", "name": "CHELSA-climatologies downloads (COG)", "description": "Download portal for CHELSA climatologies.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.unil.cloud.switch.ch%2Fchelsa02%2F&prefix=chelsa%2Fglobal%2Fclimatologies%2F", "encodingFormat": [ "image/tiff", "application=geotiff", @@ -93,7 +135,116 @@ ] } ], - "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Climatologies at high resolution for the earth's land surface areas", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "O. Conrad" + }, + { + "@type": "Person", + "name": "J. Böhner" + }, + { + "@type": "Person", + "name": "T. Kawohl" + }, + { + "@type": "Person", + "name": "H. Kreft" + }, + { + "@type": "Person", + "name": "R. W. Soria-Auza" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "H. P. Linder" + }, + { + "@type": "Person", + "name": "M. Kessler" + } + ], + "datePublished": "2017", + "isPartOf": { + "@type": "Periodical", + "name": "Scientific Data" + }, + "volumeNumber": "4", + "pagination": "170122", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/sdata.2017.122" + }, + "sameAs": "https://doi.org/10.1038/sdata.2017.122" + }, + { + "@type": "Dataset", + "name": "Climatologies at high resolution for the earth’s land surface areas", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "O. 
Conrad" + }, + { + "@type": "Person", + "name": "J. Böhner" + }, + { + "@type": "Person", + "name": "T. Kawohl" + }, + { + "@type": "Person", + "name": "H. Kreft" + }, + { + "@type": "Person", + "name": "R. W. Soria-Auza" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "H. P. Linder" + }, + { + "@type": "Person", + "name": "M. Kessler" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2021", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.228" + }, + "sameAs": "https://doi.org/10.16904/envidat.228" + } + ], "about": [ { "@type": "Thing", @@ -104,4 +255,4 @@ "name": "Baseline climate" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld index c24e080..fe62159 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld @@ -41,58 +41,80 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "tas", - "description": "Daily mean near-surface air temperature" + "name": "Total Cloud Cover Percentage", + "alternateName": "clt", + "unitText": "percent", + "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column." }, { "@type": "PropertyValue", - "name": "tasmax", - "description": "Daily maximum near-surface air temperature" + "name": "Near-Surface Relative Humidity", + "alternateName": "hurs", + "unitText": "percent", + "description": "Relative humidity near the surface." }, { "@type": "PropertyValue", - "name": "tasmin", - "description": "Daily minimum near-surface air temperature" + "name": "Precipitation", + "alternateName": "pr", + "unitText": "kg m-2 day-1", + "description": "Precipitation including liquid and solid phases." 
}, { "@type": "PropertyValue", - "name": "pr", - "description": "Precipitation" + "name": "Precipitation", + "alternateName": "prec", + "unitText": "kg m-2 day-1", + "description": "Downscaled forecast precipitation from ERA5; not bias corrected and should not be mixed with pr." }, { "@type": "PropertyValue", - "name": "hurs", - "description": "Near-surface relative humidity" + "name": "Surface Air Pressure", + "alternateName": "ps", + "unitText": "hPa", + "description": "Surface pressure, not mean sea-level pressure." }, { "@type": "PropertyValue", - "name": "sfcWind", - "description": "Near-surface wind speed" + "name": "Surface Downwelling Shortwave Flux in Air", + "alternateName": "rsds", + "unitText": "W m-2", + "description": "Surface solar irradiance for UV calculations." }, { "@type": "PropertyValue", - "name": "rsds", - "description": "Surface downwelling shortwave flux in air (solar irradiance)" + "name": "Near-Surface Wind Speed", + "alternateName": "sfcWind", + "unitText": "m s-1", + "description": "Near-surface, usually 10 meter, wind speed." }, { "@type": "PropertyValue", - "name": "clt", - "description": "Total cloud cover percentage" + "name": "Daily Mean Near-Surface Air Temperature", + "alternateName": "tas", + "unitText": "K", + "description": "Near-surface, usually 2 meter, air temperature." }, { "@type": "PropertyValue", - "name": "vpd", - "description": "Vapor pressure deficit" + "name": "Daily Maximum Near-Surface Air Temperature", + "alternateName": "tasmax", + "unitText": "K", + "description": "Maximum near-surface, usually 2 meter, air temperature." }, { "@type": "PropertyValue", - "name": "pet", - "description": "Potential evapotranspiration" + "name": "Daily Minimum Near-Surface Air Temperature", + "alternateName": "tasmin", + "unitText": "K", + "description": "Minimum near-surface, usually 2 meter, air temperature." 
}, { "@type": "PropertyValue", - "name": "cmi", - "description": "Climate moisture index" + "name": "Air Temperature Lapse Rate", + "alternateName": "tz", + "unitText": "K m-1", + "description": "Rate of change in air temperature with altitude calculated over the centennial period." } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -107,7 +129,7 @@ "@type": "DataDownload", "name": "CHELSA-daily downloads (COG)", "description": "Download portal for CHELSA-daily. Files are provided via the CHELSA download service; see dataset page for details and variable-specific subdirectories.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.unil.cloud.switch.ch%2Fchelsa02%2F&prefix=chelsa%2Fglobal%2Fdaily%2F", "encodingFormat": [ "image/tiff", "application=geotiff", @@ -124,7 +146,84 @@ ] } ], - "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Climatologies at high resolution for the earth's land surface areas", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "O. Conrad" + }, + { + "@type": "Person", + "name": "J. Böhner" + }, + { + "@type": "Person", + "name": "T. Kawohl" + }, + { + "@type": "Person", + "name": "H. Kreft" + }, + { + "@type": "Person", + "name": "R. W. Soria-Auza" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "H. P. Linder" + }, + { + "@type": "Person", + "name": "M. 
Kessler" + } + ], + "datePublished": "2017", + "isPartOf": { + "@type": "Periodical", + "name": "Scientific Data" + }, + "volumeNumber": "4", + "pagination": "170122", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/sdata.2017.122" + }, + "sameAs": "https://doi.org/10.1038/sdata.2017.122" + }, + { + "@type": "Dataset", + "name": "CHELSA-daily climate data at high resolution", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2025", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.687" + }, + "sameAs": "https://doi.org/10.16904/envidat.687" + } + ], "about": [ { "@type": "Thing", @@ -139,4 +238,4 @@ "name": "High-resolution gridded data" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld index ecad538..1851d39 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld @@ -39,18 +39,38 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "spei12", - "description": "Standardized precipitation evapotranspiration index (12-month)" + "name": "Multiyear Meteorological Drought", + "alternateName": "mymd", + "unitText": "id", + "description": "Identifier for multiyear meteorological drought events." }, { "@type": "PropertyValue", - "name": "spi", - "description": "Standardized precipitation index (multiple timescales)" + "name": "Multiyear Meteorological Drought at 10 km Resolution", + "alternateName": "mymd10", + "unitText": "id", + "description": "Identifier for multiyear meteorological drought events at 10 km resolution." 
}, { "@type": "PropertyValue", - "name": "mymd", - "description": "Identifier for multiyear meteorological drought events (and related fields)" + "name": "Kernel Normalized Difference Vegetation Index Anomaly", + "alternateName": "qkndvi", + "unitText": "1", + "description": "Annual anomaly in vegetation greenness derived from kernel normalized difference vegetation index." + }, + { + "@type": "PropertyValue", + "name": "Standardized Precipitation Evapotranspiration Index", + "alternateName": "spei12", + "unitText": "1", + "description": "Standardized climatic water balance index over a 12-month integration period." + }, + { + "@type": "PropertyValue", + "name": "Standardized Precipitation Index", + "alternateName": "spi12", + "unitText": "1", + "description": "Standardized precipitation anomaly index over a 12-month integration period." } ], "license": "https://creativecommons.org/licenses/by/4.0/", @@ -65,7 +85,7 @@ "@type": "DataDownload", "name": "CHELSA-drought-indices downloads (COG)", "description": "Download portal for CHELSA drought indices.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.unil.cloud.switch.ch%2Fchelsa02%2F&prefix=chelsa%2Fglobal%2Fannual%2F", "encodingFormat": [ "image/tiff", "application=geotiff", @@ -73,7 +93,114 @@ ] } ], - "citation": "Chen, L., Brun, P., Buri, P., Fatichi, S., Gessler, A., McCarthy, M. J., Pelicciotti, F., Stocker, B., Karger, D. N. (2024). High resolution global standardized drought indices. EnviDat. https://doi.org/10.16904/envidat.530", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Global increase in the occurrence and impact of megadroughts", + "author": [ + { + "@type": "Person", + "name": "L. Chen" + }, + { + "@type": "Person", + "name": "P. Brun" + }, + { + "@type": "Person", + "name": "P. Buri" + }, + { + "@type": "Person", + "name": "S. Fatichi" + }, + { + "@type": "Person", + "name": "A. 
Gessler" + }, + { + "@type": "Person", + "name": "M. J. McCarthy" + }, + { + "@type": "Person", + "name": "F. Pelicciotti" + }, + { + "@type": "Person", + "name": "B. Stocker" + }, + { + "@type": "Person", + "name": "D. N. Karger" + } + ], + "datePublished": "2025", + "isPartOf": { + "@type": "Periodical", + "name": "Science" + }, + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1126/science.ado4245" + }, + "sameAs": "https://doi.org/10.1126/science.ado4245" + }, + { + "@type": "Dataset", + "name": "High resolution global standardized drought indices", + "author": [ + { + "@type": "Person", + "name": "L. Chen" + }, + { + "@type": "Person", + "name": "P. Brun" + }, + { + "@type": "Person", + "name": "P. Buri" + }, + { + "@type": "Person", + "name": "S. Fatichi" + }, + { + "@type": "Person", + "name": "A. Gessler" + }, + { + "@type": "Person", + "name": "M. J. McCarthy" + }, + { + "@type": "Person", + "name": "F. Pelicciotti" + }, + { + "@type": "Person", + "name": "B. Stocker" + }, + { + "@type": "Person", + "name": "D. N. 
Karger" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2024", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.530" + }, + "sameAs": "https://doi.org/10.16904/envidat.530" + } + ], "about": [ { "@type": "Thing", @@ -84,4 +211,4 @@ "name": "Climate risk" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld index fb9d384..8b3672c 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld @@ -39,23 +39,94 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "tas", - "description": "Near-surface air temperature (monthly aggregated)" + "name": "Total Cloud Cover Percentage", + "alternateName": "clt", + "unitText": "percent", + "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column." }, { "@type": "PropertyValue", - "name": "tasmax", - "description": "Maximum near-surface air temperature (monthly aggregated)" + "name": "Monthly Climate Moisture Index", + "alternateName": "cmi", + "unitText": "kg m-2 month-1", + "description": "Monthly ratio of precipitation to potential evapotranspiration; indicator of climatic water availability." }, { "@type": "PropertyValue", - "name": "tasmin", - "description": "Minimum near-surface air temperature (monthly aggregated)" + "name": "Near-Surface Relative Humidity", + "alternateName": "hurs", + "unitText": "percent", + "description": "Relative humidity near the surface." 
}, { "@type": "PropertyValue", - "name": "pr", - "description": "Precipitation (monthly accumulated/aggregated)" + "name": "Monthly Potential Evapotranspiration", + "alternateName": "pet", + "unitText": "kg m-2 month-1", + "description": "Total potential evapotranspiration for the month assuming unlimited water availability, calculated using Penman-Monteith." + }, + { + "@type": "PropertyValue", + "name": "Precipitation", + "alternateName": "pr", + "unitText": "kg m-2 month-1", + "description": "Precipitation including liquid and solid phases." + }, + { + "@type": "PropertyValue", + "name": "Surface Downwelling Shortwave Flux in Air", + "alternateName": "rsds", + "unitText": "W m-2", + "description": "Surface solar irradiance for UV calculations." + }, + { + "@type": "PropertyValue", + "name": "Near-Surface Wind Speed", + "alternateName": "sfcWind", + "unitText": "m s-1", + "description": "Near-surface, usually 10 meter, wind speed." + }, + { + "@type": "PropertyValue", + "name": "Standardized Precipitation Evapotranspiration Index", + "alternateName": "spei12", + "unitText": "1", + "description": "Standardized climatic water balance index over a 12-month integration period." + }, + { + "@type": "PropertyValue", + "name": "Standardized Precipitation Index", + "alternateName": "spi12", + "unitText": "1", + "description": "Standardized precipitation anomaly index over a 12-month integration period." + }, + { + "@type": "PropertyValue", + "name": "Daily Mean Near-Surface Air Temperature", + "alternateName": "tas", + "unitText": "K", + "description": "Near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Daily Maximum Near-Surface Air Temperature", + "alternateName": "tasmax", + "unitText": "K", + "description": "Maximum near-surface, usually 2 meter, air temperature." 
+ }, + { + "@type": "PropertyValue", + "name": "Daily Minimum Near-Surface Air Temperature", + "alternateName": "tasmin", + "unitText": "K", + "description": "Minimum near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Vapor Pressure Deficit", + "alternateName": "vpd", + "unitText": "Pa", + "description": "Difference between saturation vapor pressure and actual vapor pressure." } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -70,7 +141,7 @@ "@type": "DataDownload", "name": "CHELSA-monthly downloads (COG)", "description": "Download portal for CHELSA-monthly.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.unil.cloud.switch.ch%2Fchelsa02%2F&prefix=chelsa%2Fglobal%2Fmonthly%2F", "encodingFormat": [ "image/tiff", "application=geotiff", @@ -87,7 +158,92 @@ ] } ], - "citation": "Karger, D. N.; Conrad, O.; Böhner, J.; Kawohl, T.; Kreft, H.; Soria-Auza, R. W.; Zimmermann, N. E.; Linder, H. P.; Kessler, M. (2017). Climatologies at high resolution for the earth's land surface areas. Scientific Data, 4, 170122. https://doi.org/10.1038/sdata.2017.122", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "Climatologies at high resolution for the earth's land surface areas", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "O. Conrad" + }, + { + "@type": "Person", + "name": "J. Böhner" + }, + { + "@type": "Person", + "name": "T. Kawohl" + }, + { + "@type": "Person", + "name": "H. Kreft" + }, + { + "@type": "Person", + "name": "R. W. Soria-Auza" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "H. P. Linder" + }, + { + "@type": "Person", + "name": "M. 
Kessler" + } + ], + "datePublished": "2017", + "isPartOf": { + "@type": "Periodical", + "name": "Scientific Data" + }, + "volumeNumber": "4", + "pagination": "170122", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.1038/sdata.2017.122" + }, + "sameAs": "https://doi.org/10.1038/sdata.2017.122" + }, + { + "@type": "Dataset", + "name": "CHELSA-monthly climate data at high resolution", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "P. Brun" + }, + { + "@type": "Person", + "name": "F. Zilker" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2025", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.686" + }, + "sameAs": "https://doi.org/10.16904/envidat.686" + } + ], "about": [ { "@type": "Thing", @@ -98,4 +254,4 @@ "name": "Monthly aggregates" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld index 076d7d7..21f25c5 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld @@ -36,23 +36,31 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "pr", - "description": "Precipitation" + "name": "Precipitation", + "alternateName": "pr", + "unitText": "kg m-2 day-1", + "description": "Precipitation including liquid and solid phases." }, { "@type": "PropertyValue", - "name": "tasmax", - "description": "Near-surface air temperature (maximum)" + "name": "Daily Maximum Near-Surface Air Temperature", + "alternateName": "tasmax", + "unitText": "K", + "description": "Maximum near-surface, usually 2 meter, air temperature." 
}, { "@type": "PropertyValue", - "name": "tasmin", - "description": "Near-surface air temperature (minimum)" + "name": "Daily Minimum Near-Surface Air Temperature", + "alternateName": "tasmin", + "unitText": "K", + "description": "Minimum near-surface, usually 2 meter, air temperature." }, { "@type": "PropertyValue", - "name": "tz", - "description": "Air temperature lapse rate" + "name": "Air Temperature Lapse Rate", + "alternateName": "tz", + "unitText": "K m-1", + "description": "Rate of change in air temperature with altitude calculated over the centennial period." } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -67,7 +75,7 @@ "@type": "DataDownload", "name": "CHELSA-TraCE21k-centennial downloads (COG)", "description": "Download portal for TraCE21k centennial climatologies.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.zhdk.cloud.switch.ch%2Fchelsa01%2F&prefix=chelsa_trace21k%2Fglobal%2Fcentennial%2F", "encodingFormat": [ "image/tiff", "application=geotiff", @@ -75,7 +83,84 @@ ] } ], - "citation": "Karger, D. N., Nobis, M. P., Normand, S., Graham, C. H., & Zimmermann, N. E. (2020). CHELSA-TraCE21k: Downscaled transient temperature and precipitation data since the last glacial maximum. EnviDat. https://doi.org/10.16904/envidat.211", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "CHELSA-TraCE21k: high-resolution (1 km) downscaled transient temperature and precipitation data since the Last Glacial Maximum", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "M. P. Nobis" + }, + { + "@type": "Person", + "name": "S. Normand" + }, + { + "@type": "Person", + "name": "C. H. Graham" + }, + { + "@type": "Person", + "name": "N. E. 
Zimmermann" + } + ], + "datePublished": "2023", + "isPartOf": { + "@type": "Periodical", + "name": "Climate of the Past" + }, + "volumeNumber": "19", + "pagination": "439-456", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/cp-19-439-2023" + }, + "sameAs": "https://doi.org/10.5194/cp-19-439-2023" + }, + { + "@type": "Dataset", + "name": "CHELSA-TraCE21k: Downscaled transient temperature and precipitation data since the last glacial maximum", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "M. P. Nobis" + }, + { + "@type": "Person", + "name": "S. Normand" + }, + { + "@type": "Person", + "name": "C. H. Graham" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2020", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.211" + }, + "sameAs": "https://doi.org/10.16904/envidat.211" + } + ], "about": [ { "@type": "Thing", @@ -86,4 +171,4 @@ "name": "Glacial-interglacial climate variability" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld index b5febbf..d162967 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld @@ -35,13 +35,157 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "BIO1–BIO19", - "description": "Bioclimatic variables derived from temperature and precipitation" + "name": "Mean Annual Near-Surface Air Temperature", + "alternateName": "bio01", + "unitText": "K", + "description": "Mean annual temperature calculated as the average of mean monthly temperatures over the year" }, { "@type": "PropertyValue", - "name": 
"topographic predictors", - "description": "Topographic variables included with bioclim products (where provided)" + "name": "Mean Diurnal Near-Surface Air Temperature Range", + "alternateName": "bio02", + "unitText": "K", + "description": "Mean diurnal temperature range computed as the average of monthly daily maximum minus daily minimum near-surface air temperature" + }, + { + "@type": "PropertyValue", + "name": "Isothermality", + "alternateName": "bio03", + "unitText": "K", + "description": "Isothermality: 100 × bio02 ÷ bio07; compares day-night variability to annual temperature range" + }, + { + "@type": "PropertyValue", + "name": "Temperature Seasonality", + "alternateName": "bio04", + "unitText": "K", + "description": "Temperature seasonality given by the standard deviation of mean monthly temperatures" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Maximum Near-Surface Air Temperature of the Warmest Month", + "alternateName": "bio05", + "unitText": "K", + "description": "Highest monthly mean of daily maximum temperatures across the year; indicates peak thermal conditions" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Minimum Near-Surface Air Temperature of the Coldest Month", + "alternateName": "bio06", + "unitText": "K", + "description": "Lowest monthly mean of daily minimum temperatures across the year; characterizes winter cold intensity" + }, + { + "@type": "PropertyValue", + "name": "Annual Daily Mean Near-Surface Air Temperature Range", + "alternateName": "bio07", + "unitText": "K", + "description": "Annual temperature range calculated as bio05 minus bio06; measures amplitude between warmest and coldest months" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Near-Surface Air Temperature of the Wettest Quarter", + "alternateName": "bio08", + "unitText": "K", + "description": "Average monthly mean temperature over the wettest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily 
Near-Surface Air Temperature of the Driest Quarter", + "alternateName": "bio09", + "unitText": "K", + "description": "Average monthly mean temperature over the driest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Mean Near-Surface Air Temperature of the Warmest Quarter", + "alternateName": "bio10", + "unitText": "K", + "description": "Average monthly mean temperature over the warmest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Daily Mean Near-Surface Air Temperature of the Coldest Quarter", + "alternateName": "bio11", + "unitText": "K", + "description": "Average monthly mean temperature over the coldest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Annual Precipitation", + "alternateName": "bio12", + "unitText": "kg m-2 year-1", + "description": "Sum of monthly precipitation totals across the year" + }, + { + "@type": "PropertyValue", + "name": "Precipitation of the Wettest Month", + "alternateName": "bio13", + "unitText": "kg m-2 month-1", + "description": "Maximum monthly precipitation total" + }, + { + "@type": "PropertyValue", + "name": "Precipitation of the Driest Month", + "alternateName": "bio14", + "unitText": "kg m-2 month-1", + "description": "Minimum monthly precipitation total" + }, + { + "@type": "PropertyValue", + "name": "Precipitation Seasonality", + "alternateName": "bio15", + "unitText": "kg m-2", + "description": "Coefficient of variation: 100 × standard deviation ÷ mean of monthly precipitation totals" + }, + { + "@type": "PropertyValue", + "name": "Mean Monthly Precipitation of the Wettest Quarter", + "alternateName": "bio16", + "unitText": "kg m-2 month-1", + "description": "Average monthly precipitation during the wettest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Monthly Precipitation of the Driest Quarter", + "alternateName": "bio17", + "unitText": "kg m-2 month-1", + 
"description": "Average monthly precipitation during the driest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Monthly Precipitation of the Warmest Quarter", + "alternateName": "bio18", + "unitText": "kg m-2 month-1", + "description": "Average monthly precipitation during the warmest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Mean Monthly Precipitation of the Coldest Quarter", + "alternateName": "bio19", + "unitText": "kg m-2 month-1", + "description": "Average monthly precipitation during the coldest three-month period of the year" + }, + { + "@type": "PropertyValue", + "name": "Ice Sheet Surface Altitude", + "alternateName": "glz", + "unitText": "m", + "description": "Elevation of the ice sheet surface above sea level." + }, + { + "@type": "PropertyValue", + "name": "Surface Altitude", + "alternateName": "orog", + "unitText": "m", + "description": "Geometric height of the land surface above the geoid." + }, + { + "@type": "PropertyValue", + "name": "Snow Cover Days", + "alternateName": "scd", + "unitText": "days", + "description": "Number of days per year with snow cover present at the surface." } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -56,7 +200,7 @@ "@type": "DataDownload", "name": "CHELSA-TraCE21k-centennial-bioclim downloads (COG)", "description": "Download portal for TraCE21k centennial bioclim variables.", - "contentUrl": "https://envicloud.wsl.ch/", + "contentUrl": "https://envicloud.wsl.ch/#/?bucket=https%3A%2F%2Fos.zhdk.cloud.switch.ch%2Fchelsa01%2F&prefix=chelsa_trace21k%2Fglobal%2Fbioclim%2F", "encodingFormat": [ "image/tiff", "application=geotiff", @@ -64,7 +208,68 @@ ] } ], - "citation": "Karger, D. N., Nobis, M. P., Normand, S., Graham, C. H., & Zimmermann, N. E. (2020). CHELSA-TraCE21k: Downscaled transient temperature and precipitation data since the last glacial maximum. EnviDat. 
https://doi.org/10.16904/envidat.211", + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "CHELSA-TraCE21k: high-resolution (1 km) downscaled transient temperature and precipitation data since the Last Glacial Maximum", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "M. P. Nobis" + }, + { + "@type": "Person", + "name": "S. Normand" + }, + { + "@type": "Person", + "name": "C. H. Graham" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + } + ], + "datePublished": "2023", + "isPartOf": { + "@type": "Periodical", + "name": "Climate of the Past" + }, + "volumeNumber": "19", + "pagination": "439-456", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/cp-19-439-2023" + }, + "sameAs": "https://doi.org/10.5194/cp-19-439-2023" + }, + { + "@type": "Dataset", + "name": "CHELSA-TraCE21k-centennial-bioclim and topographic data since the Last Glacial Maximum", + "author": [ + { + "@type": "Person", + "name": "D. N. 
Karger" + } + ], + "publisher": { + "@type": "Organization", + "name": "EnviDat" + }, + "datePublished": "2025", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.16904/envidat.691" + }, + "sameAs": "https://doi.org/10.16904/envidat.691" + } + ], "about": [ { "@type": "Thing", @@ -75,4 +280,4 @@ "name": "Species distribution modeling (paleo)" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld index c214eb7..e21ae08 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld @@ -36,13 +36,38 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "tas/tasmax/tasmin", - "description": "Near-surface air temperature summaries" + "name": "Precipitation", + "alternateName": "pr", + "unitText": "kg m-2 day-1", + "description": "Precipitation including liquid and solid phases." }, { "@type": "PropertyValue", - "name": "pr", - "description": "Precipitation" + "name": "Surface Downwelling Shortwave Flux in Air", + "alternateName": "rsds", + "unitText": "W m-2", + "description": "Surface solar irradiance for UV calculations." + }, + { + "@type": "PropertyValue", + "name": "Daily Mean Near-Surface Air Temperature", + "alternateName": "tas", + "unitText": "K", + "description": "Near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Daily Maximum Near-Surface Air Temperature", + "alternateName": "tasmax", + "unitText": "K", + "description": "Maximum near-surface, usually 2 meter, air temperature." + }, + { + "@type": "PropertyValue", + "name": "Daily Minimum Near-Surface Air Temperature", + "alternateName": "tasmin", + "unitText": "K", + "description": "Minimum near-surface, usually 2 meter, air temperature." 
} ], "distribution": [ @@ -50,7 +75,7 @@ "@type": "DataDownload", "name": "CHELSA-W5E5-daily downloads (ISIMIP portal)", "description": "The catalog links the W5E5-based CHELSA daily product to the ISIMIP data portal.", - "contentUrl": "https://data.isimip.org/", + "contentUrl": "https://data.isimip.org/10.48364/ISIMIP.836809.3", "encodingFormat": [ "text/html" ] @@ -68,5 +93,54 @@ "@type": "Thing", "name": "Impact modeling" } + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "CHELSA-W5E5: daily 1 km meteorological forcing data for climate impact studies", + "author": [ + { + "@type": "Person", + "name": "D. N. Karger" + }, + { + "@type": "Person", + "name": "S. Lange" + }, + { + "@type": "Person", + "name": "C. Hari" + }, + { + "@type": "Person", + "name": "C. P. O. Reyer" + }, + { + "@type": "Person", + "name": "O. Conrad" + }, + { + "@type": "Person", + "name": "N. E. Zimmermann" + }, + { + "@type": "Person", + "name": "K. Frieler" + } + ], + "datePublished": "2023", + "isPartOf": { + "@type": "Periodical", + "name": "Earth System Science Data" + }, + "volumeNumber": "15", + "pagination": "2445-2464", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/essd-15-2445-2023" + }, + "sameAs": "https://doi.org/10.5194/essd-15-2445-2023" + } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld index 8c0099b..0189211 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld @@ -69,7 +69,7 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "Evergreen/Deciduous Needleleaf Trees", + "name": "Evergreen and Deciduous Needleleaf Trees", "description": "Consensus prevalence (0–100%)" }, { @@ -84,7 +84,7 @@ }, { "@type": 
"PropertyValue", - "name": "Mixed/Other Trees", + "name": "Mixed and Other Trees", "description": "Consensus prevalence (0–100%)" }, { @@ -109,12 +109,12 @@ }, { "@type": "PropertyValue", - "name": "Urban/Built-up", + "name": "Urban and Built-Up", "description": "Consensus prevalence (0–100%)" }, { "@type": "PropertyValue", - "name": "Snow/Ice", + "name": "Snow and Ice", "description": "Consensus prevalence (0–100%)" }, { @@ -216,4 +216,4 @@ "name": "Ecosystem modelling" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/FLO1K/flo1k.jsonld b/data/objects/summoned/generated/FLO1K/flo1k.jsonld index 4578ee6..74fd801 100644 --- a/data/objects/summoned/generated/FLO1K/flo1k.jsonld +++ b/data/objects/summoned/generated/FLO1K/flo1k.jsonld @@ -50,9 +50,10 @@ "temporalCoverage": "1960-01-01/2015-12-31", "spatialCoverage": { "@type": "Place", + "name": "Global land areas excluding Antarctica", "geo": { "@type": "GeoShape", - "box": "20,-40 50,10" + "box": "-180,-90 180,90" } }, "spatialResolution": "30 arc-second (~1 km)", diff --git a/data/objects/summoned/generated/G-RUN/g-run.jsonld b/data/objects/summoned/generated/G-RUN/g-run.jsonld index c7a4f0d..a371387 100644 --- a/data/objects/summoned/generated/G-RUN/g-run.jsonld +++ b/data/objects/summoned/generated/G-RUN/g-run.jsonld @@ -79,9 +79,10 @@ "temporalCoverage": "1902-01-01/2014-12-31", "spatialCoverage": { "@type": "Place", + "name": "Global gridded land runoff reconstruction", "geo": { "@type": "GeoShape", - "box": "20,-40 50,10" + "box": "-180,-90 180,90" } }, "spatialResolution": "0.5 degree", diff --git a/data/objects/summoned/generated/GFC/gfc.jsonld b/data/objects/summoned/generated/GFC/gfc.jsonld index 0f14af1..376d147 100644 --- a/data/objects/summoned/generated/GFC/gfc.jsonld +++ b/data/objects/summoned/generated/GFC/gfc.jsonld @@ -119,39 +119,52 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "treecover2000", + "name": "Tree Canopy Cover in 2000", + 
"alternateName": "treecover2000", "unitText": "percent", - "description": "Tree canopy cover for year 2000 (0–100), defined as canopy closure for vegetation taller than 5 m." + "description": "Tree canopy cover for year 2000, defined as canopy closure for vegetation taller than 5 m." }, { "@type": "PropertyValue", - "name": "gain", - "description": "Forest gain during 2000–2012 (binary: 1 gain, 0 no gain)." + "name": "Forest Cover Gain", + "alternateName": "gain", + "unitText": "binary", + "description": "Forest gain during 2000-2012." }, { "@type": "PropertyValue", - "name": "lossyear", - "description": "Year of gross forest cover loss event (0 no loss; 1–23 correspond primarily to 2001–2023)." + "name": "Year of Gross Forest Cover Loss", + "alternateName": "lossyear", + "unitText": "year code", + "description": "Year of gross forest cover loss event; 0 indicates no loss and 1-23 correspond primarily to 2001-2023." }, { "@type": "PropertyValue", - "name": "datamask", - "description": "Data mask: 0 no data, 1 mapped land surface, 2 persistent water bodies (based on 2000–2012)." + "name": "Data Mask", + "alternateName": "datamask", + "unitText": "class code", + "description": "Data mask classes for no data, mapped land surface, and persistent water bodies." }, { "@type": "PropertyValue", - "name": "first", - "description": "Circa year-2000 Landsat cloud-free composite in red, NIR, SWIR1, SWIR2 bands (median, quality-assessed growing-season observations)." + "name": "First Reference Landsat Composite", + "alternateName": "first", + "unitText": "digital number", + "description": "Circa year-2000 Landsat cloud-free composite in red, NIR, SWIR1, and SWIR2 bands." }, { "@type": "PropertyValue", - "name": "last", - "description": "Circa year-2023 Landsat cloud-free composite in red, NIR, SWIR1, SWIR2 bands (median, quality-assessed growing-season observations)." 
+ "name": "Last Reference Landsat Composite", + "alternateName": "last", + "unitText": "digital number", + "description": "Circa year-2023 Landsat cloud-free composite in red, NIR, SWIR1, and SWIR2 bands." }, { "@type": "PropertyValue", - "name": "loss (derived)", - "description": "Not released as a separate download layer in newer versions; corresponds to pixels where lossyear > 0." + "name": "Gross Forest Cover Loss", + "alternateName": "loss", + "unitText": "derived binary", + "description": "Derived indicator for pixels where lossyear is greater than 0." } ], "measurementTechnique": [ @@ -333,4 +346,4 @@ "name": "Landsat" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/GHSL/ghsl.jsonld b/data/objects/summoned/generated/GHSL/ghsl.jsonld index 323b5fc..9413b26 100644 --- a/data/objects/summoned/generated/GHSL/ghsl.jsonld +++ b/data/objects/summoned/generated/GHSL/ghsl.jsonld @@ -51,9 +51,10 @@ "temporalCoverage": "1975-01-01/2030-12-31", "spatialCoverage": { "@type": "Place", + "name": "Global Human Settlement Layer product coverage", "geo": { "@type": "GeoShape", - "box": "20,-40 50,10" + "box": "-180,-90 180,90" } }, "variableMeasured": [ @@ -79,7 +80,7 @@ }, { "@type": "PropertyValue", - "name": "Settlement typology / Degree of Urbanisation", + "name": "Settlement Typology and Degree of Urbanisation", "description": "Settlement model classification based on the UN-recommended Degree of Urbanisation methodology." 
} ], diff --git a/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld b/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld index cc8c2c5..ce06e08 100644 --- a/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld +++ b/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld @@ -53,9 +53,10 @@ ], "spatialCoverage": { "@type": "Place", + "name": "Earth's vegetated land surface", "geo": { "@type": "GeoShape", - "box": "20,-40 50,10" + "box": "-180,-90 180,90" } }, "temporalCoverage": "2000-01-01/2013-12-31", @@ -64,7 +65,6 @@ "image/tiff", "application/netcdf" ], - "license": "https://files.ntsg.umt.edu/", "conditionsOfAccess": "Access is provided via HTTP file listing at the dataset URL. Refer to the provider's data policies and downloading guidance on the hosting site.", "isAccessibleForFree": true, "distribution": [ @@ -72,19 +72,74 @@ "@type": "DataDownload", "name": "MOD17 products (directory listing)", "contentUrl": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "MOD17 GeoTIFF products (typical distribution)", "contentUrl": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/GeoTIFF/", - "encodingFormat": ["image/tiff"] + "encodingFormat": [ + "image/tiff" + ] }, { "@type": "DataDownload", "name": "MOD17 HDF-EOS products (typical distribution)", "contentUrl": "http://files.ntsg.umt.edu/data/NTSG_Products/MOD17/", - "encodingFormat": ["application/x-hdf"] + "encodingFormat": [ + "application/x-hdf" + ] + } + ], + "citation": [ + { + "@type": "CreativeWork", + "name": "MOD17 User’s Guide", + "publisher": { + "@type": "Organization", + "name": "Numerical Terradynamic Simulation Group, University of Montana" + }, + "sameAs": "https://scholarworks.umt.edu/ntsg_pubs/227/" + }, + { + "@type": "ScholarlyArticle", + "name": "A continuous satellite-derived measure of global terrestrial primary production", + "author": [ + { + 
"@type": "Person", + "name": "S. W. Running" + }, + { + "@type": "Person", + "name": "R. R. Nemani" + }, + { + "@type": "Person", + "name": "F. A. Heinsch" + }, + { + "@type": "Person", + "name": "M. Zhao" + }, + { + "@type": "Person", + "name": "M. Reeves" + }, + { + "@type": "Person", + "name": "H. Hashimoto" + } + ], + "datePublished": "2004", + "isPartOf": { + "@type": "Periodical", + "name": "BioScience" + }, + "volumeNumber": "54", + "issueNumber": "6", + "pagination": "547-560" } ] } diff --git a/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld b/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld index 0d7e052..6b7478a 100644 --- a/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld +++ b/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld @@ -64,9 +64,10 @@ "temporalCoverage": "1901-01-01/2019-12-31", "spatialCoverage": { "@type": "Place", + "name": "Global terrestrial water storage reconstruction", "geo": { "@type": "GeoShape", - "box": "20,-40 50,10" + "box": "-180,-90 180,90" } }, "spatialResolution": "0.5 degree", diff --git a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld index 4020296..29ba1ec 100644 --- a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld +++ b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld @@ -60,9 +60,10 @@ "temporalCoverage": "1970-01-01/2016-12-31", "spatialCoverage": { "@type": "Place", + "name": "Global land areas", "geo": { "@type": "GeoShape", - "box": "20,-40 50,10" + "box": "-180,-90 180,90" } }, "spatialResolution": "0.5 degree", diff --git a/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld b/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld index 896a332..0397515 100644 --- 
a/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld +++ b/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld @@ -52,10 +52,7 @@ "isAccessibleForFree": true, "spatialCoverage": { "@type": "Place", - "geo": { - "@type": "GeoShape", - "box": "20,-40 50,10" - } + "name": "Global and regional hydrographic products; product-specific coverage varies" }, "spatialResolution": [ "3 arc-second", @@ -65,7 +62,7 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "Catchment / sub-basin boundaries", + "name": "Catchment and Sub-Basin Boundaries", "description": "Vector catchment and sub-basin boundary products derived from HydroSHEDS hydrography." }, { diff --git a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld index 11b4972..051a634 100644 --- a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld @@ -2,10 +2,10 @@ "@context": "https://schema.org/", "@type": "Dataset", "comment": "This dataset metadata was generated by AI.", - "@id": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "@id": "https://global-hydrodynamics.github.io/MERIT_DEM/#dataset", "name": "MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model)", "description": "MERIT DEM is a high-accuracy global digital elevation model (DEM) at 3 arc-second resolution (~90 m at the equator), developed by removing multiple error components from existing spaceborne DEMs. The developers separated and eliminated absolute bias, stripe noise, speckle noise, and tree height bias using multiple satellite datasets and filtering techniques. MERIT DEM was created by processing baseline DEMs including NASA SRTM3 DEM v2.1, JAXA AW3D-30m DEM v1, and Viewfinder Panoramas' DEM. After error removal, land areas mapped with 2 m or better vertical accuracy increased from 39% to 58%. 
The dataset represents terrain elevations in meters referenced to WGS84 horizontal datum and the EGM96 geoid, covering global land areas between 90°N and 60°S. Data are organized as 5°×5° tiles (6000×6000 pixels) and distributed in 30°×30° packages; filenames encode the center of the lower-left pixel (e.g., \"n30w120_dem.tif\" covers N30–N35 and W120–W115) and package names encode the lower-left corner of the 30°×30° domain (e.g., \"dem_tif_n30w120.tar\" contains tiles for N30–N60 and W120–W090).", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "url": "https://global-hydrodynamics.github.io/MERIT_DEM/", "keywords": [ "topography", "Digital Elevation Model", @@ -88,44 +88,21 @@ "distribution": [ { "@type": "DataDownload", - "name": "MERIT DEM – ESRI EHdr (FLT) packages (30°×30° tar.gz)", - "description": "Elevation tiles distributed as 30°×30° packages containing 5°×5° tiles (6000×6000 pixels). ESRI FLT rasters with HDR (Fortran Direct Access / ESRI FLT style; 4-byte float, little endian). Package names encode the lower-left corner (e.g., dem_flt_n30w120.tar.gz contains tiles for N30–N60, W120–W090). Filenames encode the center of the lower-left pixel (e.g., n30w120_dem.* covers N30–N35, W120–W115). Access may require registration/password per the provider instructions.", + "name": "MERIT DEM official product page", + "description": "Current official product page describing MERIT DEM, registration, download access, version, data format, coverage, license terms, and citation guidance.", "encodingFormat": [ - "application/octet-stream" - ], - "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/dem_flt_n30w120.tar.gz", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" - }, - { - "@type": "DataDownload", - "name": "MERIT DEM – GeoTIFF packages (30°×30° tar)", - "description": "Elevation tiles distributed as 30°×30° packages containing 5°×5° GeoTIFF tiles (6000×6000 pixels). 
Package names encode the lower-left corner (e.g., dem_tif_n30w120.tar contains tiles for N30–N60, W120–W090). Individual tile filenames encode the center of the lower-left pixel (e.g., n30w120_dem.tif covers N30–N35, W120–W115). Access may require registration/password per the provider instructions.", - "encodingFormat": [ - "image/tiff", - "application=geotiff" - ], - "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/dem_tif_n30w120.tar", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" - }, - { - "@type": "DataDownload", - "name": "MERIT DEM – MRR (single merged raster)", - "description": "Single merged MRR raster created by merging all MERIT DEM source tiles; vertical resolution converted to 0.01 meters by rounding to the nearest centimeter. MRR rasters can be displayed in the MapInfo Pro GIS platform. Prepared by Sam Roberts (Roberts Geospatial).", - "encodingFormat": [ - "application/octet-stream" + "text/html" ], - "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/MERIT_DEM.mrr", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + "contentUrl": "https://global-hydrodynamics.github.io/MERIT_DEM/" }, { "@type": "DataDownload", - "name": "MERIT DEM – 5°×5° tile download page (GeoTIFF tiles)", - "description": "Alternative download page for individual 5°×5° tiles (useful when large package downloads are difficult). Tile filenames encode the center of the lower-left pixel (e.g., n30w120_dem.tif covers N30–N35, W120–W115). 
Access may require registration/password per the provider instructions.", + "name": "MERIT DEM download access", + "description": "Provider download access page reached after completing the MERIT DEM registration and license agreement process; current provider documentation states data are distributed in GeoTIFF format.", "encodingFormat": [ "text/html" ], - "contentUrl": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/list_5deg.html", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/list_5deg.html" + "contentUrl": "https://global-hydrodynamics.github.io/MERIT_DEM/" } ], "citation": [ @@ -205,4 +182,4 @@ "name": "Geoscience applications" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld b/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld index d230fd0..1c7a364 100644 --- a/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/webpage.jsonld @@ -2,20 +2,20 @@ "@context": "https://schema.org/", "@type": "WebPage", "comment": "This dataset metadata was generated by AI.", - "@id": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "@id": "https://global-hydrodynamics.github.io/MERIT_DEM/", "name": "MERIT DEM: Multi-Error-Removed Improved-Terrain DEM", "description": "Webpage for the MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model) dataset. MERIT DEM is a high-accuracy global digital elevation model at 3 arc-second resolution (~90 m at the equator), developed by removing multiple error components from existing spaceborne DEMs. 
The site provides information about the dataset, download instructions, licensing, and citation information.", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/", + "url": "https://global-hydrodynamics.github.io/MERIT_DEM/", "inLanguage": "en", "isPartOf": { "@type": "WebSite", - "name": "University of Tokyo, Institute of Industrial Science", - "url": "https://www.iis.u-tokyo.ac.jp/en/" + "name": "Global Hydrodynamics Lab Yamazaki Lab", + "url": "https://global-hydrodynamics.github.io/" }, "about": { "@type": "Dataset", "name": "MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model)", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + "url": "https://global-hydrodynamics.github.io/MERIT_DEM/" }, "publisher": { "@type": "Organization", @@ -32,7 +32,7 @@ "mainEntity": { "@type": "Dataset", "name": "MERIT DEM (Multi-Error-Removed Improved-Terrain Digital Elevation Model)", - "url": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + "url": "https://global-hydrodynamics.github.io/MERIT_DEM/" }, "breadcrumb": { "@type": "BreadcrumbList", @@ -40,20 +40,20 @@ { "@type": "ListItem", "position": 1, - "name": "University of Tokyo", - "item": "https://www.u-tokyo.ac.jp/en/" + "name": "Global Hydrodynamics Lab Yamazaki Lab", + "item": "https://global-hydrodynamics.github.io/" }, { "@type": "ListItem", "position": 2, - "name": "Institute of Industrial Science", - "item": "https://www.iis.u-tokyo.ac.jp/en/" + "name": "Products", + "item": "https://global-hydrodynamics.github.io/" }, { "@type": "ListItem", "position": 3, "name": "MERIT DEM", - "item": "http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/" + "item": "https://global-hydrodynamics.github.io/MERIT_DEM/" } ] }, @@ -68,4 +68,4 @@ "SRTM", "AW3D" ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld index 58be9f0..c53f59b 100644 --- 
a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld @@ -72,7 +72,18 @@ "Land cover classification and change detection using modern modeling approaches (including deep learning and time-series analysis)" ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", - "citation": "U.S. Geological Survey (USGS), 2024, Annual NLCD Collection 1 Science Products (ver. 1.1, June 2025): U.S. Geological Survey data release, https://doi.org/10.5066/P94UXNTS.", + "citation": [ + { + "@type": "CreativeWork", + "name": "U.S. Geological Survey (USGS), 2024, Annual NLCD Collection 1 Science Products (ver. 1.1, June 2025): U.S. Geological Survey data release, https://doi.org/10.5066/P94UXNTS", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5066/P94UXNTS" + }, + "sameAs": "https://doi.org/10.5066/P94UXNTS" + } + ], "distribution": [ { "@type": "DataDownload", @@ -117,4 +128,4 @@ "name": "Land change" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld index 3b233b7..735095b 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld @@ -71,7 +71,18 @@ "Field observations (BLM AIM plots) and machine learning regression modeling" ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", - "citation": "Dahal, D., Boyte, S., Megard, L., Postma, K., and Pastick, N., 2025, Early Estimates of Exotic Annual Grass (EAG) in the Sagebrush Biome, USA, 2025: U.S. 
Geological Survey data release, https://doi.org/10.5066/P14VQEGO.", + "citation": [ + { + "@type": "CreativeWork", + "name": "Dahal, D., Boyte, S., Megard, L., Postma, K., and Pastick, N., 2025, Early Estimates of Exotic Annual Grass (EAG) in the Sagebrush Biome, USA, 2025: U.S. Geological Survey data release, https://doi.org/10.5066/P14VQEGO", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5066/P14VQEGO" + }, + "sameAs": "https://doi.org/10.5066/P14VQEGO" + } + ], "distribution": [ { "@type": "DataDownload", @@ -109,4 +120,4 @@ "name": "Remote sensing" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld index 13eb1cb..9442aad 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld @@ -63,7 +63,16 @@ ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "citation": [ - "Dewitz, J., and U.S. Geological Survey, 2021, National Land Cover Database (NLCD) 2019 Products (ver. 2.0, June 2021): U.S. Geological Survey data release, https://doi.org/10.5066/P9KZCM54." + { + "@type": "CreativeWork", + "name": "Dewitz, J., and U.S. Geological Survey, 2021, National Land Cover Database (NLCD) 2019 Products (ver. 2.0, June 2021): U.S. 
Geological Survey data release, https://doi.org/10.5066/P9KZCM54", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5066/P9KZCM54" + }, + "sameAs": "https://doi.org/10.5066/P9KZCM54" + } ], "distribution": [ { @@ -102,4 +111,4 @@ "name": "Remote sensing" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld index 4330e91..1106ccb 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld @@ -92,5 +92,17 @@ "@type": "Thing", "name": "Continental land monitoring" } + ], + "citation": [ + { + "@type": "CreativeWork", + "name": "NALCMS: The North American Land Change Monitoring System", + "datePublished": "2024", + "publisher": { + "@type": "Organization", + "name": "Commission for Environmental Cooperation" + }, + "sameAs": "https://www.cec.org/publications/nalcms/" + } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld index 98908a8..9a19d35 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld @@ -135,5 +135,22 @@ "@type": "Thing", "name": "Remote sensing" } + ], + "citation": [ + { + "@type": "CreativeWork", + "name": "Rangeland Condition Monitoring Assessment and Projection (RCMAP)", + "datePublished": "2022", + "publisher": { + "@type": "Organization", + "name": "U.S. 
Geological Survey" + }, + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.3133/fs20223036" + }, + "sameAs": "https://doi.org/10.3133/fs20223036" + } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld index 13fdc40..cea03f1 100644 --- a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld +++ b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld @@ -59,11 +59,7 @@ "temporalCoverage": "2011-01-01/..", "spatialCoverage": { "@type": "Place", - "name": "Energy production regions (primarily northeastern USA; see dataset portals for exact coverage)", - "geo": { - "@type": "GeoShape", - "box": "20,-40 50,10" - } + "name": "Energy production regions, primarily the northeastern United States; see dataset portals for exact site coverage" }, "variableMeasured": [ { diff --git a/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld b/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld index 4909dfa..5ff27d8 100644 --- a/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld +++ b/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld @@ -48,9 +48,10 @@ ], "spatialCoverage": { "@type": "Place", + "name": "Global soil property maps", "geo": { "@type": "GeoShape", - "box": "20,-40 50,10" + "box": "-180,-90 180,90" } }, "spatialResolution": "Approximately 250 m", @@ -96,45 +97,117 @@ "name": "ISRIC Data Hub (GeoNetwork) catalog record", "description": "GeoNetwork catalog record in the ISRIC Data Hub describing a SoilGrids-related dataset entry.", "contentUrl": "https://data.isric.org/geonetwork/srv/api/records/41cb0ae9-1604-4807-96e6-0dc8c94c5d22?language=all", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "ISRIC Data Hub sitemap / discovery endpoint", "description": "GeoNetwork API sitemap 
endpoint used for dataset discovery and indexing.", "contentUrl": "https://data.isric.org/geonetwork/srv/api/sitemap", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "ISRIC SoilGrids file distribution (WebDAV root)", "description": "ISRIC WebDAV directory listing for SoilGrids distributions (includes latest and former releases).", "contentUrl": "https://files.isric.org/soilgrids/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "SoilGrids latest data directory (WebDAV)", "description": "Directory listing for SoilGrids latest data products (organized by variable).", "contentUrl": "https://files.isric.org/soilgrids/latest/data/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "ISRIC SoilGrids overview page", "description": "ISRIC overview page describing SoilGrids, licensing, and access methods.", "contentUrl": "https://isric.org/explore/soilgrids", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] } ], "about": [ - { "@type": "Thing", "name": "Soil" }, - { "@type": "Thing", "name": "Soil properties" }, - { "@type": "Thing", "name": "Digital soil mapping" }, - { "@type": "Thing", "name": "Geospatial raster data" } + { + "@type": "Thing", + "name": "Soil" + }, + { + "@type": "Thing", + "name": "Soil properties" + }, + { + "@type": "Thing", + "name": "Digital soil mapping" + }, + { + "@type": "Thing", + "name": "Geospatial raster data" + } ], "sameAs": [ "https://isric.org/explore/soilgrids", "https://files.isric.org/soilgrids/" + ], + "citation": [ + { + "@type": "ScholarlyArticle", + "name": "SoilGrids 2.0: producing soil information for the globe with quantified spatial uncertainty", + "author": [ + { + "@type": "Person", + "name": "L. Poggio" + }, + { + "@type": "Person", + "name": "L. M. de Sousa" + }, + { + "@type": "Person", + "name": "N. H. 
Batjes" + }, + { + "@type": "Person", + "name": "G. B. M. Heuvelink" + }, + { + "@type": "Person", + "name": "B. Kempen" + }, + { + "@type": "Person", + "name": "E. Ribeiro" + }, + { + "@type": "Person", + "name": "D. Rossiter" + } + ], + "datePublished": "2021", + "isPartOf": { + "@type": "Periodical", + "name": "SOIL" + }, + "volumeNumber": "7", + "issueNumber": "1", + "pagination": "217-240", + "identifier": { + "@type": "PropertyValue", + "propertyID": "doi", + "value": "10.5194/soil-7-217-2021" + }, + "sameAs": "https://doi.org/10.5194/soil-7-217-2021" + } ] } diff --git a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld index d6619f6..e0b95ac 100644 --- a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +++ b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld @@ -4,7 +4,7 @@ "comment": "This dataset metadata was generated by AI.", "@id": "https://www.climatologylab.org/terraclimate.html#dataset", "name": "TerraClimate: Monthly Climate and Climatic Water Balance for Global Terrestrial Surfaces", - "description": "TerraClimate is a dataset of monthly climate and climatic water balance for global terrestrial land surfaces at ~4-km (1/24°) spatial resolution. It provides time-varying monthly climate fields derived via climatically aided interpolation: high-resolution climatological normals from WorldClim are combined with interpolated monthly anomalies from CRU TS (for most temperature, precipitation, and vapor pressure) and JRA-55 (used where CRU station influence is absent and used for solar radiation and wind speed). TerraClimate also produces monthly surface water balance variables using a modified Thornthwaite–Mather climatic water-balance model incorporating precipitation, temperature, reference evapotranspiration (ASCE Penman–Monteith), and soil water capacity. 
The core historical record covers 1958–2020 with planned periodic updates, and additional future layers are provided for +2°C and +4°C global mean temperature futures for pseudo-years 1985–2015 plus climatological summaries. Data are distributed primarily as compressed NetCDF (NetCDF4) via THREDDS/OPeNDAP and related web services and are also available as a Google Earth Engine image collection.", + "description": "TerraClimate is a dataset of monthly climate and climatic water balance for global terrestrial land surfaces at ~4-km (1/24°) spatial resolution. TerraClimate v1.1 provides time-varying monthly climate fields from 1950-present using climatically aided interpolation: high-resolution climatological normals from WorldClim are combined with monthly anomalies from ERA5 reanalysis. TerraClimate also produces monthly surface water balance variables using a modified Thornthwaite-Mather climatic water-balance model incorporating precipitation, temperature, reference evapotranspiration, and soil water capacity. Additional future layers are provided for +2°C and +4°C global mean temperature futures, plus a counterfactual climate scenario, for pseudo-years 1950-2025. 
Data are distributed primarily as compressed NetCDF (NetCDF4) via THREDDS/OPeNDAP and related web services and are also available as a Google Earth Engine image collection.", "url": "https://www.climatologylab.org/terraclimate.html", "isAccessibleForFree": true, "keywords": [ @@ -23,8 +23,7 @@ "PDSI", "drought", "WorldClim", - "CRU TS", - "JRA-55", + "ERA5", "NetCDF4", "THREDDS", "OPeNDAP", @@ -80,8 +79,8 @@ "url": "https://creativecommons.org/publicdomain/zero/1.0/" }, "temporalCoverage": [ - "1958-01-01/2020-12-31", - "1985-01-01/2015-12-31" + "1950-01-01/..", + "1950-01-01/2025-12-31" ], "spatialCoverage": { "@type": "Place", @@ -95,8 +94,7 @@ "temporalResolution": "P1M", "encodingFormat": [ "application/x-netcdf", - "application/x-netcdf", - "version=4" + "text/html" ], "variableMeasured": [ { @@ -171,7 +169,7 @@ } ], "measurementTechnique": [ - "Climatically aided interpolation using high-resolution climatological normals (WorldClim) combined with time-varying anomalies from CRU TS and JRA-55", + "Climatically aided interpolation using high-resolution climatological normals (WorldClim) combined with monthly anomalies from ERA5 reanalysis", "Modified Thornthwaite–Mather climatic water-balance model for derived water-balance variables" ], "distribution": [ @@ -187,7 +185,7 @@ { "@type": "DataDownload", "name": "TerraClimate THREDDS aggregated catalog (monthly aggregations by variable)", - "description": "THREDDS aggregated catalogs providing service endpoints (OPeNDAP/NetCDF Subset/NCSS/WMS/WCS) for 1958–current-year monthly aggregations by variable.", + "description": "THREDDS aggregated catalogs providing service endpoints (OPeNDAP/NetCDF Subset/NCSS/WMS/WCS) for monthly aggregations by variable.", "encodingFormat": [ "text/html" ], @@ -200,7 +198,7 @@ "encodingFormat": [ "application/x-netcdf" ], - "contentUrl": "https://tds-proxy.nkn.uidaho.edu/thredds/dodsC/agg_terraclimate_tmax_1958_CurrentYear_GLOBE.nc" + "contentUrl": 
"https://tds-proxy.nkn.uidaho.edu/thredds/dodsC/agg_terraclimate_tmax_1950_CurrentYear_GLOBE.nc" }, { "@type": "DataDownload", @@ -279,4 +277,4 @@ "name": "Ecological and hydrological studies" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/WATERBASE/waterbase.jsonld b/data/objects/summoned/generated/WATERBASE/waterbase.jsonld index 0a037f4..ecb4baa 100644 --- a/data/objects/summoned/generated/WATERBASE/waterbase.jsonld +++ b/data/objects/summoned/generated/WATERBASE/waterbase.jsonld @@ -54,11 +54,7 @@ "temporalCoverage": "1900-01-01/2024-12-31", "spatialCoverage": { "@type": "Place", - "name": "Europe (reporting countries; see dataset documentation for exact coverage)", - "geo": { - "@type": "GeoShape", - "box": "20,-40 50,10" - } + "name": "Europe, EEA member countries, and cooperating reporting countries; see dataset documentation for exact country coverage" }, "variableMeasured": [ { @@ -96,35 +92,70 @@ "name": "EEA Data Hub item page", "description": "Landing page for Waterbase - Water Quality ICM with dataset versions, temporal coverage, and access links.", "contentUrl": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Direct download (EEA SDI DataShare)", "description": "Direct download endpoint for the Waterbase - Water Quality ICM 2024 release (bulk download).", "contentUrl": "https://sdi.eea.europa.eu/datashare/s/3JiTia3qePyGxyA/download", - "encodingFormat": ["application/zip"] + "encodingFormat": [ + "application/zip" + ] }, { "@type": "DataDownload", "name": "Metadata factsheet (PDF)", "description": "Metadata factsheet for Waterbase - Water Quality ICM, 2024 release.", "contentUrl": "https://sdi.eea.europa.eu/catalogue/datahub/api/records/77976729-1aeb-4b61-a673-83db6c6a2ab2/formatters/xsl-view?approved=true&language=eng&output=pdf", - "encodingFormat": 
["application/pdf"] + "encodingFormat": [ + "application/pdf" + ] }, { "@type": "DataDownload", "name": "DISCODATA endpoint (direct database access and filtering)", "description": "Portal and endpoint for accessing and filtering data directly in the database, supporting application-specific access and user-driven downloads.", "contentUrl": "https://discodata.eea.europa.eu/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] } ], "about": [ - { "@type": "Thing", "name": "Hydrogeochemistry" }, - { "@type": "Thing", "name": "Water quality" }, - { "@type": "Thing", "name": "Environmental monitoring" }, - { "@type": "Thing", "name": "Surface water" }, - { "@type": "Thing", "name": "Groundwater" } + { + "@type": "Thing", + "name": "Hydrogeochemistry" + }, + { + "@type": "Thing", + "name": "Water quality" + }, + { + "@type": "Thing", + "name": "Environmental monitoring" + }, + { + "@type": "Thing", + "name": "Surface water" + }, + { + "@type": "Thing", + "name": "Groundwater" + } + ], + "citation": [ + { + "@type": "CreativeWork", + "name": "Waterbase - Water Quality ICM", + "datePublished": "2025", + "publisher": { + "@type": "Organization", + "name": "European Environment Agency" + }, + "sameAs": "https://www.eea.europa.eu/en/datahub/datahubitem-view/fbf3717c-cd7b-4785-933a-d0cf510542e1" + } ] } diff --git a/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld b/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld index 86bcdfe..d87f7f8 100644 --- a/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld +++ b/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld @@ -72,10 +72,7 @@ "temporalCoverage": "1900-01-01/..", "spatialCoverage": { "@type": "Place", - "geo": { - "@type": "GeoShape", - "box": "20,-40 50,10" - } + "name": "United States and beyond; coverage depends on contributing organizations and query parameters" }, 
"variableMeasured": [ { @@ -85,7 +82,7 @@ }, { "@type": "PropertyValue", - "name": "Station / monitoring location metadata", + "name": "Station and Monitoring Location Metadata", "description": "Locations where samples and observations were collected, including identifiers, coordinates, and site descriptors." }, { diff --git a/docs/jsonld-validation-plan.md b/docs/jsonld-validation-plan.md new file mode 100644 index 0000000..8f31dba --- /dev/null +++ b/docs/jsonld-validation-plan.md @@ -0,0 +1,23 @@ +# JSON-LD Metadata Validation Plan + +## Summary + +Validate generated JSON-LD against the authoritative dataset webpage, linked download targets, cited papers, and repository or data-release pages. Corrections should be conservative: revise only fields supported by visible source evidence, and leave an inspectable Git diff. + +## Manual Checks Added + +- Expand source-listed variables into one `variableMeasured` entry per variable. Use the physical variable label as `name` and put short codes such as `bio01`, `tas`, or `lossyear` in `alternateName`. +- Include all citations shown by the source page. When a page has both `Model Citation` and `Data Citation`, represent both as separate structured citation objects. +- Use the exact URL behind a source page's Download button or direct data/API endpoint for `distribution[].contentUrl`; do not use a generic portal root when a more specific target is available. + +## Generation Safeguards + +- The generation prompt requires separate variable rows, structured citation arrays, and exact download targets. +- `generate_jsonld.py` extracts source-page download links, citation text, and variable rows and passes them into the generation prompt. +- `generate_jsonld.py` emits review warnings when generated JSON-LD still contains lumped variable names, plain-string citations, or distributions that omit exact source-page download links. 
+ +## Validation + +- Run `python3 scripts/validate_jsonld_batch.py data/objects/summoned/generated`. +- Run `git diff --check`. +- Review `git diff` for metadata-only changes and confirm no unrelated files are modified. diff --git a/prompts/jsonld-generation-prompt.txt b/prompts/jsonld-generation-prompt.txt index 0e18755..5051429 100644 --- a/prompts/jsonld-generation-prompt.txt +++ b/prompts/jsonld-generation-prompt.txt @@ -45,6 +45,14 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi **Additional extracted or inferred metadata** (optional; from page content or prior knowledge): {EXTRACTED_METADATA} +**Source-page fidelity requirements**: +- If the source page has a variable table, create one `variableMeasured` entry per source-table row. Do not lump ranges or groups such as "BIO1-BIO19", "tas/tasmax/tasmin", "bands 1-7", or "all layers" into one entry when the page lists individual variables. +- Use the physically meaningful variable name from the source page as `variableMeasured[].name`. Put short codes such as `bio01`, `tas`, `pr`, `lossyear`, or `treecover2000` in `alternateName`. +- If the source page has both "Model Citation" and "Data Citation", include both as separate entries in `citation`. Do not drop either one. +- Always represent `citation` as an array of structured `CreativeWork`, `ScholarlyArticle`, or `Dataset` objects; do not use a plain citation string. +- For `distribution[].contentUrl`, use the exact URL linked from the source page's Download button or direct data/API endpoint when available. Do not use a generic portal root if a more specific download link, bucket prefix, DOI landing page, API endpoint, or file list is available. +- If a source page lists several download links for the same dataset, include each materially different download target as a separate `distribution` entry with a clear `name`. 
+ **Reference Example** (structure only; no fixed URLs): {EXAMPLE_JSONLD} @@ -61,6 +69,7 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi 10. Use proper JSON-LD structure (arrays for multiple values, nested objects where appropriate) 11. Set "keywords" as a JSON array of strings — never a single semicolon- or comma-separated string 12. Set "encodingFormat" (at dataset level and in each distribution) as a JSON array of strings — never a single semicolon- or comma-separated string -13. Add exactly: "comment": "This dataset metadata was generated by AI." +13. Set "citation" as a JSON array of structured objects — never as a single string +14. Add exactly: "comment": "This dataset metadata was generated by AI." **Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 0c9166d..27fcf15 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -777,6 +777,123 @@ def extract_text_content(html: str) -> str: return html[:HTML_FALLBACK_LIMIT] # Return first N chars if parsing fails +def extract_source_facts(html: str, base_url: str) -> Dict: + """Extract source-page facts that commonly prevent metadata drift. + + These facts are passed to the generation prompt and used for post-generation + warnings. The extraction is intentionally conservative and does not try to + infer values that are not visible in the source page markup/text. 
+ """ + facts = { + "download_links": [], + "citation_text": [], + "variables": [], + } + if not html: + return facts + try: + from urllib.parse import urljoin + + soup = BeautifulSoup(html, 'html.parser') + for script in soup(["script", "style"]): + script.decompose() + + seen_links = set() + for a in soup.find_all("a", href=True): + label = a.get_text(" ", strip=True) + href = urljoin(base_url, a["href"]) + label_l = label.lower() + href_l = href.lower() + if "download" in label_l or "download" in href_l or "data" in label_l: + key = (label, href) + if href and key not in seen_links: + facts["download_links"].append({"label": label or "link", "href": href}) + seen_links.add(key) + + lines = [line.strip() for line in soup.get_text("\n", strip=True).splitlines() if line.strip()] + for i, line in enumerate(lines): + if line in ("Model Citation", "Data Citation") and i + 1 < len(lines): + facts["citation_text"].append({"label": line, "text": lines[i + 1]}) + + for i, line in enumerate(lines): + code_match = re.match(r"^\(([^)]+)\)(.*)$", line) + if not code_match or i == 0: + continue + name = lines[i - 1] + code = code_match.group(1).strip() + rest = code_match.group(2).strip() + if not name or name in ("Name Unit Description", "Overview Variables"): + continue + unit = "" + description = rest + if rest: + parts = rest.split(None, 1) + if parts: + unit = parts[0] + description = parts[1] if len(parts) > 1 else "" + facts["variables"].append({ + "name": name, + "alternateName": code, + "unitText": unit, + "description": description, + }) + except Exception as e: + print(f"Warning: could not extract source-page facts from {base_url}: {e}") + return facts + + +def _looks_like_lumped_or_code_name(name: str) -> bool: + """Detect suspicious variable names that should be expanded or renamed.""" + if not name: + return False + if re.search(r"\b[A-Za-z]+\d+\s*[–-]\s*[A-Za-z]*\d+\b", name): + return True + if "/" in name: + return True + if 
re.fullmatch(r"[A-Za-z]+[A-Za-z0-9_-]*", name) and (name.islower() or any(ch.isdigit() for ch in name)): + return True + return False + + +def audit_generated_jsonld(data: Dict, source_facts: Dict) -> List[str]: + """Return warnings for metadata patterns that need manual review.""" + warnings = [] + if not isinstance(data, dict): + return ["Generated JSON-LD is not an object"] + + citation = data.get("citation") + if isinstance(citation, str): + warnings.append("citation is a string; expected an array of structured citation objects") + elif isinstance(citation, list) and any(isinstance(item, str) for item in citation): + warnings.append("citation array contains string entries; expected structured citation objects") + + variables = data.get("variableMeasured") + if isinstance(variables, str): + warnings.append("variableMeasured is a string; expected separate PropertyValue objects") + elif isinstance(variables, list): + for idx, item in enumerate(variables): + if not isinstance(item, dict): + continue + if _looks_like_lumped_or_code_name(str(item.get("name", ""))): + warnings.append(f"variableMeasured[{idx}].name looks like a code or lumped range") + + download_links = { + link.get("href") for link in (source_facts or {}).get("download_links", []) + if isinstance(link, dict) and link.get("href") + } + if download_links: + dist = data.get("distribution") + content_urls = set() + if isinstance(dist, list): + content_urls = { + item.get("contentUrl") for item in dist + if isinstance(item, dict) and item.get("contentUrl") + } + if not (content_urls & download_links): + warnings.append("distribution does not include any exact source-page Download href") + return warnings + + def load_example_jsonld() -> str: """Load an example JSON-LD file for reference.""" example_path = DATA_DIR / "gpp" / "2d78c4242a108f70ea2c0604964dc095b34bfd7b.jsonld" @@ -1048,6 +1165,18 @@ def main(): print(f"\n [{j}/{len(detected_datasets)}] Processing dataset: {dataset_name}") # Prepare metadata 
for this specific dataset + source_facts = {} + source_html = fetch_webpage(dataset_url) + if source_html: + source_facts = extract_source_facts(source_html, dataset_url) + if source_facts.get("download_links"): + print(f" Found {len(source_facts['download_links'])} source download link(s)") + if source_facts.get("citation_text"): + print(f" Found {len(source_facts['citation_text'])} source citation text item(s)") + if source_facts.get("variables"): + print(f" Found {len(source_facts['variables'])} source variable row(s)") + detected_extracted = dict(detected_dataset) + detected_extracted["source_facts"] = source_facts metadata = { 'name': dataset_name, 'url': dataset_url, @@ -1065,7 +1194,7 @@ def main(): if dataset.get(CSV_FIELDS['BOX_LON_MIN']) else '' ) ), - 'extracted': detected_dataset + 'extracted': detected_extracted } # Generate JSON-LD @@ -1075,8 +1204,10 @@ def main(): # Validate JSON try: - json.loads(jsonld) + parsed_jsonld = json.loads(jsonld) print(f" Valid JSON") + for warning in audit_generated_jsonld(parsed_jsonld, source_facts): + print(f" Review warning: {warning}") except json.JSONDecodeError as e: print(f" Warning: Generated JSON may be invalid: {e}") @@ -1125,4 +1256,3 @@ def main(): if __name__ == '__main__': main() - From d7a9d378c9e355aecb0b4272dbe7fe95b5d55706 Mon Sep 17 00:00:00 2001 From: jaywt Date: Wed, 20 May 2026 15:12:26 -0400 Subject: [PATCH 57/58] Add variable-level coverage metadata --- .../generated/CHELSA/chelsa_bioclim.jsonld | 168 +++++++++++++----- .../chelsa_canaryclim_climatologies.jsonld | 16 +- .../CHELSA/chelsa_cerra_daily.jsonld | 4 +- .../chelsa_ch_highres_climatologies.jsonld | 16 +- .../CHELSA/chelsa_ch_highres_daily.jsonld | 16 +- .../CHELSA/chelsa_climatologies.jsonld | 44 +++-- .../generated/CHELSA/chelsa_daily.jsonld | 44 +++-- .../CHELSA/chelsa_drought_indices.jsonld | 20 ++- .../generated/CHELSA/chelsa_monthly.jsonld | 52 ++++-- .../CHELSA/chelsa_trace21k_centennial.jsonld | 16 +- 
.../chelsa_trace21k_centennial_bioclim.jsonld | 88 ++++++--- .../generated/CHELSA/chelsa_w5e5_daily.jsonld | 20 ++- .../consensus-land-cover.jsonld | 48 +++-- .../summoned/generated/FLO1K/flo1k.jsonld | 105 ++++++++--- .../summoned/generated/G-RUN/g-run.jsonld | 62 +++++-- .../objects/summoned/generated/GFC/gfc.jsonld | 28 ++- .../summoned/generated/GHSL/ghsl.jsonld | 96 +++++++--- .../generated/GPP_MOD17/gpp_mod17.jsonld | 8 +- .../generated/GRACE-REC/grace-rec.jsonld | 56 ++++-- .../global-multi-layer-soil-moisture.jsonld | 59 ++++-- .../global-tree-density.jsonld | 10 +- .../generated/HydroSHEDS/hydrosheds.jsonld | 102 ++++++++--- .../Hydrography90m/hydrography90m.jsonld | 154 ++++++++++++---- .../generated/MERIT_DEM/merit-dem.jsonld | 4 +- .../generated/MRLC_NLCD/annual-nlcd.jsonld | 24 ++- .../MRLC_NLCD/exotic-annual-grass.jsonld | 24 ++- .../generated/MRLC_NLCD/legacy-nlcd.jsonld | 16 +- .../generated/MRLC_NLCD/nalcms.jsonld | 4 +- .../summoned/generated/MRLC_NLCD/rcmap.jsonld | 24 ++- .../Shale_Network/shale-network.jsonld | 71 ++++++-- .../generated/SoilGrids2/soilgrids2.jsonld | 121 +++++++++++-- .../TerraClimate/terraclimate.jsonld | 98 ++++++++-- .../generated/WATERBASE/waterbase.jsonld | 12 +- .../water-quality-portal.jsonld | 88 +++++++-- docs/jsonld-validation-plan.md | 7 +- prompts/jsonld-generation-prompt.txt | 7 +- scripts/generate_jsonld.py | 41 +++++ 37 files changed, 1396 insertions(+), 377 deletions(-) diff --git a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld index 13347f4..73c598a 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld @@ -42,294 +42,378 @@ "name": "Mean Annual Near-Surface Air Temperature", "alternateName": "bio01", "unitText": "°C", - "description": "Mean annual temperature calculated as the average of mean monthly temperatures over the year" + "description": 
"Mean annual temperature calculated as the average of mean monthly temperatures over the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Diurnal Near-Surface Air Temperature Range", "alternateName": "bio02", "unitText": "°C", - "description": "Mean diurnal temperature range computed as the average of monthly daily maximum minus daily minimum near-surface air temperature" + "description": "Mean diurnal temperature range computed as the average of monthly daily maximum minus daily minimum near-surface air temperature", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Isothermality", "alternateName": "bio03", "unitText": "°C", - "description": "Isothermality: 100 × bio02 ÷ bio07; compares day-night variability to annual temperature range" + "description": "Isothermality: 100 × bio02 ÷ bio07; compares day-night variability to annual temperature range", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Temperature Seasonality", "alternateName": "bio04", "unitText": "°C/100", - "description": "Temperature seasonality given by the standard deviation of mean monthly temperatures" + "description": "Temperature seasonality given by the standard deviation of mean monthly temperatures", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Maximum Near-Surface Air Temperature of the Warmest Month", "alternateName": "bio05", "unitText": "°C", - "description": "Highest monthly mean of daily maximum temperatures across the year; indicates peak thermal conditions" + "description": "Highest monthly mean of daily maximum temperatures across the year; indicates peak thermal conditions", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": 
"PropertyValue", "name": "Mean Daily Minimum Near-Surface Air Temperature of the Coldest Month", "alternateName": "bio06", "unitText": "°C", - "description": "Lowest monthly mean of daily minimum temperatures across the year; characterizes winter cold intensity" + "description": "Lowest monthly mean of daily minimum temperatures across the year; characterizes winter cold intensity", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Annual Daily Mean Near-Surface Air Temperature Range", "alternateName": "bio07", "unitText": "°C", - "description": "Annual temperature range calculated as bio05 minus bio06; measures amplitude between warmest and coldest months" + "description": "Annual temperature range calculated as bio05 minus bio06; measures amplitude between warmest and coldest months", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Near-Surface Air Temperature of the Wettest Quarter", "alternateName": "bio08", "unitText": "°C", - "description": "Average monthly mean temperature over the wettest three-month period of the year" + "description": "Average monthly mean temperature over the wettest three-month period of the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Near-Surface Air Temperature of the Driest Quarter", "alternateName": "bio09", "unitText": "°C", - "description": "Average monthly mean temperature over the driest three-month period of the year" + "description": "Average monthly mean temperature over the driest three-month period of the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Mean Near-Surface Air Temperature of the Warmest Quarter", "alternateName": "bio10", "unitText": "°C", - "description": "Average monthly 
mean temperature over the warmest three-month period of the year" + "description": "Average monthly mean temperature over the warmest three-month period of the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Mean Near-Surface Air Temperature of the Coldest Quarter", "alternateName": "bio11", "unitText": "°C", - "description": "Average monthly mean temperature over the coldest three-month period of the year" + "description": "Average monthly mean temperature over the coldest three-month period of the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Annual Precipitation", "alternateName": "bio12", "unitText": "kg m-2 year-1", - "description": "Sum of monthly precipitation totals across the year" + "description": "Sum of monthly precipitation totals across the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation of the Wettest Month", "alternateName": "bio13", "unitText": "kg m-2 month-1", - "description": "Maximum monthly precipitation total" + "description": "Maximum monthly precipitation total", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation of the Driest Month", "alternateName": "bio14", "unitText": "kg m-2 month-1", - "description": "Minimum monthly precipitation total" + "description": "Minimum monthly precipitation total", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation Seasonality", "alternateName": "bio15", "unitText": "kg m-2", - "description": "Coefficient of variation: 100 × standard deviation ÷ mean of monthly precipitation totals" + "description": "Coefficient of variation: 100 × standard deviation ÷ mean of monthly 
precipitation totals", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Monthly Precipitation of the Wettest Quarter", "alternateName": "bio16", "unitText": "kg m-2 month-1", - "description": "Average monthly precipitation during the wettest three-month period of the year" + "description": "Average monthly precipitation during the wettest three-month period of the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Monthly Precipitation of the Driest Quarter", "alternateName": "bio17", "unitText": "kg m-2 month-1", - "description": "Average monthly precipitation during the driest three-month period of the year" + "description": "Average monthly precipitation during the driest three-month period of the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Monthly Precipitation of the Warmest Quarter", "alternateName": "bio18", "unitText": "kg m-2 month-1", - "description": "Average monthly precipitation during the warmest three-month period of the year" + "description": "Average monthly precipitation during the warmest three-month period of the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Monthly Precipitation of the Coldest Quarter", "alternateName": "bio19", "unitText": "kg m-2 month-1", - "description": "Average monthly precipitation during the coldest three-month period of the year" + "description": "Average monthly precipitation during the coldest three-month period of the year", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Frost Change Frequency", "alternateName": "fcf", "unitText": "count", - "description": "Number of freeze-thaw transitions per year." 
+ "description": "Number of freeze-thaw transitions per year.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "First Day of the Growing Season TREELIM", "alternateName": "fgd", "unitText": "julian day", - "description": "Julian day marking the first occurrence of growing season conditions." + "description": "Julian day marking the first occurrence of growing season conditions.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Growing Degree Days Heat Sum above 0 °C", "alternateName": "gdd0", "unitText": "°C", - "description": "Sum of daily mean temperatures above 0 °C accumulated over the year." + "description": "Sum of daily mean temperatures above 0 °C accumulated over the year.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Growing Degree Days Heat Sum above 10 °C", "alternateName": "gdd10", "unitText": "°C", - "description": "Sum of daily mean temperatures above 10 °C accumulated over the year." + "description": "Sum of daily mean temperatures above 10 °C accumulated over the year.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Growing Degree Days Heat Sum above 5 °C", "alternateName": "gdd5", "unitText": "°C", - "description": "Sum of daily mean temperatures above 5 °C accumulated over the year." + "description": "Sum of daily mean temperatures above 5 °C accumulated over the year.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "First Growing Degree Day above 10 °C", "alternateName": "gdgfgd10", "unitText": "julian day", - "description": "Julian day of the first occurrence of a daily mean temperature above 10 °C." 
+ "description": "Julian day of the first occurrence of a daily mean temperature above 10 °C.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "First Growing Degree Day above 5 °C", "alternateName": "gdgfgd5", "unitText": "julian day", - "description": "Julian day of the first occurrence of a daily mean temperature above 5 °C." + "description": "Julian day of the first occurrence of a daily mean temperature above 5 °C.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Growing Season Length", "alternateName": "gsl", "unitText": "days", - "description": "Number of days between the first and last occurrence of growing season conditions." + "description": "Number of days between the first and last occurrence of growing season conditions.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Accumulated Precipitation Amount on Growing Season Days", "alternateName": "gsp", "unitText": "kg m-2 gsl-1", - "description": "Total precipitation accumulated during the growing season period." + "description": "Total precipitation accumulated during the growing season period.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Temperature of Growing Season Days", "alternateName": "gst", "unitText": "°C", - "description": "Average daily mean temperature over all growing season days." + "description": "Average daily mean temperature over all growing season days.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Köppen-Geiger Climate Classification", "alternateName": "kg0", "unitText": "category", - "description": "Köppen-Geiger climate classification." 
+ "description": "Köppen-Geiger climate classification.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Köppen-Geiger Climate Classification without As and Aw Differentiation", "alternateName": "kg1", "unitText": "category", - "description": "Köppen-Geiger climate classification without As and Aw differentiation." + "description": "Köppen-Geiger climate classification without As and Aw differentiation.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Köppen-Geiger Climate Classification after Peel et al. 2007", "alternateName": "kg2", "unitText": "category", - "description": "Köppen-Geiger climate classification after Peel et al. 2007." + "description": "Köppen-Geiger climate classification after Peel et al. 2007.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Climate Classification after Wissmann 1939", "alternateName": "kg3", "unitText": "category", - "description": "Climate classification after Wissmann 1939." + "description": "Climate classification after Wissmann 1939.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Climate Classification after Thornthwaite 1931", "alternateName": "kg4", "unitText": "category", - "description": "Climate classification after Thornthwaite 1931." + "description": "Climate classification after Thornthwaite 1931.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Climate Classification after Troll-Pfaffen", "alternateName": "kg5", "unitText": "category", - "description": "Climate classification after Troll-Pfaffen." 
+ "description": "Climate classification after Troll-Pfaffen.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Last Day of the Growing Season TREELIM", "alternateName": "lgd", "unitText": "julian day", - "description": "Julian day of the last occurrence of growing season conditions." + "description": "Julian day of the last occurrence of growing season conditions.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Number of Growing Degree Days above 0 °C", "alternateName": "ngd0", "unitText": "number of days", - "description": "Total number of days in a year with mean daily temperature above 0 °C." + "description": "Total number of days in a year with mean daily temperature above 0 °C.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Number of Growing Degree Days above 10 °C", "alternateName": "ngd10", "unitText": "number of days", - "description": "Total number of days in a year with mean daily temperature above 10 °C." + "description": "Total number of days in a year with mean daily temperature above 10 °C.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Number of Growing Degree Days above 5 °C", "alternateName": "ngd5", "unitText": "number of days", - "description": "Total number of days in a year with mean daily temperature above 5 °C." + "description": "Total number of days in a year with mean daily temperature above 5 °C.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Net Primary Production on Land as Carbon Mass Flux", "alternateName": "npp", "unitText": "g C m-2 yr-1", - "description": "Net primary production on land expressed as carbon mass flux." 
+ "description": "Net primary production on land expressed as carbon mass flux.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Snow Cover Days", "alternateName": "scd", "unitText": "days", - "description": "Number of days per year with snow cover present at the surface." + "description": "Number of days per year with snow cover present at the surface.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Snow Water Equivalent", "alternateName": "swe", "unitText": "kg m-2 year-1", - "description": "Total water equivalent of snowpack accumulated over the year." + "description": "Total water equivalent of snowpack accumulated over the year.", + "temporalCoverage": "1981-2010/1971-2100", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld index e5c3982..0ac5a80 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld @@ -38,28 +38,36 @@ "name": "Precipitation", "alternateName": "pr", "unitText": "kg m-2 day-1", - "description": "Precipitation including liquid and solid phases." + "description": "Precipitation including liquid and solid phases.", + "temporalCoverage": "1979-01-01/2013-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Mean Near-Surface Air Temperature", "alternateName": "tas", "unitText": "K", - "description": "Near-surface, usually 2 meter, air temperature." 
+ "description": "Near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2013-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Maximum Near-Surface Air Temperature", "alternateName": "tasmax", "unitText": "K", - "description": "Maximum near-surface, usually 2 meter, air temperature." + "description": "Maximum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2013-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Minimum Near-Surface Air Temperature", "alternateName": "tasmin", "unitText": "K", - "description": "Minimum near-surface, usually 2 meter, air temperature." + "description": "Minimum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2013-12-31", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld index 8078985..38d6af1 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld @@ -38,7 +38,9 @@ "name": "Daily Mean Near-Surface Air Temperature", "alternateName": "tas", "unitText": "K", - "description": "Near-surface, usually 2 meter, air temperature." 
+ "description": "Near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1985-01-01/2015-12-31", + "spatialCoverage": "not detected" } ], "distribution": [ diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld index c939405..e1d7b5e 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld @@ -38,28 +38,36 @@ "name": "Precipitation", "alternateName": "pr", "unitText": "kg m-2 day-1", - "description": "Precipitation including liquid and solid phases." + "description": "Precipitation including liquid and solid phases.", + "temporalCoverage": "1981-01-01/2010-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Mean Near-Surface Air Temperature", "alternateName": "tas", "unitText": "K", - "description": "Near-surface, usually 2 meter, air temperature." + "description": "Near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-01-01/2010-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Maximum Near-Surface Air Temperature", "alternateName": "tasmax", "unitText": "K", - "description": "Maximum near-surface, usually 2 meter, air temperature." + "description": "Maximum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-01-01/2010-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Minimum Near-Surface Air Temperature", "alternateName": "tasmin", "unitText": "K", - "description": "Minimum near-surface, usually 2 meter, air temperature." 
+ "description": "Minimum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-01-01/2010-12-31", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld index 7c3378a..7511309 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld @@ -38,28 +38,36 @@ "name": "Precipitation", "alternateName": "pr", "unitText": "kg m-2 day-1", - "description": "Precipitation including liquid and solid phases." + "description": "Precipitation including liquid and solid phases.", + "temporalCoverage": "1981-01-01/2022-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Mean Near-Surface Air Temperature", "alternateName": "tas", "unitText": "K", - "description": "Near-surface, usually 2 meter, air temperature." + "description": "Near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-01-01/2022-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Maximum Near-Surface Air Temperature", "alternateName": "tasmax", "unitText": "K", - "description": "Maximum near-surface, usually 2 meter, air temperature." + "description": "Maximum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-01-01/2022-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Minimum Near-Surface Air Temperature", "alternateName": "tasmin", "unitText": "K", - "description": "Minimum near-surface, usually 2 meter, air temperature." 
+ "description": "Minimum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-01-01/2022-12-31", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld index 73d2a34..91d62be 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld @@ -42,77 +42,99 @@ "name": "Total Cloud Cover Percentage", "alternateName": "clt", "unitText": "percent", - "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column." + "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Monthly Climate Moisture Index", "alternateName": "cmi", "unitText": "kg m-2 month-1", - "description": "Monthly ratio of precipitation to potential evapotranspiration; indicator of climatic water availability." + "description": "Monthly ratio of precipitation to potential evapotranspiration; indicator of climatic water availability.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Near-Surface Relative Humidity", "alternateName": "hurs", "unitText": "percent", - "description": "Relative humidity near the surface." 
+ "description": "Relative humidity near the surface.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Monthly Potential Evapotranspiration", "alternateName": "pet", "unitText": "kg m-2 month-1", - "description": "Total potential evapotranspiration for the month assuming unlimited water availability, calculated using Penman-Monteith." + "description": "Total potential evapotranspiration for the month assuming unlimited water availability, calculated using Penman-Monteith.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation", "alternateName": "pr", "unitText": "kg m-2 day-1", - "description": "Precipitation including liquid and solid phases." + "description": "Precipitation including liquid and solid phases.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Surface Downwelling Shortwave Flux in Air", "alternateName": "rsds", "unitText": "W m-2", - "description": "Surface solar irradiance for UV calculations." + "description": "Surface solar irradiance for UV calculations.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Near-Surface Wind Speed", "alternateName": "sfcWind", "unitText": "m s-1", - "description": "Near-surface, usually 10 meter, wind speed." + "description": "Near-surface, usually 10 meter, wind speed.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Mean Near-Surface Air Temperature", "alternateName": "tas", "unitText": "K", - "description": "Near-surface, usually 2 meter, air temperature." 
+ "description": "Near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Maximum Near-Surface Air Temperature", "alternateName": "tasmax", "unitText": "K", - "description": "Maximum near-surface, usually 2 meter, air temperature." + "description": "Maximum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Minimum Near-Surface Air Temperature", "alternateName": "tasmin", "unitText": "K", - "description": "Minimum near-surface, usually 2 meter, air temperature." + "description": "Minimum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Vapor Pressure Deficit", "alternateName": "vpd", "unitText": "Pa", - "description": "Difference between saturation vapor pressure and actual vapor pressure." + "description": "Difference between saturation vapor pressure and actual vapor pressure.", + "temporalCoverage": "1981-2010/2071-2100", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld index fe62159..3abf4a3 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld @@ -44,77 +44,99 @@ "name": "Total Cloud Cover Percentage", "alternateName": "clt", "unitText": "percent", - "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column." 
+ "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Near-Surface Relative Humidity", "alternateName": "hurs", "unitText": "percent", - "description": "Relative humidity near the surface." + "description": "Relative humidity near the surface.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation", "alternateName": "pr", "unitText": "kg m-2 day-1", - "description": "Precipitation including liquid and solid phases." + "description": "Precipitation including liquid and solid phases.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation", "alternateName": "prec", "unitText": "kg m-2 day-1", - "description": "Downscaled forecast precipitation from ERA5; not bias corrected and should not be mixed with pr." + "description": "Downscaled forecast precipitation from ERA5; not bias corrected and should not be mixed with pr.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Surface Air Pressure", "alternateName": "ps", "unitText": "hPa", - "description": "Surface pressure, not mean sea-level pressure." + "description": "Surface pressure, not mean sea-level pressure.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Surface Downwelling Shortwave Flux in Air", "alternateName": "rsds", "unitText": "W m-2", - "description": "Surface solar irradiance for UV calculations." 
+ "description": "Surface solar irradiance for UV calculations.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Near-Surface Wind Speed", "alternateName": "sfcWind", "unitText": "m s-1", - "description": "Near-surface, usually 10 meter, wind speed." + "description": "Near-surface, usually 10 meter, wind speed.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Mean Near-Surface Air Temperature", "alternateName": "tas", "unitText": "K", - "description": "Near-surface, usually 2 meter, air temperature." + "description": "Near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Maximum Near-Surface Air Temperature", "alternateName": "tasmax", "unitText": "K", - "description": "Maximum near-surface, usually 2 meter, air temperature." + "description": "Maximum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Minimum Near-Surface Air Temperature", "alternateName": "tasmin", "unitText": "K", - "description": "Minimum near-surface, usually 2 meter, air temperature." + "description": "Minimum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Air Temperature Lapse Rate", "alternateName": "tz", "unitText": "K m-1", - "description": "Rate of change in air temperature with altitude calculated over the centennial period." 
+ "description": "Rate of change in air temperature with altitude calculated over the centennial period.", + "temporalCoverage": "1979-01-01/2025-08-29", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld index 1851d39..08c8a6d 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld @@ -42,35 +42,45 @@ "name": "Multiyear Meteorological Drought", "alternateName": "mymd", "unitText": "id", - "description": "Identifier for multiyear meteorological drought events." + "description": "Identifier for multiyear meteorological drought events.", + "temporalCoverage": "1980-06-15/2018-07-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Multiyear Meteorological Drought at 10 km Resolution", "alternateName": "mymd10", "unitText": "id", - "description": "Identifier for multiyear meteorological drought events at 10 km resolution." + "description": "Identifier for multiyear meteorological drought events at 10 km resolution.", + "temporalCoverage": "1980-06-15/2018-07-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Kernel Normalized Difference Vegetation Index Anomaly", "alternateName": "qkndvi", "unitText": "1", - "description": "Annual anomaly in vegetation greenness derived from kernel normalized difference vegetation index." 
+ "description": "Annual anomaly in vegetation greenness derived from kernel normalized difference vegetation index.", + "temporalCoverage": "1980-06-15/2018-07-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Standardized Precipitation Evapotranspiration Index", "alternateName": "spei12", "unitText": "1", - "description": "Standardized climatic water balance index over a 12-month integration period." + "description": "Standardized climatic water balance index over a 12-month integration period.", + "temporalCoverage": "1980-06-15/2018-07-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Standardized Precipitation Index", "alternateName": "spi12", "unitText": "1", - "description": "Standardized precipitation anomaly index over a 12-month integration period." + "description": "Standardized precipitation anomaly index over a 12-month integration period.", + "temporalCoverage": "1980-06-15/2018-07-15", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/licenses/by/4.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld index 8b3672c..63b2426 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld @@ -42,91 +42,117 @@ "name": "Total Cloud Cover Percentage", "alternateName": "clt", "unitText": "percent", - "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column." 
+ "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Monthly Climate Moisture Index", "alternateName": "cmi", "unitText": "kg m-2 month-1", - "description": "Monthly ratio of precipitation to potential evapotranspiration; indicator of climatic water availability." + "description": "Monthly ratio of precipitation to potential evapotranspiration; indicator of climatic water availability.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Near-Surface Relative Humidity", "alternateName": "hurs", "unitText": "percent", - "description": "Relative humidity near the surface." + "description": "Relative humidity near the surface.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Monthly Potential Evapotranspiration", "alternateName": "pet", "unitText": "kg m-2 month-1", - "description": "Total potential evapotranspiration for the month assuming unlimited water availability, calculated using Penman-Monteith." + "description": "Total potential evapotranspiration for the month assuming unlimited water availability, calculated using Penman-Monteith.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation", "alternateName": "pr", "unitText": "kg m-2 month-1", - "description": "Precipitation including liquid and solid phases." + "description": "Precipitation including liquid and solid phases.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Surface Downwelling Shortwave Flux in Air", "alternateName": "rsds", "unitText": "W m-2", - "description": "Surface solar irradiance for UV calculations." 
+ "description": "Surface solar irradiance for UV calculations.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Near-Surface Wind Speed", "alternateName": "sfcWind", "unitText": "m s-1", - "description": "Near-surface, usually 10 meter, wind speed." + "description": "Near-surface, usually 10 meter, wind speed.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Standardized Precipitation Evapotranspiration Index", "alternateName": "spei12", "unitText": "1", - "description": "Standardized climatic water balance index over a 12-month integration period." + "description": "Standardized climatic water balance index over a 12-month integration period.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Standardized Precipitation Index", "alternateName": "spi12", "unitText": "1", - "description": "Standardized precipitation anomaly index over a 12-month integration period." + "description": "Standardized precipitation anomaly index over a 12-month integration period.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Mean Near-Surface Air Temperature", "alternateName": "tas", "unitText": "K", - "description": "Near-surface, usually 2 meter, air temperature." + "description": "Near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Maximum Near-Surface Air Temperature", "alternateName": "tasmax", "unitText": "K", - "description": "Maximum near-surface, usually 2 meter, air temperature." 
+ "description": "Maximum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Minimum Near-Surface Air Temperature", "alternateName": "tasmin", "unitText": "K", - "description": "Minimum near-surface, usually 2 meter, air temperature." + "description": "Minimum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Vapor Pressure Deficit", "alternateName": "vpd", "unitText": "Pa", - "description": "Difference between saturation vapor pressure and actual vapor pressure." + "description": "Difference between saturation vapor pressure and actual vapor pressure.", + "temporalCoverage": "1979-01-15/2021-12-15", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld index 21f25c5..ceeb4f9 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld @@ -39,28 +39,36 @@ "name": "Precipitation", "alternateName": "pr", "unitText": "kg m-2 day-1", - "description": "Precipitation including liquid and solid phases." + "description": "Precipitation including liquid and solid phases.", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Maximum Near-Surface Air Temperature", "alternateName": "tasmax", "unitText": "K", - "description": "Maximum near-surface, usually 2 meter, air temperature." 
+ "description": "Maximum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Minimum Near-Surface Air Temperature", "alternateName": "tasmin", "unitText": "K", - "description": "Minimum near-surface, usually 2 meter, air temperature." + "description": "Minimum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Air Temperature Lapse Rate", "alternateName": "tz", "unitText": "K m-1", - "description": "Rate of change in air temperature with altitude calculated over the centennial period." + "description": "Rate of change in air temperature with altitude calculated over the centennial period.", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld index d162967..fad1782 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld @@ -38,154 +38,198 @@ "name": "Mean Annual Near-Surface Air Temperature", "alternateName": "bio01", "unitText": "K", - "description": "Mean annual temperature calculated as the average of mean monthly temperatures over the year" + "description": "Mean annual temperature calculated as the average of mean monthly temperatures over the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Diurnal Near-Surface Air Temperature Range", "alternateName": "bio02", "unitText": "K", - "description": "Mean diurnal temperature range computed as the average of monthly daily maximum 
minus daily minimum near-surface air temperature" + "description": "Mean diurnal temperature range computed as the average of monthly daily maximum minus daily minimum near-surface air temperature", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Isothermality", "alternateName": "bio03", "unitText": "K", - "description": "Isothermality: 100 × bio02 ÷ bio07; compares day-night variability to annual temperature range" + "description": "Isothermality: 100 × bio02 ÷ bio07; compares day-night variability to annual temperature range", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Temperature Seasonality", "alternateName": "bio04", "unitText": "K", - "description": "Temperature seasonality given by the standard deviation of mean monthly temperatures" + "description": "Temperature seasonality given by the standard deviation of mean monthly temperatures", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Maximum Near-Surface Air Temperature of the Warmest Month", "alternateName": "bio05", "unitText": "K", - "description": "Highest monthly mean of daily maximum temperatures across the year; indicates peak thermal conditions" + "description": "Highest monthly mean of daily maximum temperatures across the year; indicates peak thermal conditions", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Minimum Near-Surface Air Temperature of the Coldest Month", "alternateName": "bio06", "unitText": "K", - "description": "Lowest monthly mean of daily minimum temperatures across the year; characterizes winter cold intensity" + "description": "Lowest monthly mean of daily minimum temperatures across the year; characterizes winter cold intensity", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not 
detected" }, { "@type": "PropertyValue", "name": "Annual Daily Mean Near-Surface Air Temperature Range", "alternateName": "bio07", "unitText": "K", - "description": "Annual temperature range calculated as bio05 minus bio06; measures amplitude between warmest and coldest months" + "description": "Annual temperature range calculated as bio05 minus bio06; measures amplitude between warmest and coldest months", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Near-Surface Air Temperature of the Wettest Quarter", "alternateName": "bio08", "unitText": "K", - "description": "Average monthly mean temperature over the wettest three-month period of the year" + "description": "Average monthly mean temperature over the wettest three-month period of the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Near-Surface Air Temperature of the Driest Quarter", "alternateName": "bio09", "unitText": "K", - "description": "Average monthly mean temperature over the driest three-month period of the year" + "description": "Average monthly mean temperature over the driest three-month period of the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Mean Near-Surface Air Temperature of the Warmest Quarter", "alternateName": "bio10", "unitText": "K", - "description": "Average monthly mean temperature over the warmest three-month period of the year" + "description": "Average monthly mean temperature over the warmest three-month period of the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Daily Mean Near-Surface Air Temperature of the Coldest Quarter", "alternateName": "bio11", "unitText": "K", - "description": "Average monthly mean temperature over the coldest three-month period of the 
year" + "description": "Average monthly mean temperature over the coldest three-month period of the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Annual Precipitation", "alternateName": "bio12", "unitText": "kg m-2 year-1", - "description": "Sum of monthly precipitation totals across the year" + "description": "Sum of monthly precipitation totals across the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation of the Wettest Month", "alternateName": "bio13", "unitText": "kg m-2 month-1", - "description": "Maximum monthly precipitation total" + "description": "Maximum monthly precipitation total", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation of the Driest Month", "alternateName": "bio14", "unitText": "kg m-2 month-1", - "description": "Minimum monthly precipitation total" + "description": "Minimum monthly precipitation total", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation Seasonality", "alternateName": "bio15", "unitText": "kg m-2", - "description": "Coefficient of variation: 100 × standard deviation ÷ mean of monthly precipitation totals" + "description": "Coefficient of variation: 100 × standard deviation ÷ mean of monthly precipitation totals", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Monthly Precipitation of the Wettest Quarter", "alternateName": "bio16", "unitText": "kg m-2 month-1", - "description": "Average monthly precipitation during the wettest three-month period of the year" + "description": "Average monthly precipitation during the wettest three-month period of the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": 
"PropertyValue", "name": "Mean Monthly Precipitation of the Driest Quarter", "alternateName": "bio17", "unitText": "kg m-2 month-1", - "description": "Average monthly precipitation during the driest three-month period of the year" + "description": "Average monthly precipitation during the driest three-month period of the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Monthly Precipitation of the Warmest Quarter", "alternateName": "bio18", "unitText": "kg m-2 month-1", - "description": "Average monthly precipitation during the warmest three-month period of the year" + "description": "Average monthly precipitation during the warmest three-month period of the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mean Monthly Precipitation of the Coldest Quarter", "alternateName": "bio19", "unitText": "kg m-2 month-1", - "description": "Average monthly precipitation during the coldest three-month period of the year" + "description": "Average monthly precipitation during the coldest three-month period of the year", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Ice Sheet Surface Altitude", "alternateName": "glz", "unitText": "m", - "description": "Elevation of the ice sheet surface above sea level." + "description": "Elevation of the ice sheet surface above sea level.", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Surface Altitude", "alternateName": "orog", "unitText": "m", - "description": "Geometric height of the land surface above the geoid." 
+ "description": "Geometric height of the land surface above the geoid.", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Snow Cover Days", "alternateName": "scd", "unitText": "days", - "description": "Number of days per year with snow cover present at the surface." + "description": "Number of days per year with snow cover present at the surface.", + "temporalCoverage": "21000BP/0BP", + "spatialCoverage": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld index e21ae08..720fb7c 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld @@ -39,35 +39,45 @@ "name": "Precipitation", "alternateName": "pr", "unitText": "kg m-2 day-1", - "description": "Precipitation including liquid and solid phases." + "description": "Precipitation including liquid and solid phases.", + "temporalCoverage": "1979-01-01/2016-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Surface Downwelling Shortwave Flux in Air", "alternateName": "rsds", "unitText": "W m-2", - "description": "Surface solar irradiance for UV calculations." + "description": "Surface solar irradiance for UV calculations.", + "temporalCoverage": "1979-01-01/2016-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Mean Near-Surface Air Temperature", "alternateName": "tas", "unitText": "K", - "description": "Near-surface, usually 2 meter, air temperature." 
+ "description": "Near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2016-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Maximum Near-Surface Air Temperature", "alternateName": "tasmax", "unitText": "K", - "description": "Maximum near-surface, usually 2 meter, air temperature." + "description": "Maximum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2016-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Daily Minimum Near-Surface Air Temperature", "alternateName": "tasmin", "unitText": "K", - "description": "Minimum near-surface, usually 2 meter, air temperature." + "description": "Minimum near-surface, usually 2 meter, air temperature.", + "temporalCoverage": "1979-01-01/2016-12-31", + "spatialCoverage": "not detected" } ], "distribution": [ diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld index 0189211..b5caed1 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld @@ -70,62 +70,86 @@ { "@type": "PropertyValue", "name": "Evergreen and Deciduous Needleleaf Trees", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Evergreen Broadleaf Trees", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Deciduous Broadleaf Trees", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + 
"temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Mixed and Other Trees", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Shrubs", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Herbaceous Vegetation", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Cultivated and Managed Vegetation", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Regularly Flooded Vegetation", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Urban and Built-Up", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Snow and Ice", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Barren", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + 
"spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Open Water", - "description": "Consensus prevalence (0–100%)" + "description": "Consensus prevalence (0–100%)", + "temporalCoverage": "1992-01-01/2006-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": "Integration of multiple global remote sensing-derived land-cover products to estimate per-class consensus prevalence (percent) at 1-km resolution.", diff --git a/data/objects/summoned/generated/FLO1K/flo1k.jsonld b/data/objects/summoned/generated/FLO1K/flo1k.jsonld index 74fd801..48669f5 100644 --- a/data/objects/summoned/generated/FLO1K/flo1k.jsonld +++ b/data/objects/summoned/generated/FLO1K/flo1k.jsonld @@ -25,12 +25,30 @@ "Figshare" ], "creator": [ - { "@type": "Person", "name": "Valerio Barbarossa" }, - { "@type": "Person", "name": "Mark A. J. Huijbregts" }, - { "@type": "Person", "name": "Arthur H. W. Beusen" }, - { "@type": "Person", "name": "Hylke E. Beck" }, - { "@type": "Person", "name": "Henry King" }, - { "@type": "Person", "name": "Aafke M. Schipper" } + { + "@type": "Person", + "name": "Valerio Barbarossa" + }, + { + "@type": "Person", + "name": "Mark A. J. Huijbregts" + }, + { + "@type": "Person", + "name": "Arthur H. W. Beusen" + }, + { + "@type": "Person", + "name": "Hylke E. Beck" + }, + { + "@type": "Person", + "name": "Henry King" + }, + { + "@type": "Person", + "name": "Aafke M. Schipper" + } ], "provider": [ { @@ -62,17 +80,23 @@ { "@type": "PropertyValue", "name": "Mean annual streamflow", - "description": "Mean annual flow for each year (gridded, global, ~1 km)." + "description": "Mean annual flow for each year (gridded, global, ~1 km).", + "temporalCoverage": "1960-01-01/2015-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Maximum annual streamflow (maximum monthly flow per year)", - "description": "Maximum monthly flow within each year (gridded, global, ~1 km)." 
+ "description": "Maximum monthly flow within each year (gridded, global, ~1 km).", + "temporalCoverage": "1960-01-01/2015-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Minimum annual streamflow (minimum monthly flow per year)", - "description": "Minimum monthly flow within each year (gridded, global, ~1 km)." + "description": "Minimum monthly flow within each year (gridded, global, ~1 km).", + "temporalCoverage": "1960-01-01/2015-12-31", + "spatialCoverage": "not detected" } ], "encodingFormat": [ @@ -87,14 +111,18 @@ "name": "Figshare collection landing page", "description": "Dataset collection landing page and downloads hosted on Figshare.", "contentUrl": "https://figshare.com/collections/FLO1K_global_maps_of_mean_maximum_and_minimum_annual_streamflow_at_1_km_resolution_from_1960_through_2015/3890224", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Persistent identifier (Figshare collection DOI landing page)", "description": "Persistent DOI for the FLO1K Figshare collection.", "contentUrl": "https://doi.org/10.6084/m9.figshare.c.3890224.v1", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] } ], "citation": [ @@ -102,12 +130,30 @@ "@type": "ScholarlyArticle", "name": "FLO1K, global maps of mean, maximum and minimum annual streamflow at 1 km resolution from 1960 through 2015", "author": [ - { "@type": "Person", "name": "Valerio Barbarossa" }, - { "@type": "Person", "name": "Mark A. J. Huijbregts" }, - { "@type": "Person", "name": "Arthur H. W. Beusen" }, - { "@type": "Person", "name": "Hylke E. Beck" }, - { "@type": "Person", "name": "Henry King" }, - { "@type": "Person", "name": "Aafke M. Schipper" } + { + "@type": "Person", + "name": "Valerio Barbarossa" + }, + { + "@type": "Person", + "name": "Mark A. J. Huijbregts" + }, + { + "@type": "Person", + "name": "Arthur H. W. Beusen" + }, + { + "@type": "Person", + "name": "Hylke E. 
Beck" + }, + { + "@type": "Person", + "name": "Henry King" + }, + { + "@type": "Person", + "name": "Aafke M. Schipper" + } ], "isPartOf": { "@type": "Periodical", @@ -125,11 +171,26 @@ } ], "about": [ - { "@type": "Thing", "name": "Hydrology" }, - { "@type": "Thing", "name": "Streamflow" }, - { "@type": "Thing", "name": "Runoff" }, - { "@type": "Thing", "name": "River discharge" }, - { "@type": "Thing", "name": "Freshwater resources" } + { + "@type": "Thing", + "name": "Hydrology" + }, + { + "@type": "Thing", + "name": "Streamflow" + }, + { + "@type": "Thing", + "name": "Runoff" + }, + { + "@type": "Thing", + "name": "River discharge" + }, + { + "@type": "Thing", + "name": "Freshwater resources" + } ], "sameAs": [ "https://doi.org/10.6084/m9.figshare.c.3890224.v1" diff --git a/data/objects/summoned/generated/G-RUN/g-run.jsonld b/data/objects/summoned/generated/G-RUN/g-run.jsonld index a371387..1e27d75 100644 --- a/data/objects/summoned/generated/G-RUN/g-run.jsonld +++ b/data/objects/summoned/generated/G-RUN/g-run.jsonld @@ -91,7 +91,9 @@ { "@type": "PropertyValue", "name": "Runoff", - "description": "Monthly gridded runoff rates reconstructed using machine learning trained on in-situ streamflow observations." 
+ "description": "Monthly gridded runoff rates reconstructed using machine learning trained on in-situ streamflow observations.", + "temporalCoverage": "1902-01-01/2014-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ @@ -110,14 +112,18 @@ "name": "Figshare dataset landing page", "description": "Dataset landing page and downloads hosted on Figshare.", "contentUrl": "https://figshare.com/articles/dataset/GRUN_Global_Runoff_Reconstruction/9228176", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Persistent identifier (Figshare DOI landing page)", "description": "Persistent DOI for the GRUN dataset on Figshare.", "contentUrl": "https://doi.org/10.6084/m9.figshare.9228176", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] } ], "citation": [ @@ -125,10 +131,22 @@ "@type": "ScholarlyArticle", "name": "GRUN: an observation-based global gridded runoff dataset from 1902 to 2014", "author": [ - { "@type": "Person", "name": "Gionata Ghiggi" }, - { "@type": "Person", "name": "Vincent Humphrey" }, - { "@type": "Person", "name": "Sonia I. Seneviratne" }, - { "@type": "Person", "name": "Lukas Gudmundsson" } + { + "@type": "Person", + "name": "Gionata Ghiggi" + }, + { + "@type": "Person", + "name": "Vincent Humphrey" + }, + { + "@type": "Person", + "name": "Sonia I. 
Seneviratne" + }, + { + "@type": "Person", + "name": "Lukas Gudmundsson" + } ], "isPartOf": { "@type": "Periodical", @@ -146,12 +164,30 @@ } ], "about": [ - { "@type": "Thing", "name": "Hydrology" }, - { "@type": "Thing", "name": "Runoff" }, - { "@type": "Thing", "name": "Streamflow" }, - { "@type": "Thing", "name": "Hydroclimate variability" }, - { "@type": "Thing", "name": "Drought" }, - { "@type": "Thing", "name": "Freshwater resources" } + { + "@type": "Thing", + "name": "Hydrology" + }, + { + "@type": "Thing", + "name": "Runoff" + }, + { + "@type": "Thing", + "name": "Streamflow" + }, + { + "@type": "Thing", + "name": "Hydroclimate variability" + }, + { + "@type": "Thing", + "name": "Drought" + }, + { + "@type": "Thing", + "name": "Freshwater resources" + } ], "sameAs": [ "https://doi.org/10.6084/m9.figshare.9228176" diff --git a/data/objects/summoned/generated/GFC/gfc.jsonld b/data/objects/summoned/generated/GFC/gfc.jsonld index 376d147..5d526a1 100644 --- a/data/objects/summoned/generated/GFC/gfc.jsonld +++ b/data/objects/summoned/generated/GFC/gfc.jsonld @@ -122,49 +122,63 @@ "name": "Tree Canopy Cover in 2000", "alternateName": "treecover2000", "unitText": "percent", - "description": "Tree canopy cover for year 2000, defined as canopy closure for vegetation taller than 5 m." + "description": "Tree canopy cover for year 2000, defined as canopy closure for vegetation taller than 5 m.", + "temporalCoverage": "2000-01-01/2023-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Forest Cover Gain", "alternateName": "gain", "unitText": "binary", - "description": "Forest gain during 2000-2012." 
+ "description": "Forest gain during 2000-2012.", + "temporalCoverage": "2000-01-01/2023-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Year of Gross Forest Cover Loss", "alternateName": "lossyear", "unitText": "year code", - "description": "Year of gross forest cover loss event; 0 indicates no loss and 1-23 correspond primarily to 2001-2023." + "description": "Year of gross forest cover loss event; 0 indicates no loss and 1-23 correspond primarily to 2001-2023.", + "temporalCoverage": "2000-01-01/2023-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Data Mask", "alternateName": "datamask", "unitText": "class code", - "description": "Data mask classes for no data, mapped land surface, and persistent water bodies." + "description": "Data mask classes for no data, mapped land surface, and persistent water bodies.", + "temporalCoverage": "2000-01-01/2023-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "First Reference Landsat Composite", "alternateName": "first", "unitText": "digital number", - "description": "Circa year-2000 Landsat cloud-free composite in red, NIR, SWIR1, and SWIR2 bands." + "description": "Circa year-2000 Landsat cloud-free composite in red, NIR, SWIR1, and SWIR2 bands.", + "temporalCoverage": "2000-01-01/2023-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Last Reference Landsat Composite", "alternateName": "last", "unitText": "digital number", - "description": "Circa year-2023 Landsat cloud-free composite in red, NIR, SWIR1, and SWIR2 bands." 
+ "description": "Circa year-2023 Landsat cloud-free composite in red, NIR, SWIR1, and SWIR2 bands.", + "temporalCoverage": "2000-01-01/2023-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Gross Forest Cover Loss", "alternateName": "loss", "unitText": "derived binary", - "description": "Derived indicator for pixels where lossyear is greater than 0." + "description": "Derived indicator for pixels where lossyear is greater than 0.", + "temporalCoverage": "2000-01-01/2023-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/GHSL/ghsl.jsonld b/data/objects/summoned/generated/GHSL/ghsl.jsonld index 9413b26..cdf8899 100644 --- a/data/objects/summoned/generated/GHSL/ghsl.jsonld +++ b/data/objects/summoned/generated/GHSL/ghsl.jsonld @@ -61,27 +61,37 @@ { "@type": "PropertyValue", "name": "Built-up surface", - "description": "Gridded built-up surface area (including total and non-residential components for some products)." + "description": "Gridded built-up surface area (including total and non-residential components for some products).", + "temporalCoverage": "1975-01-01/2030-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Built-up volume", - "description": "Gridded built-up volume estimates (including total and non-residential components for some products)." + "description": "Gridded built-up volume estimates (including total and non-residential components for some products).", + "temporalCoverage": "1975-01-01/2030-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Building height", - "description": "Gridded building height estimates for selected reference years/products." 
+ "description": "Gridded building height estimates for selected reference years/products.", + "temporalCoverage": "1975-01-01/2030-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Population", - "description": "Residential population grid (number of people per cell) for multiple epochs and projections." + "description": "Residential population grid (number of people per cell) for multiple epochs and projections.", + "temporalCoverage": "1975-01-01/2030-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Settlement Typology and Degree of Urbanisation", - "description": "Settlement model classification based on the UN-recommended Degree of Urbanisation methodology." + "description": "Settlement model classification based on the UN-recommended Degree of Urbanisation methodology.", + "temporalCoverage": "1975-01-01/2030-12-31", + "spatialCoverage": "not detected" } ], "encodingFormat": [ @@ -99,28 +109,36 @@ "name": "GHSL direct download (advanced users)", "description": "Direct download interface for GHSL products (tile-based and single-file downloads).", "contentUrl": "https://human-settlement.emergency.copernicus.eu/download.php", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "GHSL download wizard (step-by-step)", "description": "Interactive wizard to select GHSL product group, dataset, epoch, resolution, and coordinate system for download.", "contentUrl": "https://human-settlement.emergency.copernicus.eu/downloadWizard.php", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Use conditions and how to cite", "description": "GHSL use conditions and citation guidance, including license and reference publication pointers.", "contentUrl": "https://human-settlement.emergency.copernicus.eu/GHSLhowToCite.php", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + 
] }, { "@type": "DataDownload", "name": "JRC GHSL collection landing page", "description": "European Commission JRC collection page aggregating GHSL datasets and releases.", "contentUrl": "https://data.jrc.ec.europa.eu/collection/ghsl", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] } ], "citation": [ @@ -128,13 +146,34 @@ "@type": "ScholarlyArticle", "name": "Advances on the Global Human Settlement Layer by joint assessment of Earth Observation and population survey data", "author": [ - { "@type": "Person", "name": "M. Pesaresi" }, - { "@type": "Person", "name": "M. Schiavina" }, - { "@type": "Person", "name": "P. Politis" }, - { "@type": "Person", "name": "S. Freire" }, - { "@type": "Person", "name": "K. Krasnodębska" }, - { "@type": "Person", "name": "J. H. Uhl" }, - { "@type": "Person", "name": "T. Kemper" } + { + "@type": "Person", + "name": "M. Pesaresi" + }, + { + "@type": "Person", + "name": "M. Schiavina" + }, + { + "@type": "Person", + "name": "P. Politis" + }, + { + "@type": "Person", + "name": "S. Freire" + }, + { + "@type": "Person", + "name": "K. Krasnodębska" + }, + { + "@type": "Person", + "name": "J. H. Uhl" + }, + { + "@type": "Person", + "name": "T. 
Kemper" + } ], "isPartOf": { "@type": "Periodical", @@ -145,11 +184,26 @@ } ], "about": [ - { "@type": "Thing", "name": "Human settlements" }, - { "@type": "Thing", "name": "Built environment" }, - { "@type": "Thing", "name": "Population distribution" }, - { "@type": "Thing", "name": "Urbanization" }, - { "@type": "Thing", "name": "Remote sensing" } + { + "@type": "Thing", + "name": "Human settlements" + }, + { + "@type": "Thing", + "name": "Built environment" + }, + { + "@type": "Thing", + "name": "Population distribution" + }, + { + "@type": "Thing", + "name": "Urbanization" + }, + { + "@type": "Thing", + "name": "Remote sensing" + } ], "sameAs": [ "https://human-settlement.emergency.copernicus.eu/" diff --git a/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld b/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld index ce06e08..9280c68 100644 --- a/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld +++ b/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld @@ -43,12 +43,16 @@ { "@type": "PropertyValue", "name": "GPP", - "description": "Gross Primary Production" + "description": "Gross Primary Production", + "temporalCoverage": "2000-01-01/2013-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "NPP", - "description": "Net Primary Production" + "description": "Net Primary Production", + "temporalCoverage": "2000-01-01/2013-12-31", + "spatialCoverage": "not detected" } ], "spatialCoverage": { diff --git a/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld b/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld index 6b7478a..6045353 100644 --- a/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld +++ b/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld @@ -79,12 +79,16 @@ { "@type": "PropertyValue", "name": "Terrestrial water storage anomaly", - "description": "Reconstructed climate-driven terrestrial water storage anomalies (TWSA) derived from statistical modeling calibrated 
to GRACE observations." + "description": "Reconstructed climate-driven terrestrial water storage anomalies (TWSA) derived from statistical modeling calibrated to GRACE observations.", + "temporalCoverage": "1901-01-01/2019-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Ensemble members", - "description": "Multiple ensemble realizations to quantify predictive uncertainty for reconstructed TWS anomalies." + "description": "Multiple ensemble realizations to quantify predictive uncertainty for reconstructed TWS anomalies.", + "temporalCoverage": "1901-01-01/2019-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ @@ -103,14 +107,18 @@ "name": "Figshare dataset landing page", "description": "Dataset landing page and downloads hosted on Figshare.", "contentUrl": "https://figshare.com/articles/dataset/GRACE-REC_A_reconstruction_of_climate-driven_water_storage_changes_over_the_last_century/7670849", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Figshare DOI landing page", "description": "Persistent DOI landing page for the dataset on Figshare.", "contentUrl": "https://doi.org/10.6084/m9.figshare.7670849", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] } ], "citation": [ @@ -118,8 +126,14 @@ "@type": "ScholarlyArticle", "name": "GRACE-REC: a reconstruction of climate-driven water storage changes over the last century", "author": [ - { "@type": "Person", "name": "Vincent Humphrey" }, - { "@type": "Person", "name": "Lukas Gudmundsson" } + { + "@type": "Person", + "name": "Vincent Humphrey" + }, + { + "@type": "Person", + "name": "Lukas Gudmundsson" + } ], "isPartOf": { "@type": "Periodical", @@ -137,12 +151,30 @@ } ], "about": [ - { "@type": "Thing", "name": "Hydrology" }, - { "@type": "Thing", "name": "Terrestrial water storage" }, - { "@type": "Thing", "name": "Climate variability" }, - { "@type": "Thing", "name": 
"Drought" }, - { "@type": "Thing", "name": "Floods" }, - { "@type": "Thing", "name": "GRACE satellite gravimetry" } + { + "@type": "Thing", + "name": "Hydrology" + }, + { + "@type": "Thing", + "name": "Terrestrial water storage" + }, + { + "@type": "Thing", + "name": "Climate variability" + }, + { + "@type": "Thing", + "name": "Drought" + }, + { + "@type": "Thing", + "name": "Floods" + }, + { + "@type": "Thing", + "name": "GRACE satellite gravimetry" + } ], "sameAs": [ "https://doi.org/10.6084/m9.figshare.7670849" diff --git a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld index 29ba1ec..3d21e40 100644 --- a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld +++ b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld @@ -72,22 +72,30 @@ { "@type": "PropertyValue", "name": "Soil moisture (0–10 cm)", - "description": "Monthly soil moisture for the 0–10 cm layer." + "description": "Monthly soil moisture for the 0–10 cm layer.", + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Soil moisture (10–30 cm)", - "description": "Monthly soil moisture for the 10–30 cm layer." + "description": "Monthly soil moisture for the 10–30 cm layer.", + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Soil moisture (30–50 cm)", - "description": "Monthly soil moisture for the 30–50 cm layer." + "description": "Monthly soil moisture for the 30–50 cm layer.", + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Soil moisture (50–100 cm)", - "description": "Monthly soil moisture for the 50–100 cm layer." 
+ "description": "Monthly soil moisture for the 50–100 cm layer.", + "temporalCoverage": "1970-01-01/2016-12-31", + "spatialCoverage": "not detected" } ], "encodingFormat": [ @@ -102,14 +110,18 @@ "name": "Figshare dataset landing page (DOI)", "description": "Persistent DOI landing page for the dataset on Figshare.", "contentUrl": "https://doi.org/10.6084/m9.figshare.13661312.v1", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Figshare dataset file access (version 1, file link)", "description": "Direct access link to a dataset file hosted on Figshare (may require browser access controls).", "contentUrl": "https://figshare.com/articles/dataset/Global_Multi-layer_Soil_Moisture_Products/13661312/1?file=26220602", - "encodingFormat": ["application/zip"] + "encodingFormat": [ + "application/zip" + ] } ], "citation": [ @@ -117,8 +129,14 @@ "@type": "ScholarlyArticle", "name": "Development of observation-based global multilayer soil moisture products for 1970 to 2016", "author": [ - { "@type": "Person", "name": "Yaoping Wang" }, - { "@type": "Person", "name": "Jiafu Mao" } + { + "@type": "Person", + "name": "Yaoping Wang" + }, + { + "@type": "Person", + "name": "Jiafu Mao" + } ], "isPartOf": { "@type": "Periodical", @@ -136,11 +154,26 @@ } ], "about": [ - { "@type": "Thing", "name": "Soil" }, - { "@type": "Thing", "name": "Soil moisture" }, - { "@type": "Thing", "name": "Hydrology" }, - { "@type": "Thing", "name": "Climate" }, - { "@type": "Thing", "name": "Land surface processes" } + { + "@type": "Thing", + "name": "Soil" + }, + { + "@type": "Thing", + "name": "Soil moisture" + }, + { + "@type": "Thing", + "name": "Hydrology" + }, + { + "@type": "Thing", + "name": "Climate" + }, + { + "@type": "Thing", + "name": "Land surface processes" + } ], "sameAs": [ "https://doi.org/10.6084/m9.figshare.13661312.v1" diff --git a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld 
b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld index 0d386d3..9cc20ef 100644 --- a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld +++ b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld @@ -94,12 +94,16 @@ { "@type": "PropertyValue", "name": "Tree density (biome-level model)", - "description": "Biome-level regression model predictions applied at biome scale." + "description": "Biome-level regression model predictions applied at biome scale.", + "temporalCoverage": "2015-09-02", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Tree density (ecoregion-level model)", - "description": "Ecoregion-level regression model predictions applied at ecoregion scale." + "description": "Ecoregion-level regression model predictions applied at ecoregion scale.", + "temporalCoverage": "2015-09-02", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ @@ -213,4 +217,4 @@ "name": "Global environmental mapping" } ] -} \ No newline at end of file +} diff --git a/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld b/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld index 0397515..0954cc1 100644 --- a/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld +++ b/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld @@ -63,47 +63,65 @@ { "@type": "PropertyValue", "name": "Catchment and Sub-Basin Boundaries", - "description": "Vector catchment and sub-basin boundary products derived from HydroSHEDS hydrography." + "description": "Vector catchment and sub-basin boundary products derived from HydroSHEDS hydrography.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "River networks", - "description": "Vector river/stream network products derived from HydroSHEDS hydrography." 
+ "description": "Vector river/stream network products derived from HydroSHEDS hydrography.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Lakes and water bodies", - "description": "Lake and water body products distributed as part of the HydroSHEDS product suite." + "description": "Lake and water body products distributed as part of the HydroSHEDS product suite.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Void-filled DEM", - "description": "Digital elevation model underpinning HydroSHEDS core layers." + "description": "Digital elevation model underpinning HydroSHEDS core layers.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Conditioned DEM", - "description": "Hydrologically conditioned DEM used to derive flow products." + "description": "Hydrologically conditioned DEM used to derive flow products.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Flow direction", - "description": "Drainage direction grid derived from the conditioned DEM." + "description": "Drainage direction grid derived from the conditioned DEM.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Flow accumulation", - "description": "Upstream contributing area / upstream cell count derived from flow direction." + "description": "Upstream contributing area / upstream cell count derived from flow direction.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Flow length", - "description": "Upstream and/or downstream flow length derived from flow direction." 
+ "description": "Upstream and/or downstream flow length derived from flow direction.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Land mask and sinks", - "description": "Land/ocean mask and coastal/inland sink indicators used in HydroSHEDS processing." + "description": "Land/ocean mask and coastal/inland sink indicators used in HydroSHEDS processing.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" } ], "encodingFormat": [ @@ -120,28 +138,36 @@ "name": "HydroSHEDS website (overview)", "description": "HydroSHEDS main website with product overview and navigation to downloads.", "contentUrl": "https://www.hydrosheds.org/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "HydroSHEDS core data downloads (GeoTIFF)", "description": "Download page for HydroSHEDS core raster layers (e.g., DEM, conditioned DEM, flow direction, flow accumulation, flow length, land mask) in multiple resolutions, provided as GeoTIFF tiles and regional/global bundles.", "contentUrl": "https://www.hydrosheds.org/hydrosheds-core-downloads", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "HydroSHEDS products index", "description": "Products landing page providing access to HydroSHEDS datasets (core layers and derived hydrographic products).", "contentUrl": "https://www.hydrosheds.org/products", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "HydroSHEDS technical documentation and license agreement", "description": "Technical documentation for HydroSHEDS v1 including the HydroSHEDS v1 license agreement (Appendix A).", "contentUrl": "https://data.hydrosheds.org/file/technical-documentation/HydroSHEDS_TechDoc_v1_4.pdf", - "encodingFormat": ["application/pdf"] + "encodingFormat": [ + "application/pdf" + ] } ], 
"measurementTechnique": [ @@ -154,9 +180,18 @@ "@type": "ScholarlyArticle", "name": "New global hydrography derived from spaceborne elevation data", "author": [ - { "@type": "Person", "name": "B. Lehner" }, - { "@type": "Person", "name": "K. Verdin" }, - { "@type": "Person", "name": "A. Jarvis" } + { + "@type": "Person", + "name": "B. Lehner" + }, + { + "@type": "Person", + "name": "K. Verdin" + }, + { + "@type": "Person", + "name": "A. Jarvis" + } ], "isPartOf": { "@type": "Periodical", @@ -174,13 +209,34 @@ } ], "about": [ - { "@type": "Thing", "name": "Hydrology" }, - { "@type": "Thing", "name": "Hydrography" }, - { "@type": "Thing", "name": "Watersheds" }, - { "@type": "Thing", "name": "River networks" }, - { "@type": "Thing", "name": "Catchments" }, - { "@type": "Thing", "name": "Digital elevation models" }, - { "@type": "Thing", "name": "Freshwater conservation" } + { + "@type": "Thing", + "name": "Hydrology" + }, + { + "@type": "Thing", + "name": "Hydrography" + }, + { + "@type": "Thing", + "name": "Watersheds" + }, + { + "@type": "Thing", + "name": "River networks" + }, + { + "@type": "Thing", + "name": "Catchments" + }, + { + "@type": "Thing", + "name": "Digital elevation models" + }, + { + "@type": "Thing", + "name": "Freshwater conservation" + } ], "sameAs": [ "https://www.worldwildlife.org/our-work/science/hydrosheds/" diff --git a/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld b/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld index 51395b7..8aec239 100644 --- a/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld +++ b/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld @@ -28,14 +28,38 @@ "global" ], "creator": [ - { "@type": "Person", "name": "Giuseppe Amatulli" }, - { "@type": "Person", "name": "Jaime R. 
Garcia Marquez" }, - { "@type": "Person", "name": "Tushar Sethi" }, - { "@type": "Person", "name": "Jens Kiesel" }, - { "@type": "Person", "name": "Afroditi Grigoropoulou" }, - { "@type": "Person", "name": "Maria M. Üblacker" }, - { "@type": "Person", "name": "Longzhu Q. Shen" }, - { "@type": "Person", "name": "Sami Domisch" } + { + "@type": "Person", + "name": "Giuseppe Amatulli" + }, + { + "@type": "Person", + "name": "Jaime R. Garcia Marquez" + }, + { + "@type": "Person", + "name": "Tushar Sethi" + }, + { + "@type": "Person", + "name": "Jens Kiesel" + }, + { + "@type": "Person", + "name": "Afroditi Grigoropoulou" + }, + { + "@type": "Person", + "name": "Maria M. Üblacker" + }, + { + "@type": "Person", + "name": "Longzhu Q. Shen" + }, + { + "@type": "Person", + "name": "Sami Domisch" + } ], "provider": [ { @@ -74,37 +98,51 @@ { "@type": "PropertyValue", "name": "Stream channels", - "description": "Global stream channel network with unique segment identifiers and topology attributes." + "description": "Global stream channel network with unique segment identifiers and topology attributes.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Drainage basins", - "description": "Global drainage basin delineations derived from flow routing." + "description": "Global drainage basin delineations derived from flow routing.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Sub-catchments", - "description": "Sub-catchment polygons linked to individual stream segments." + "description": "Sub-catchment polygons linked to individual stream segments.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Network topology", - "description": "Upstream/downstream connectivity and routing attributes for stream segments." 
+ "description": "Upstream/downstream connectivity and routing attributes for stream segments.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Stream order", - "description": "Stream order metrics computed for the network." + "description": "Stream order metrics computed for the network.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Stream slope", - "description": "Slope metrics computed along stream segments." + "description": "Slope metrics computed along stream segments.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Distance metrics", - "description": "In-stream and among-stream distance measures for network analysis." + "description": "In-stream and among-stream distance measures for network analysis.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ @@ -126,28 +164,36 @@ "name": "Hydrography90m website (project landing page)", "description": "Project website for Hydrography90m with documentation and navigation to layers and downloads.", "contentUrl": "https://hydrography.org/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Hydrography90m layers overview", "description": "Overview of Hydrography90m layers available for download.", "contentUrl": "https://hydrography.org/hydrography90m/hydrography90m_layers", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Hydrography90m batch download script", "description": "Scripted procedure to download tiled raster and vector layers of Hydrography90m.", "contentUrl": "https://hydrography.org/hydrography90m/hydrography90m_download_script", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "IGB FRED 
dataset landing page (DOI)", "description": "Institutional dataset record and distribution entry for Hydrography90m hosted by IGB (FRED).", "contentUrl": "https://doi.org/10.18728/igb-fred-762.1", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] } ], "citation": [ @@ -155,14 +201,38 @@ "@type": "ScholarlyArticle", "name": "Hydrography90m: A new high-resolution global hydrographic dataset", "author": [ - { "@type": "Person", "name": "Giuseppe Amatulli" }, - { "@type": "Person", "name": "Jaime R. Garcia Marquez" }, - { "@type": "Person", "name": "Tushar Sethi" }, - { "@type": "Person", "name": "Jens Kiesel" }, - { "@type": "Person", "name": "Afroditi Grigoropoulou" }, - { "@type": "Person", "name": "Maria M. Üblacker" }, - { "@type": "Person", "name": "Longzhu Q. Shen" }, - { "@type": "Person", "name": "Sami Domisch" } + { + "@type": "Person", + "name": "Giuseppe Amatulli" + }, + { + "@type": "Person", + "name": "Jaime R. Garcia Marquez" + }, + { + "@type": "Person", + "name": "Tushar Sethi" + }, + { + "@type": "Person", + "name": "Jens Kiesel" + }, + { + "@type": "Person", + "name": "Afroditi Grigoropoulou" + }, + { + "@type": "Person", + "name": "Maria M. Üblacker" + }, + { + "@type": "Person", + "name": "Longzhu Q. 
Shen" + }, + { + "@type": "Person", + "name": "Sami Domisch" + } ], "isPartOf": { "@type": "Periodical", @@ -180,12 +250,30 @@ } ], "about": [ - { "@type": "Thing", "name": "Hydrology" }, - { "@type": "Thing", "name": "Hydrography" }, - { "@type": "Thing", "name": "Drainage basins" }, - { "@type": "Thing", "name": "River networks" }, - { "@type": "Thing", "name": "Flow routing" }, - { "@type": "Thing", "name": "Global environmental mapping" } + { + "@type": "Thing", + "name": "Hydrology" + }, + { + "@type": "Thing", + "name": "Hydrography" + }, + { + "@type": "Thing", + "name": "Drainage basins" + }, + { + "@type": "Thing", + "name": "River networks" + }, + { + "@type": "Thing", + "name": "Flow routing" + }, + { + "@type": "Thing", + "name": "Global environmental mapping" + } ], "sameAs": [ "https://www.igb-berlin.de/en/hydrography90m-dataset", diff --git a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld index 051a634..6116161 100644 --- a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld @@ -69,7 +69,9 @@ "@type": "PropertyValue", "name": "Elevation", "unitText": "meter", - "description": "Terrain elevation in meters referenced to WGS84 and the EGM96 geoid." 
+ "description": "Terrain elevation in meters referenced to WGS84 and the EGM96 geoid.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld index c53f59b..bfaac60 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld @@ -44,27 +44,39 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "Land Cover" + "name": "Land Cover", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Land Cover Change" + "name": "Land Cover Change", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Land Cover Confidence" + "name": "Land Cover Confidence", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Fractional Impervious Surface" + "name": "Fractional Impervious Surface", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Impervious Descriptor" + "name": "Impervious Descriptor", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Spectral Change Day of Year" + "name": "Spectral Change Day of Year", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld index 735095b..8785233 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld @@ -43,27 
+43,39 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "EAG fractional cover (multiple species group)" + "name": "EAG fractional cover (multiple species group)", + "temporalCoverage": "2016-01-01/2025-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Cheatgrass (Bromus tectorum) fractional cover" + "name": "Cheatgrass (Bromus tectorum) fractional cover", + "temporalCoverage": "2016-01-01/2025-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Other Bromus spp. fractional cover (Field brome + Japanese brome)" + "name": "Other Bromus spp. fractional cover (Field brome + Japanese brome)", + "temporalCoverage": "2016-01-01/2025-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Medusahead (Taeniatherum caput-medusae) fractional cover" + "name": "Medusahead (Taeniatherum caput-medusae) fractional cover", + "temporalCoverage": "2016-01-01/2025-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Sandberg bluegrass (Poa secunda) fractional cover" + "name": "Sandberg bluegrass (Poa secunda) fractional cover", + "temporalCoverage": "2016-01-01/2025-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Confidence maps (per target)" + "name": "Confidence maps (per target)", + "temporalCoverage": "2016-01-01/2025-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld index 9442aad..a15f489 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld @@ -42,19 +42,27 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "Land cover class (16-class legend)" + "name": "Land cover class (16-class legend)", + "temporalCoverage": "Static", + 
"spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Land cover change (varies by product)" + "name": "Land cover change (varies by product)", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Percent impervious surface (selected releases)" + "name": "Percent impervious surface (selected releases)", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Tree canopy cover (selected releases)" + "name": "Tree canopy cover (selected releases)", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld index 1106ccb..acc7836 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld @@ -58,7 +58,9 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "Land cover class" + "name": "Land cover class", + "temporalCoverage": "2019-01-01/2021-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld index 9a19d35..7dff542 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld @@ -43,27 +43,39 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "Bare ground cover (percent)" + "name": "Bare ground cover (percent)", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Herbaceous cover (percent)" + "name": "Herbaceous cover (percent)", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Litter cover (percent)" + "name": "Litter cover 
(percent)", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Shrub cover (percent)" + "name": "Shrub cover (percent)", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Sagebrush cover (percent)" + "name": "Sagebrush cover (percent)", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Tree cover (percent)" + "name": "Tree cover (percent)", + "temporalCoverage": "1985-01-01/2024-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld index cea03f1..f9a1da0 100644 --- a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld +++ b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld @@ -65,27 +65,37 @@ { "@type": "PropertyValue", "name": "Common water quality measurements", - "description": "Common field and laboratory measurements such as pH, major ions (Na, K, Mg, Ca, sulfate, chloride, bromide), nutrients (ammonium, nitrate, nitrite, total N), alkalinity/acidity, hardness, TDS, and related parameters." + "description": "Common field and laboratory measurements such as pH, major ions (Na, K, Mg, Ca, sulfate, chloride, bromide), nutrients (ammonium, nitrate, nitrite, total N), alkalinity/acidity, hardness, TDS, and related parameters.", + "temporalCoverage": "2011-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Trace elements", - "description": "Trace element concentrations such as Al, As, Ba, B, Cd, Cr, Co, Cu, Fe, Pb, Li, Mn, Hg, Mo, Ni, Se, Ag, Sr, Th, U, Zn (availability varies by site and dataset)." 
+ "description": "Trace element concentrations such as Al, As, Ba, B, Cd, Cr, Co, Cu, Fe, Pb, Li, Mn, Hg, Mo, Ni, Se, Ag, Sr, Th, U, Zn (availability varies by site and dataset).", + "temporalCoverage": "2011-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Naturally occurring radioactive material (NORM)", - "description": "Radiological measurements such as gross alpha/beta, Ra-226, and Ra-228 (availability varies)." + "description": "Radiological measurements such as gross alpha/beta, Ra-226, and Ra-228 (availability varies).", + "temporalCoverage": "2011-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Organic constituents", - "description": "Organic compounds and indicators such as benzene, toluene, ethylbenzene, xylenes, naphthalene, oil and grease, phenolics, and related constituents (availability varies)." + "description": "Organic compounds and indicators such as benzene, toluene, ethylbenzene, xylenes, naphthalene, oil and grease, phenolics, and related constituents (availability varies).", + "temporalCoverage": "2011-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Water quantity (where available)", - "description": "Water quantity observations associated with monitoring sites where contributed and published through HIS services." 
+ "description": "Water quantity observations associated with monitoring sites where contributed and published through HIS services.", + "temporalCoverage": "2011-01-01/..", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ @@ -104,28 +114,36 @@ "name": "DOI landing page", "description": "Persistent identifier landing page for the Shale Network database.", "contentUrl": "https://doi.org/10.4211/his-data-shalenetwork", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "HIS Central network registration (Shale Network)", "description": "HIS Central registry entry for the Shale Network WaterOneFlow service and citation information.", "contentUrl": "https://hiscentral.cuahsi.org/pub_network.aspx?n=228", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "HydroClient data portal access", "description": "CUAHSI HydroClient portal for discovering and downloading published observations.", "contentUrl": "https://data.cuahsi.org/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "Shale Network data access documentation", "description": "Project documentation describing ways to access Shale Network data, including HydroClient, HydroShare, and Penn State DataCommons.", "contentUrl": "https://shalenetwork.org/database/data-access.html", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] } ], "citation": [ @@ -133,7 +151,10 @@ "@type": "CreativeWork", "name": "Shale Network Database", "author": [ - { "@type": "Person", "name": "Susan L. Brantley" } + { + "@type": "Person", + "name": "Susan L. 
Brantley" + } ], "datePublished": "2011", "identifier": [ @@ -147,11 +168,29 @@ } ], "about": [ - { "@type": "Thing", "name": "Hydrogeochemistry" }, - { "@type": "Thing", "name": "Water quality" }, - { "@type": "Thing", "name": "Oil and gas development" }, - { "@type": "Thing", "name": "Shale gas" }, - { "@type": "Thing", "name": "Groundwater" }, - { "@type": "Thing", "name": "Surface water" } + { + "@type": "Thing", + "name": "Hydrogeochemistry" + }, + { + "@type": "Thing", + "name": "Water quality" + }, + { + "@type": "Thing", + "name": "Oil and gas development" + }, + { + "@type": "Thing", + "name": "Shale gas" + }, + { + "@type": "Thing", + "name": "Groundwater" + }, + { + "@type": "Thing", + "name": "Surface water" + } ] } diff --git a/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld b/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld index 5ff27d8..7c72dd4 100644 --- a/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld +++ b/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld @@ -58,28 +58,129 @@ "variableMeasured": [ { "@type": "PropertyValue", - "name": "Bulk density", - "description": "Predicted soil bulk density maps (global gridded layers)." + "name": "Bulk Density", + "alternateName": "bdod", + "unitText": "cg/cm^3", + "description": "Predicted bulk density maps for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Soil organic carbon content", - "description": "Predicted soil organic carbon content maps (global gridded layers)." 
+ "name": "Cation Exchange Capacity at pH 7", + "alternateName": "cec", + "unitText": "mmol(c)/kg", + "description": "Predicted cation exchange capacity buffered at pH 7 for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Soil pH (in H2O)", - "description": "Predicted soil pH maps (global gridded layers)." + "name": "Coarse Fragments", + "alternateName": "cfvo", + "unitText": "cm^3/dm^3", + "description": "Predicted volumetric fraction of coarse fragments for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Soil texture fractions", - "description": "Predicted sand, silt, and clay fraction maps (global gridded layers)." + "name": "Clay", + "alternateName": "clay", + "unitText": "g/kg", + "description": "Predicted clay content for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", - "name": "Coarse fragments", - "description": "Predicted coarse fragment content maps (global gridded layers)." 
+ "name": "Nitrogen", + "alternateName": "nitrogen", + "unitText": "cg/kg", + "description": "Predicted total nitrogen for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "Organic Carbon Density", + "alternateName": "ocd", + "unitText": "hg/m^3", + "description": "Predicted organic carbon density for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "Organic Carbon Stocks", + "alternateName": "ocs", + "unitText": "t/ha", + "description": "Predicted organic carbon stocks for SoilGrids soil layers.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "Soil Organic Carbon", + "alternateName": "soc", + "unitText": "dg/kg", + "description": "Predicted soil organic carbon concentration for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "pH Water", + "alternateName": "phh2o", + "unitText": "pH x 10", + "description": "Predicted soil pH in water for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "Sand", + "alternateName": "sand", + "unitText": "g/kg", + "description": "Predicted sand content for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "Silt", + "alternateName": "silt", + "unitText": "g/kg", + "description": "Predicted silt content for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "Volumetric Water Content at 10 kPa", + "alternateName": "wv0010", + "unitText": "10^-3 cm^3 cm^-3", + 
"description": "Predicted volumetric water content at 10 kPa for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "Volumetric Water Content at 1500 kPa", + "alternateName": "wv1500", + "unitText": "10^-3 cm^3 cm^-3", + "description": "Predicted volumetric water content at 1500 kPa for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" + }, + { + "@type": "PropertyValue", + "name": "Volumetric Water Content at 33 kPa", + "alternateName": "wv0033", + "unitText": "10^-3 cm^3 cm^-3", + "description": "Predicted volumetric water content at 33 kPa for standard SoilGrids depth intervals.", + "temporalCoverage": "Static", + "spatialCoverage": "not detected" } ], "encodingFormat": [ diff --git a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld index e0b95ac..ab8880f 100644 --- a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +++ b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld @@ -100,72 +100,142 @@ { "@type": "PropertyValue", "name": "Maximum temperature", - "unitText": "C" + "unitText": "C", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Minimum temperature", - "unitText": "C" + "unitText": "C", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Vapor pressure", - "unitText": "kPa" + "unitText": "kPa", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Precipitation accumulation", - "unitText": "mm" + "unitText": "mm", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + 
"spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Downward surface shortwave radiation", - "unitText": "W/m2" + "unitText": "W/m2", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Wind speed", - "unitText": "m/s" + "unitText": "m/s", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Reference evapotranspiration (ASCE Penman–Monteith)", - "unitText": "mm" + "unitText": "mm", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Runoff", - "unitText": "mm" + "unitText": "mm", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Actual evapotranspiration", - "unitText": "mm" + "unitText": "mm", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Climate water deficit", - "unitText": "mm" + "unitText": "mm", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Soil moisture (total column, end of month)", - "unitText": "mm" + "unitText": "mm", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Snow water equivalent (end of month)", - "unitText": "mm" + "unitText": "mm", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Palmer Drought Severity Index (PDSI)", - "unitText": "unitless" + "unitText": "unitless", + "temporalCoverage": [ + "1950-01-01/..", + 
"1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Vapor pressure deficit (VPD)", - "unitText": "kPa" + "unitText": "kPa", + "temporalCoverage": [ + "1950-01-01/..", + "1950-01-01/2025-12-31" + ], + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/WATERBASE/waterbase.jsonld b/data/objects/summoned/generated/WATERBASE/waterbase.jsonld index ecb4baa..a0c6a53 100644 --- a/data/objects/summoned/generated/WATERBASE/waterbase.jsonld +++ b/data/objects/summoned/generated/WATERBASE/waterbase.jsonld @@ -60,17 +60,23 @@ { "@type": "PropertyValue", "name": "Water quality parameters", - "description": "Measured and aggregated parameters describing chemical and physico-chemical water quality in inland and coastal/marine waters (including nutrients, organic matter, hazardous substances, pesticides and other chemicals)." + "description": "Measured and aggregated parameters describing chemical and physico-chemical water quality in inland and coastal/marine waters (including nutrients, organic matter, hazardous substances, pesticides and other chemicals).", + "temporalCoverage": "1900-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Monitoring locations and water bodies", - "description": "Spatial identifiers and associated attributes for monitoring sites and water bodies reported through WISE and WFD/WISE spatial reporting." + "description": "Spatial identifiers and associated attributes for monitoring sites and water bodies reported through WISE and WFD/WISE spatial reporting.", + "temporalCoverage": "1900-01-01/2024-12-31", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Ecological and chemical status (where applicable)", - "description": "Reported status and classification attributes associated with monitored waters and water bodies, as provided in reporting streams." 
+ "description": "Reported status and classification attributes associated with monitored waters and water bodies, as provided in reporting streams.", + "temporalCoverage": "1900-01-01/2024-12-31", + "spatialCoverage": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld b/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld index d87f7f8..0d82f95 100644 --- a/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld +++ b/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld @@ -78,37 +78,51 @@ { "@type": "PropertyValue", "name": "Water quality results", - "description": "Discrete sample results including measured values, units, methods, and qualifiers for physical, chemical, and biological characteristics." + "description": "Discrete sample results including measured values, units, methods, and qualifiers for physical, chemical, and biological characteristics.", + "temporalCoverage": "1900-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Station and Monitoring Location Metadata", - "description": "Locations where samples and observations were collected, including identifiers, coordinates, and site descriptors." + "description": "Locations where samples and observations were collected, including identifiers, coordinates, and site descriptors.", + "temporalCoverage": "1900-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Nutrients", - "description": "Nutrient-related characteristics such as nitrogen and phosphorus species." 
+ "description": "Nutrient-related characteristics such as nitrogen and phosphorus species.", + "temporalCoverage": "1900-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Metals and trace elements", - "description": "Metals and trace elements measured in water, sediment, or related matrices." + "description": "Metals and trace elements measured in water, sediment, or related matrices.", + "temporalCoverage": "1900-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Organic contaminants and pesticides", - "description": "Organic contaminants, pesticides, and related analytes reported by contributing organizations." + "description": "Organic contaminants, pesticides, and related analytes reported by contributing organizations.", + "temporalCoverage": "1900-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Physical parameters", - "description": "Physical characteristics such as temperature, specific conductance, turbidity, and dissolved oxygen (where available)." + "description": "Physical characteristics such as temperature, specific conductance, turbidity, and dissolved oxygen (where available).", + "temporalCoverage": "1900-01-01/..", + "spatialCoverage": "not detected" }, { "@type": "PropertyValue", "name": "Biological data", - "description": "Biological observations and metrics available through WQP services where reported by data providers." 
+ "description": "Biological observations and metrics available through WQP services where reported by data providers.", + "temporalCoverage": "1900-01-01/..", + "spatialCoverage": "not detected" } ], "encodingFormat": [ @@ -126,42 +140,60 @@ "name": "Water Quality Portal (web interface)", "description": "Main portal interface for querying and downloading water-quality stations and results.", "contentUrl": "https://www.waterqualitydata.us/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "WQP Web Services Guide", "description": "Documentation for constructing REST web-service requests and available endpoints and parameters.", "contentUrl": "https://www.waterqualitydata.us/webservices_documentation/", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "WQP Data Download API (Swagger UI)", "description": "Interactive API documentation for Water Quality Portal data download services.", "contentUrl": "https://www.waterqualitydata.us/data/swagger-ui/index.html", - "encodingFormat": ["text/html"] + "encodingFormat": [ + "text/html" + ] }, { "@type": "DataDownload", "name": "WQP Station (sites) service", "description": "Base endpoint for downloading station (monitoring location) data and metadata via REST.", "contentUrl": "https://www.waterqualitydata.us/data/Station/search", - "encodingFormat": ["application/json", "text/csv", "application/xml"] + "encodingFormat": [ + "application/json", + "text/csv", + "application/xml" + ] }, { "@type": "DataDownload", "name": "WQP Result (analytical results) service", "description": "Base endpoint for downloading discrete water-quality result records via REST.", "contentUrl": "https://www.waterqualitydata.us/data/Result/search", - "encodingFormat": ["application/json", "text/csv", "application/xml"] + "encodingFormat": [ + "application/json", + "text/csv", + "application/xml" + ] }, { "@type": "DataDownload", 
"name": "WQP OGC services (WMS/WFS)", "description": "OGC-compliant WMS/WFS services for mapping and feature access based on WQP search parameters.", "contentUrl": "https://www.waterqualitydata.us/ogcservices/", - "encodingFormat": ["text/html", "application/xml", "application/geo+json"] + "encodingFormat": [ + "text/html", + "application/xml", + "application/geo+json" + ] } ], "citation": [ @@ -169,7 +201,10 @@ "@type": "ScholarlyArticle", "name": "Water quality data for national-scale aquatic research: The Water Quality Portal", "author": [ - { "@type": "Person", "name": "E. K. Read" } + { + "@type": "Person", + "name": "E. K. Read" + } ], "isPartOf": { "@type": "Periodical", @@ -187,10 +222,25 @@ } ], "about": [ - { "@type": "Thing", "name": "Water quality" }, - { "@type": "Thing", "name": "Hydrogeochemistry" }, - { "@type": "Thing", "name": "Environmental monitoring" }, - { "@type": "Thing", "name": "Surface water" }, - { "@type": "Thing", "name": "Groundwater" } + { + "@type": "Thing", + "name": "Water quality" + }, + { + "@type": "Thing", + "name": "Hydrogeochemistry" + }, + { + "@type": "Thing", + "name": "Environmental monitoring" + }, + { + "@type": "Thing", + "name": "Surface water" + }, + { + "@type": "Thing", + "name": "Groundwater" + } ] } diff --git a/docs/jsonld-validation-plan.md b/docs/jsonld-validation-plan.md index 8f31dba..2cb3be2 100644 --- a/docs/jsonld-validation-plan.md +++ b/docs/jsonld-validation-plan.md @@ -9,12 +9,15 @@ Validate generated JSON-LD against the authoritative dataset webpage, linked dow - Expand source-listed variables into one `variableMeasured` entry per variable. Use the physical variable label as `name` and put short codes such as `bio01`, `tas`, or `lossyear` in `alternateName`. - Include all citations shown by the source page. When a page has both `Model Citation` and `Data Citation`, represent both as separate structured citation objects. 
- Use the exact URL behind a source page's Download button or direct data/API endpoint for `distribution[].contentUrl`; do not use a generic portal root when a more specific target is available. +- Include variables listed in expandable menus, layer lists, and property tables, not only variables visible in the initial page text. +- For each `variableMeasured`, include `temporalCoverage`. Use the source-supported variable or dataset temporal range when available; otherwise use `Static`. +- For each `variableMeasured`, include `spatialCoverage`. Use variable-specific spatial coverage when available; otherwise use `not detected`. ## Generation Safeguards -- The generation prompt requires separate variable rows, structured citation arrays, and exact download targets. +- The generation prompt requires separate variable rows, structured citation arrays, exact download targets, and variable-level temporal and spatial coverage fields. - `generate_jsonld.py` extracts source-page download links, citation text, and variable rows and passes them into the generation prompt. -- `generate_jsonld.py` emits review warnings when generated JSON-LD still contains lumped variable names, plain-string citations, or distributions that omit exact source-page download links. +- `generate_jsonld.py` emits review warnings when generated JSON-LD still contains lumped variable names, omits source-listed variables, lacks variable-level temporal or spatial coverage, contains plain-string citations, or has distributions that omit exact source-page download links. 
## Validation diff --git a/prompts/jsonld-generation-prompt.txt b/prompts/jsonld-generation-prompt.txt index 5051429..3843b55 100644 --- a/prompts/jsonld-generation-prompt.txt +++ b/prompts/jsonld-generation-prompt.txt @@ -46,8 +46,10 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi {EXTRACTED_METADATA} **Source-page fidelity requirements**: -- If the source page has a variable table, create one `variableMeasured` entry per source-table row. Do not lump ranges or groups such as "BIO1-BIO19", "tas/tasmax/tasmin", "bands 1-7", or "all layers" into one entry when the page lists individual variables. +- If the source page has a variable table, expandable variable menu, layer list, or property list, create one `variableMeasured` entry per source-listed variable. Do not lump ranges or groups such as "BIO1-BIO19", "tas/tasmax/tasmin", "bands 1-7", or "all layers" into one entry when the page lists individual variables. - Use the physically meaningful variable name from the source page as `variableMeasured[].name`. Put short codes such as `bio01`, `tas`, `pr`, `lossyear`, or `treecover2000` in `alternateName`. +- For each `variableMeasured` entry, include `temporalCoverage`. If the variable is static or no variable-specific temporal range can be detected, set it to "Static"; otherwise use the source-supported variable or dataset temporal range. +- For each `variableMeasured` entry, include `spatialCoverage`. If no variable-specific spatial coverage or spatial range can be detected, set it to "not detected". - If the source page has both "Model Citation" and "Data Citation", include both as separate entries in `citation`. Do not drop either one. - Always represent `citation` as an array of structured `CreativeWork`, `ScholarlyArticle`, or `Dataset` objects; do not use a plain citation string. - For `distribution[].contentUrl`, use the exact URL linked from the source page's Download button or direct data/API endpoint when available. 
Do not use a generic portal root if a more specific download link, bucket prefix, DOI landing page, API endpoint, or file list is available. @@ -70,6 +72,7 @@ You are generating a JSON-LD (JSON for Linking Data) description for a scientifi 11. Set "keywords" as a JSON array of strings — never a single semicolon- or comma-separated string 12. Set "encodingFormat" (at dataset level and in each distribution) as a JSON array of strings — never a single semicolon- or comma-separated string 13. Set "citation" as a JSON array of structured objects — never as a single string -14. Add exactly: "comment": "This dataset metadata was generated by AI." +14. Each `variableMeasured` object must include `temporalCoverage` and `spatialCoverage` +15. Add exactly: "comment": "This dataset metadata was generated by AI." **Output**: Provide ONLY valid JSON-LD, no additional text or explanation. diff --git a/scripts/generate_jsonld.py b/scripts/generate_jsonld.py index 27fcf15..d053fb4 100644 --- a/scripts/generate_jsonld.py +++ b/scripts/generate_jsonld.py @@ -815,6 +815,29 @@ def extract_source_facts(html: str, base_url: str) -> Dict: if line in ("Model Citation", "Data Citation") and i + 1 < len(lines): facts["citation_text"].append({"label": line, "text": lines[i + 1]}) + seen_variables = set() + for row in soup.find_all("tr"): + cells = [cell.get_text(" ", strip=True) for cell in row.find_all(["th", "td"])] + if len(cells) < 2: + continue + code = cells[0].strip() + name = cells[1].strip() + if not code or not name or code.lower() in {"name", "code", "property"}: + continue + if not re.fullmatch(r"[A-Za-z][A-Za-z0-9_-]*", code): + continue + unit = cells[2].strip() if len(cells) > 2 else "" + key = code.lower() + if key in seen_variables: + continue + facts["variables"].append({ + "name": name, + "alternateName": code, + "unitText": unit, + "description": name, + }) + seen_variables.add(key) + for i, line in enumerate(lines): code_match = re.match(r"^\(([^)]+)\)(.*)$", line) if not 
code_match or i == 0: @@ -831,12 +854,15 @@ def extract_source_facts(html: str, base_url: str) -> Dict: if parts: unit = parts[0] description = parts[1] if len(parts) > 1 else "" + if code.lower() in seen_variables: + continue facts["variables"].append({ "name": name, "alternateName": code, "unitText": unit, "description": description, }) + seen_variables.add(code.lower()) except Exception as e: print(f"Warning: could not extract source-page facts from {base_url}: {e}") return facts @@ -871,11 +897,26 @@ def audit_generated_jsonld(data: Dict, source_facts: Dict) -> List[str]: if isinstance(variables, str): warnings.append("variableMeasured is a string; expected separate PropertyValue objects") elif isinstance(variables, list): + source_codes = { + str(item.get("alternateName", "")).lower() + for item in (source_facts or {}).get("variables", []) + if isinstance(item, dict) and item.get("alternateName") + } + generated_codes = set() for idx, item in enumerate(variables): if not isinstance(item, dict): continue + if item.get("alternateName"): + generated_codes.add(str(item.get("alternateName")).lower()) if _looks_like_lumped_or_code_name(str(item.get("name", ""))): warnings.append(f"variableMeasured[{idx}].name looks like a code or lumped range") + if "temporalCoverage" not in item: + warnings.append(f"variableMeasured[{idx}] is missing temporalCoverage") + if "spatialCoverage" not in item: + warnings.append(f"variableMeasured[{idx}] is missing spatialCoverage") + missing_codes = sorted(source_codes - generated_codes) + if missing_codes: + warnings.append(f"variableMeasured is missing source-listed variable code(s): {', '.join(missing_codes)}") download_links = { link.get("href") for link in (source_facts or {}).get("download_links", []) From 3d6295181429968d1f7d52fe704080e72052eece Mon Sep 17 00:00:00 2001 From: jaywt Date: Wed, 20 May 2026 15:27:20 -0400 Subject: [PATCH 58/58] Update generated JSON-LD metadata --- .../generated/CHELSA/chelsa_bioclim.jsonld | 471 
++++++++++++++++-- .../chelsa_canaryclim_climatologies.jsonld | 33 +- .../CHELSA/chelsa_cerra_daily.jsonld | 12 +- .../chelsa_ch_highres_climatologies.jsonld | 33 +- .../CHELSA/chelsa_ch_highres_daily.jsonld | 33 +- .../CHELSA/chelsa_climatologies.jsonld | 130 ++++- .../generated/CHELSA/chelsa_daily.jsonld | 130 ++++- .../CHELSA/chelsa_drought_indices.jsonld | 64 ++- .../generated/CHELSA/chelsa_monthly.jsonld | 152 +++++- .../CHELSA/chelsa_trace21k_centennial.jsonld | 33 +- .../chelsa_trace21k_centennial_bioclim.jsonld | 159 +++++- .../generated/CHELSA/chelsa_w5e5_daily.jsonld | 40 +- .../consensus-land-cover.jsonld | 142 +++++- .../summoned/generated/FLO1K/flo1k.jsonld | 44 +- .../summoned/generated/G-RUN/g-run.jsonld | 22 +- .../objects/summoned/generated/GFC/gfc.jsonld | 127 ++++- .../summoned/generated/GHSL/ghsl.jsonld | 64 ++- .../generated/GPP_MOD17/gpp_mod17.jsonld | 31 +- .../generated/GRACE-REC/grace-rec.jsonld | 42 +- .../global-multi-layer-soil-moisture.jsonld | 55 +- .../global-tree-density.jsonld | 32 +- .../generated/HydroSHEDS/hydrosheds.jsonld | 108 +++- .../Hydrography90m/hydrography90m.jsonld | 110 +++- .../generated/MERIT_DEM/merit-dem.jsonld | 19 +- .../generated/MRLC_NLCD/annual-nlcd.jsonld | 104 +++- .../MRLC_NLCD/exotic-annual-grass.jsonld | 104 +++- .../generated/MRLC_NLCD/legacy-nlcd.jsonld | 73 ++- .../generated/MRLC_NLCD/nalcms.jsonld | 29 +- .../summoned/generated/MRLC_NLCD/rcmap.jsonld | 104 +++- .../Shale_Network/shale-network.jsonld | 40 +- .../generated/SoilGrids2/soilgrids2.jsonld | 163 +++++- .../TerraClimate/terraclimate.jsonld | 168 +++++-- .../generated/WATERBASE/waterbase.jsonld | 26 +- .../water-quality-portal.jsonld | 54 +- 34 files changed, 2388 insertions(+), 563 deletions(-) diff --git a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld index 73c598a..31765f0 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld +++ 
b/data/objects/summoned/generated/CHELSA/chelsa_bioclim.jsonld @@ -19,15 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": { - "@type": "Place", - "name": "Global land surface areas", - "geo": { - "@type": "GeoShape", - "box": "-180.0,-60.0 180.0,90.0" - } - }, "keywords": [ "CHELSA", "bioclim", @@ -44,7 +35,16 @@ "unitText": "°C", "description": "Mean annual temperature calculated as the average of mean monthly temperatures over the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -53,7 +53,16 @@ "unitText": "°C", "description": "Mean diurnal temperature range computed as the average of monthly daily maximum minus daily minimum near-surface air temperature", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -62,7 +71,16 @@ "unitText": "°C", "description": "Isothermality: 100 × bio02 ÷ bio07; compares day-night variability to annual temperature range", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -71,7 +89,16 @@ "unitText": 
"°C/100", "description": "Temperature seasonality given by the standard deviation of mean monthly temperatures", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -80,7 +107,16 @@ "unitText": "°C", "description": "Highest monthly mean of daily maximum temperatures across the year; indicates peak thermal conditions", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -89,7 +125,16 @@ "unitText": "°C", "description": "Lowest monthly mean of daily minimum temperatures across the year; characterizes winter cold intensity", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -98,7 +143,16 @@ "unitText": "°C", "description": "Annual temperature range calculated as bio05 minus bio06; measures amplitude between warmest and coldest months", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -107,7 
+161,16 @@ "unitText": "°C", "description": "Average monthly mean temperature over the wettest three-month period of the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -116,7 +179,16 @@ "unitText": "°C", "description": "Average monthly mean temperature over the driest three-month period of the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -125,7 +197,16 @@ "unitText": "°C", "description": "Average monthly mean temperature over the warmest three-month period of the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -134,7 +215,16 @@ "unitText": "°C", "description": "Average monthly mean temperature over the coldest three-month period of the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -143,7 +233,16 @@ "unitText": "kg m-2 year-1", "description": 
"Sum of monthly precipitation totals across the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -152,7 +251,16 @@ "unitText": "kg m-2 month-1", "description": "Maximum monthly precipitation total", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -161,7 +269,16 @@ "unitText": "kg m-2 month-1", "description": "Minimum monthly precipitation total", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -170,7 +287,16 @@ "unitText": "kg m-2", "description": "Coefficient of variation: 100 × standard deviation ÷ mean of monthly precipitation totals", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -179,7 +305,16 @@ "unitText": "kg m-2 month-1", "description": "Average monthly precipitation during the wettest three-month period of the year", "temporalCoverage": "1981-2010/1971-2100", 
- "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -188,7 +323,16 @@ "unitText": "kg m-2 month-1", "description": "Average monthly precipitation during the driest three-month period of the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -197,7 +341,16 @@ "unitText": "kg m-2 month-1", "description": "Average monthly precipitation during the warmest three-month period of the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -206,7 +359,16 @@ "unitText": "kg m-2 month-1", "description": "Average monthly precipitation during the coldest three-month period of the year", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -215,7 +377,16 @@ "unitText": "count", "description": "Number of freeze-thaw transitions per year.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": 
{ + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -224,7 +395,16 @@ "unitText": "julian day", "description": "Julian day marking the first occurrence of growing season conditions.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -233,7 +413,16 @@ "unitText": "°C", "description": "Sum of daily mean temperatures above 0 °C accumulated over the year.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -242,7 +431,16 @@ "unitText": "°C", "description": "Sum of daily mean temperatures above 10 °C accumulated over the year.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -251,7 +449,16 @@ "unitText": "°C", "description": "Sum of daily mean temperatures above 5 °C accumulated over the year.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": 
"GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -260,7 +467,16 @@ "unitText": "julian day", "description": "Julian day of the first occurrence of a daily mean temperature above 10 °C.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -269,7 +485,16 @@ "unitText": "julian day", "description": "Julian day of the first occurrence of a daily mean temperature above 5 °C.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -278,7 +503,16 @@ "unitText": "days", "description": "Number of days between the first and last occurrence of growing season conditions.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -287,7 +521,16 @@ "unitText": "kg m-2 gsl-1", "description": "Total precipitation accumulated during the growing season period.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } 
+ }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -296,7 +539,16 @@ "unitText": "°C", "description": "Average daily mean temperature over all growing season days.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -305,7 +557,16 @@ "unitText": "category", "description": "Köppen-Geiger climate classification.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -314,7 +575,16 @@ "unitText": "category", "description": "Köppen-Geiger climate classification without As and Aw differentiation.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -323,7 +593,16 @@ "unitText": "category", "description": "Köppen-Geiger climate classification after Peel et al. 
2007.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -332,7 +611,16 @@ "unitText": "category", "description": "Climate classification after Wissmann 1939.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -341,7 +629,16 @@ "unitText": "category", "description": "Climate classification after Thornthwaite 1931.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -350,7 +647,16 @@ "unitText": "category", "description": "Climate classification after Troll-Pfaffen.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -359,7 +665,16 @@ "unitText": "julian day", "description": "Julian day of the last occurrence of growing season conditions.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land 
surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -368,7 +683,16 @@ "unitText": "number of days", "description": "Total number of days in a year with mean daily temperature above 0 °C.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -377,7 +701,16 @@ "unitText": "number of days", "description": "Total number of days in a year with mean daily temperature above 10 °C.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -386,7 +719,16 @@ "unitText": "number of days", "description": "Total number of days in a year with mean daily temperature above 5 °C.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -395,7 +737,16 @@ "unitText": "g C m-2 yr-1", "description": "Net primary production on land expressed as carbon mass flux.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + 
"box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -404,7 +755,16 @@ "unitText": "days", "description": "Number of days per year with snow cover present at the surface.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -413,7 +773,16 @@ "unitText": "kg m-2 year-1", "description": "Total water equivalent of snowpack accumulated over the year.", "temporalCoverage": "1981-2010/1971-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld index 0ac5a80..b898e6f 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_canaryclim_climatologies.jsonld @@ -19,11 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1979-01-01/2013-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Canary Islands" - }, "keywords": [ "CHELSA", "Canary Islands", @@ -40,7 +35,12 @@ "unitText": "kg m-2 day-1", "description": "Precipitation including liquid and solid phases.", "temporalCoverage": "1979-01-01/2013-12-31", - "spatialCoverage": "not detected" 
+ "spatialCoverage": { + "@type": "Place", + "name": "Canary Islands" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -49,7 +49,12 @@ "unitText": "K", "description": "Near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2013-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Canary Islands" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -58,7 +63,12 @@ "unitText": "K", "description": "Maximum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2013-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Canary Islands" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -67,7 +77,12 @@ "unitText": "K", "description": "Minimum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2013-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Canary Islands" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld index 38d6af1..a4e4f81 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_cerra_daily.jsonld @@ -19,11 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1985-01-01/2015-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Europe" - }, "keywords": [ "CHELSA", "CERRA", @@ -40,7 +35,12 @@ "unitText": "K", "description": 
"Near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1985-01-01/2015-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Europe" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "distribution": [ diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld index e1d7b5e..0978061 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_climatologies.jsonld @@ -19,11 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1981-01-01/2010-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Switzerland" - }, "keywords": [ "CHELSA", "Switzerland", @@ -40,7 +35,12 @@ "unitText": "kg m-2 day-1", "description": "Precipitation including liquid and solid phases.", "temporalCoverage": "1981-01-01/2010-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -49,7 +49,12 @@ "unitText": "K", "description": "Near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1981-01-01/2010-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -58,7 +63,12 @@ "unitText": "K", "description": "Maximum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1981-01-01/2010-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "temporalResolution": "not 
detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -67,7 +77,12 @@ "unitText": "K", "description": "Minimum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1981-01-01/2010-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld index 7511309..ed79bc4 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_ch_highres_daily.jsonld @@ -19,11 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1981-01-01/2022-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Switzerland" - }, "keywords": [ "CHELSA", "Switzerland", @@ -40,7 +35,12 @@ "unitText": "kg m-2 day-1", "description": "Precipitation including liquid and solid phases.", "temporalCoverage": "1981-01-01/2022-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -49,7 +49,12 @@ "unitText": "K", "description": "Near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1981-01-01/2022-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -58,7 +63,12 @@ "unitText": "K", "description": "Maximum near-surface, usually 2 meter, air temperature.", "temporalCoverage": 
"1981-01-01/2022-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -67,7 +77,12 @@ "unitText": "K", "description": "Minimum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1981-01-01/2022-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Switzerland" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld index 91d62be..7100cb7 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_climatologies.jsonld @@ -19,15 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": { - "@type": "Place", - "name": "Global land surface areas", - "geo": { - "@type": "GeoShape", - "box": "-180.0,-60.0 180.0,90.0" - } - }, "keywords": [ "CHELSA", "climatologies", @@ -44,7 +35,16 @@ "unitText": "percent", "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -53,7 +53,16 @@ "unitText": "kg m-2 month-1", "description": "Monthly ratio of precipitation to potential evapotranspiration; indicator of 
climatic water availability.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -62,7 +71,16 @@ "unitText": "percent", "description": "Relative humidity near the surface.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -71,7 +89,16 @@ "unitText": "kg m-2 month-1", "description": "Total potential evapotranspiration for the month assuming unlimited water availability, calculated using Penman-Monteith.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -80,7 +107,16 @@ "unitText": "kg m-2 day-1", "description": "Precipitation including liquid and solid phases.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -89,7 +125,16 @@ "unitText": "W m-2", "description": "Surface solar irradiance for UV calculations.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not 
detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -98,7 +143,16 @@ "unitText": "m s-1", "description": "Near-surface, usually 10 meter, wind speed.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -107,7 +161,16 @@ "unitText": "K", "description": "Near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -116,7 +179,16 @@ "unitText": "K", "description": "Maximum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -125,7 +197,16 @@ "unitText": "K", "description": "Minimum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + 
"temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -134,7 +215,16 @@ "unitText": "Pa", "description": "Difference between saturation vapor pressure and actual vapor pressure.", "temporalCoverage": "1981-2010/2071-2100", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld index 3abf4a3..005ae18 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_daily.jsonld @@ -19,15 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": { - "@type": "Place", - "name": "Global land surface areas", - "geo": { - "@type": "GeoShape", - "box": "-180.0,-60.0 180.0,90.0" - } - }, "keywords": [ "CHELSA", "daily climate", @@ -46,7 +37,16 @@ "unitText": "percent", "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -55,7 +55,16 @@ "unitText": "percent", "description": "Relative humidity near the surface.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + 
"@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -64,7 +73,16 @@ "unitText": "kg m-2 day-1", "description": "Precipitation including liquid and solid phases.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -73,7 +91,16 @@ "unitText": "kg m-2 day-1", "description": "Downscaled forecast precipitation from ERA5; not bias corrected and should not be mixed with pr.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -82,7 +109,16 @@ "unitText": "hPa", "description": "Surface pressure, not mean sea-level pressure.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -91,7 +127,16 @@ "unitText": "W m-2", "description": "Surface solar irradiance for UV calculations.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 
180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -100,7 +145,16 @@ "unitText": "m s-1", "description": "Near-surface, usually 10 meter, wind speed.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -109,7 +163,16 @@ "unitText": "K", "description": "Near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -118,7 +181,16 @@ "unitText": "K", "description": "Maximum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -127,7 +199,16 @@ "unitText": "K", "description": "Minimum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -136,7 +217,16 @@ "unitText": "K 
m-1", "description": "Rate of change in air temperature with altitude calculated over the centennial period.", "temporalCoverage": "1979-01-01/2025-08-29", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld index 08c8a6d..9fce445 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_drought_indices.jsonld @@ -19,15 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1980-06-15/2018-07-15", - "spatialCoverage": { - "@type": "Place", - "name": "Global land surface areas", - "geo": { - "@type": "GeoShape", - "box": "-180.0,-60.0 180.0,90.0" - } - }, "keywords": [ "CHELSA", "drought", @@ -44,7 +35,16 @@ "unitText": "id", "description": "Identifier for multiyear meteorological drought events.", "temporalCoverage": "1980-06-15/2018-07-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -53,7 +53,16 @@ "unitText": "id", "description": "Identifier for multiyear meteorological drought events at 10 km resolution.", "temporalCoverage": "1980-06-15/2018-07-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + 
"box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -62,7 +71,16 @@ "unitText": "1", "description": "Annual anomaly in vegetation greenness derived from kernel normalized difference vegetation index.", "temporalCoverage": "1980-06-15/2018-07-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -71,7 +89,16 @@ "unitText": "1", "description": "Standardized climatic water balance index over a 12-month integration period.", "temporalCoverage": "1980-06-15/2018-07-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -80,7 +107,16 @@ "unitText": "1", "description": "Standardized precipitation anomaly index over a 12-month integration period.", "temporalCoverage": "1980-06-15/2018-07-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/licenses/by/4.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld index 63b2426..725a23f 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_monthly.jsonld @@ -19,15 +19,6 @@ "name": "WSL (Swiss Federal 
Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": { - "@type": "Place", - "name": "Global land surface areas", - "geo": { - "@type": "GeoShape", - "box": "-180.0,-60.0 180.0,90.0" - } - }, "keywords": [ "CHELSA", "monthly climate", @@ -44,7 +35,16 @@ "unitText": "percent", "description": "Total cloud area fraction, reported as a percentage, for the whole atmospheric column.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -53,7 +53,16 @@ "unitText": "kg m-2 month-1", "description": "Monthly ratio of precipitation to potential evapotranspiration; indicator of climatic water availability.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -62,7 +71,16 @@ "unitText": "percent", "description": "Relative humidity near the surface.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -71,7 +89,16 @@ "unitText": "kg m-2 month-1", "description": "Total potential evapotranspiration for the month assuming unlimited water availability, calculated using Penman-Monteith.", 
"temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -80,7 +107,16 @@ "unitText": "kg m-2 month-1", "description": "Precipitation including liquid and solid phases.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -89,7 +125,16 @@ "unitText": "W m-2", "description": "Surface solar irradiance for UV calculations.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -98,7 +143,16 @@ "unitText": "m s-1", "description": "Near-surface, usually 10 meter, wind speed.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -107,7 +161,16 @@ "unitText": "1", "description": "Standardized climatic water balance index over a 12-month integration period.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": 
"Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -116,7 +179,16 @@ "unitText": "1", "description": "Standardized precipitation anomaly index over a 12-month integration period.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -125,7 +197,16 @@ "unitText": "K", "description": "Near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -134,7 +215,16 @@ "unitText": "K", "description": "Maximum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -143,7 +233,16 @@ "unitText": "K", "description": "Minimum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not 
detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -152,7 +251,16 @@ "unitText": "Pa", "description": "Difference between saturation vapor pressure and actual vapor pressure.", "temporalCoverage": "1979-01-15/2021-12-15", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas", + "geo": { + "@type": "GeoShape", + "box": "-180.0,-60.0 180.0,90.0" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld index ceeb4f9..6c5032a 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial.jsonld @@ -19,11 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "21000BP/0BP", - "spatialCoverage": { - "@type": "Place", - "name": "Global land surface areas" - }, "keywords": [ "CHELSA", "TraCE21k", @@ -41,7 +36,12 @@ "unitText": "kg m-2 day-1", "description": "Precipitation including liquid and solid phases.", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -50,7 +50,12 @@ "unitText": "K", "description": "Maximum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ 
-59,7 +64,12 @@ "unitText": "K", "description": "Minimum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -68,7 +78,12 @@ "unitText": "K m-1", "description": "Rate of change in air temperature with altitude calculated over the centennial period.", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld index fad1782..976c5f3 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_trace21k_centennial_bioclim.jsonld @@ -19,11 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "21000BP/0BP", - "spatialCoverage": { - "@type": "Place", - "name": "Global land surface areas" - }, "keywords": [ "CHELSA", "TraCE21k", @@ -40,7 +35,12 @@ "unitText": "K", "description": "Mean annual temperature calculated as the average of mean monthly temperatures over the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -49,7 +49,12 @@ "unitText": "K", "description": "Mean diurnal temperature range computed 
as the average of monthly daily maximum minus daily minimum near-surface air temperature", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -58,7 +63,12 @@ "unitText": "K", "description": "Isothermality: 100 × bio02 ÷ bio07; compares day-night variability to annual temperature range", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -67,7 +77,12 @@ "unitText": "K", "description": "Temperature seasonality given by the standard deviation of mean monthly temperatures", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -76,7 +91,12 @@ "unitText": "K", "description": "Highest monthly mean of daily maximum temperatures across the year; indicates peak thermal conditions", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -85,7 +105,12 @@ "unitText": "K", "description": "Lowest monthly mean of daily minimum temperatures across the year; characterizes winter cold intensity", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { 
"@type": "PropertyValue", @@ -94,7 +119,12 @@ "unitText": "K", "description": "Annual temperature range calculated as bio05 minus bio06; measures amplitude between warmest and coldest months", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -103,7 +133,12 @@ "unitText": "K", "description": "Average monthly mean temperature over the wettest three-month period of the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -112,7 +147,12 @@ "unitText": "K", "description": "Average monthly mean temperature over the driest three-month period of the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -121,7 +161,12 @@ "unitText": "K", "description": "Average monthly mean temperature over the warmest three-month period of the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -130,7 +175,12 @@ "unitText": "K", "description": "Average monthly mean temperature over the coldest three-month period of the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not 
detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -139,7 +189,12 @@ "unitText": "kg m-2 year-1", "description": "Sum of monthly precipitation totals across the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -148,7 +203,12 @@ "unitText": "kg m-2 month-1", "description": "Maximum monthly precipitation total", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -157,7 +217,12 @@ "unitText": "kg m-2 month-1", "description": "Minimum monthly precipitation total", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -166,7 +231,12 @@ "unitText": "kg m-2", "description": "Coefficient of variation: 100 × standard deviation ÷ mean of monthly precipitation totals", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -175,7 +245,12 @@ "unitText": "kg m-2 month-1", "description": "Average monthly precipitation during the wettest three-month period of the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not 
detected" }, { "@type": "PropertyValue", @@ -184,7 +259,12 @@ "unitText": "kg m-2 month-1", "description": "Average monthly precipitation during the driest three-month period of the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -193,7 +273,12 @@ "unitText": "kg m-2 month-1", "description": "Average monthly precipitation during the warmest three-month period of the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -202,7 +287,12 @@ "unitText": "kg m-2 month-1", "description": "Average monthly precipitation during the coldest three-month period of the year", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -211,7 +301,12 @@ "unitText": "m", "description": "Elevation of the ice sheet surface above sea level.", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -220,7 +315,12 @@ "unitText": "m", "description": "Geometric height of the land surface above the geoid.", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not 
detected" }, { "@type": "PropertyValue", @@ -229,7 +329,12 @@ "unitText": "days", "description": "Number of days per year with snow cover present at the surface.", "temporalCoverage": "21000BP/0BP", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld index 720fb7c..b06091f 100644 --- a/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld +++ b/data/objects/summoned/generated/CHELSA/chelsa_w5e5_daily.jsonld @@ -19,11 +19,6 @@ "name": "WSL (Swiss Federal Institute for Forest, Snow and Landscape Research)", "url": "https://www.wsl.ch/" }, - "temporalCoverage": "1979-01-01/2016-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Global land surface areas" - }, "keywords": [ "CHELSA", "W5E5", @@ -41,7 +36,12 @@ "unitText": "kg m-2 day-1", "description": "Precipitation including liquid and solid phases.", "temporalCoverage": "1979-01-01/2016-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -50,7 +50,12 @@ "unitText": "W m-2", "description": "Surface solar irradiance for UV calculations.", "temporalCoverage": "1979-01-01/2016-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -59,7 +64,12 @@ "unitText": "K", "description": "Near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2016-12-31", - 
"spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -68,7 +78,12 @@ "unitText": "K", "description": "Maximum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2016-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", @@ -77,7 +92,12 @@ "unitText": "K", "description": "Minimum near-surface, usually 2 meter, air temperature.", "temporalCoverage": "1979-01-01/2016-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land surface areas" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "distribution": [ diff --git a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld index b5caed1..156ebc0 100644 --- a/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld +++ b/data/objects/summoned/generated/Consensus_Land_Cover/consensus-land-cover.jsonld @@ -52,16 +52,6 @@ "GeoTIFF" ], "license": "https://creativecommons.org/licenses/by-nc/4.0/", - "spatialCoverage": { - "@type": "Place", - "name": "Global", - "geo": { - "@type": "GeoShape", - "box": "-180,-56 180,90" - } - }, - "spatialResolution": "30 arc-second (~1 km at the equator)", - "temporalCoverage": "1992-01-01/2006-12-31", "encodingFormat": [ "image/tiff", "application=geotiff" @@ -72,84 +62,192 @@ "name": "Evergreen and Deciduous Needleleaf Trees", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + 
"@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Evergreen Broadleaf Trees", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Deciduous Broadleaf Trees", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Mixed and Other Trees", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Shrubs", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Herbaceous Vegetation", "description": 
"Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Cultivated and Managed Vegetation", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Regularly Flooded Vegetation", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Urban and Built-Up", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Snow and Ice", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not 
detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Barren", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" }, { "@type": "PropertyValue", "name": "Open Water", "description": "Consensus prevalence (0–100%)", "temporalCoverage": "1992-01-01/2006-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-56 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "30 arc-second (~1 km at the equator)" } ], "measurementTechnique": "Integration of multiple global remote sensing-derived land-cover products to estimate per-class consensus prevalence (percent) at 1-km resolution.", diff --git a/data/objects/summoned/generated/FLO1K/flo1k.jsonld b/data/objects/summoned/generated/FLO1K/flo1k.jsonld index 48669f5..22a9737 100644 --- a/data/objects/summoned/generated/FLO1K/flo1k.jsonld +++ b/data/objects/summoned/generated/FLO1K/flo1k.jsonld @@ -65,38 +65,54 @@ } ], "isAccessibleForFree": true, - "temporalCoverage": "1960-01-01/2015-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Global land areas excluding Antarctica", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, - "spatialResolution": "30 arc-second (~1 km)", - "temporalResolution": "P1Y", "variableMeasured": [ { "@type": "PropertyValue", "name": "Mean annual streamflow", "description": "Mean annual flow for each year (gridded, global, ~1 km).", "temporalCoverage": "1960-01-01/2015-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas 
excluding Antarctica", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1Y", + "spatialResolution": "30 arc-second (~1 km)" }, { "@type": "PropertyValue", "name": "Maximum annual streamflow (maximum monthly flow per year)", "description": "Maximum monthly flow within each year (gridded, global, ~1 km).", "temporalCoverage": "1960-01-01/2015-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas excluding Antarctica", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1Y", + "spatialResolution": "30 arc-second (~1 km)" }, { "@type": "PropertyValue", "name": "Minimum annual streamflow (minimum monthly flow per year)", "description": "Minimum monthly flow within each year (gridded, global, ~1 km).", "temporalCoverage": "1960-01-01/2015-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas excluding Antarctica", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1Y", + "spatialResolution": "30 arc-second (~1 km)" } ], "encodingFormat": [ diff --git a/data/objects/summoned/generated/G-RUN/g-run.jsonld b/data/objects/summoned/generated/G-RUN/g-run.jsonld index 1e27d75..cb7f417 100644 --- a/data/objects/summoned/generated/G-RUN/g-run.jsonld +++ b/data/objects/summoned/generated/G-RUN/g-run.jsonld @@ -76,24 +76,22 @@ } ], "isAccessibleForFree": true, - "temporalCoverage": "1902-01-01/2014-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Global gridded land runoff reconstruction", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, - "spatialResolution": "0.5 degree", - "temporalResolution": "P1M", "variableMeasured": [ { "@type": "PropertyValue", "name": "Runoff", "description": "Monthly gridded runoff rates reconstructed using machine learning trained on in-situ streamflow 
observations.", "temporalCoverage": "1902-01-01/2014-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global gridded land runoff reconstruction", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "0.5 degree" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/GFC/gfc.jsonld b/data/objects/summoned/generated/GFC/gfc.jsonld index 5d526a1..52acb4e 100644 --- a/data/objects/summoned/generated/GFC/gfc.jsonld +++ b/data/objects/summoned/generated/GFC/gfc.jsonld @@ -81,21 +81,6 @@ "url": "https://glad.umd.edu/" } ], - "temporalCoverage": "2000-01-01/2023-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Global (granules provided for 180W–180E, 80N–60S)", - "geo": { - "@type": "GeoShape", - "box": "-180,-60 180,80" - } - }, - "spatialResolution": { - "@type": "QuantitativeValue", - "value": 30, - "unitCode": "MTR", - "description": "Approx. 30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." - }, "keywords": [ "forest monitoring", "deforestation", @@ -124,7 +109,21 @@ "unitText": "percent", "description": "Tree canopy cover for year 2000, defined as canopy closure for vegetation taller than 5 m.", "temporalCoverage": "2000-01-01/2023-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global (granules provided for 180W–180E, 80N–60S)", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,80" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitCode": "MTR", + "description": "Approx. 30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." 
+ } }, { "@type": "PropertyValue", @@ -133,7 +132,21 @@ "unitText": "binary", "description": "Forest gain during 2000-2012.", "temporalCoverage": "2000-01-01/2023-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global (granules provided for 180W–180E, 80N–60S)", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,80" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitCode": "MTR", + "description": "Approx. 30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." + } }, { "@type": "PropertyValue", @@ -142,7 +155,21 @@ "unitText": "year code", "description": "Year of gross forest cover loss event; 0 indicates no loss and 1-23 correspond primarily to 2001-2023.", "temporalCoverage": "2000-01-01/2023-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global (granules provided for 180W–180E, 80N–60S)", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,80" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitCode": "MTR", + "description": "Approx. 30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." + } }, { "@type": "PropertyValue", @@ -151,7 +178,21 @@ "unitText": "class code", "description": "Data mask classes for no data, mapped land surface, and persistent water bodies.", "temporalCoverage": "2000-01-01/2023-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global (granules provided for 180W–180E, 80N–60S)", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,80" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitCode": "MTR", + "description": "Approx. 30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." 
+ } }, { "@type": "PropertyValue", @@ -160,7 +201,21 @@ "unitText": "digital number", "description": "Circa year-2000 Landsat cloud-free composite in red, NIR, SWIR1, and SWIR2 bands.", "temporalCoverage": "2000-01-01/2023-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global (granules provided for 180W–180E, 80N–60S)", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,80" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitCode": "MTR", + "description": "Approx. 30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." + } }, { "@type": "PropertyValue", @@ -169,7 +224,21 @@ "unitText": "digital number", "description": "Circa year-2023 Landsat cloud-free composite in red, NIR, SWIR1, and SWIR2 bands.", "temporalCoverage": "2000-01-01/2023-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global (granules provided for 180W–180E, 80N–60S)", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,80" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitCode": "MTR", + "description": "Approx. 30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." + } }, { "@type": "PropertyValue", @@ -178,7 +247,21 @@ "unitText": "derived binary", "description": "Derived indicator for pixels where lossyear is greater than 0.", "temporalCoverage": "2000-01-01/2023-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global (granules provided for 180W–180E, 80N–60S)", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,80" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitCode": "MTR", + "description": "Approx. 
30 m (1 arc-second per pixel; Earth Engine catalog lists ~30.92 m pixel size)." + } } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/GHSL/ghsl.jsonld b/data/objects/summoned/generated/GHSL/ghsl.jsonld index cdf8899..686b160 100644 --- a/data/objects/summoned/generated/GHSL/ghsl.jsonld +++ b/data/objects/summoned/generated/GHSL/ghsl.jsonld @@ -48,50 +48,86 @@ ], "isAccessibleForFree": true, "license": "https://creativecommons.org/licenses/by/4.0/", - "temporalCoverage": "1975-01-01/2030-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Global Human Settlement Layer product coverage", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, "variableMeasured": [ { "@type": "PropertyValue", "name": "Built-up surface", "description": "Gridded built-up surface area (including total and non-residential components for some products).", "temporalCoverage": "1975-01-01/2030-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global Human Settlement Layer product coverage", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Built-up volume", "description": "Gridded built-up volume estimates (including total and non-residential components for some products).", "temporalCoverage": "1975-01-01/2030-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global Human Settlement Layer product coverage", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Building height", "description": "Gridded building height estimates for selected reference years/products.", "temporalCoverage": "1975-01-01/2030-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": 
"Place", + "name": "Global Human Settlement Layer product coverage", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Population", "description": "Residential population grid (number of people per cell) for multiple epochs and projections.", "temporalCoverage": "1975-01-01/2030-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global Human Settlement Layer product coverage", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Settlement Typology and Degree of Urbanisation", "description": "Settlement model classification based on the UN-recommended Degree of Urbanisation methodology.", "temporalCoverage": "1975-01-01/2030-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global Human Settlement Layer product coverage", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "encodingFormat": [ diff --git a/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld b/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld index 9280c68..75b9738 100644 --- a/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld +++ b/data/objects/summoned/generated/GPP_MOD17/gpp_mod17.jsonld @@ -45,25 +45,34 @@ "name": "GPP", "description": "Gross Primary Production", "temporalCoverage": "2000-01-01/2013-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Earth's vegetated land surface", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": 
"NPP", "description": "Net Primary Production", "temporalCoverage": "2000-01-01/2013-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Earth's vegetated land surface", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], - "spatialCoverage": { - "@type": "Place", - "name": "Earth's vegetated land surface", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, - "temporalCoverage": "2000-01-01/2013-12-31", "encodingFormat": [ "application/x-hdf", "image/tiff", diff --git a/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld b/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld index 6045353..4276aa7 100644 --- a/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld +++ b/data/objects/summoned/generated/GRACE-REC/grace-rec.jsonld @@ -61,34 +61,44 @@ } ], "isAccessibleForFree": true, - "temporalCoverage": "1901-01-01/2019-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Global terrestrial water storage reconstruction", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, - "spatialResolution": "0.5 degree", - "temporalResolution": [ - "P1D", - "P1M" - ], "variableMeasured": [ { "@type": "PropertyValue", "name": "Terrestrial water storage anomaly", "description": "Reconstructed climate-driven terrestrial water storage anomalies (TWSA) derived from statistical modeling calibrated to GRACE observations.", "temporalCoverage": "1901-01-01/2019-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial water storage reconstruction", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": [ + "P1D", + "P1M" + ], + "spatialResolution": "0.5 degree" }, { "@type": "PropertyValue", "name": "Ensemble members", "description": "Multiple ensemble realizations to quantify 
predictive uncertainty for reconstructed TWS anomalies.", "temporalCoverage": "1901-01-01/2019-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial water storage reconstruction", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": [ + "P1D", + "P1M" + ], + "spatialResolution": "0.5 degree" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld index 3d21e40..2b9e207 100644 --- a/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld +++ b/data/objects/summoned/generated/Global_Multi-layer_Soil_Moisture/global-multi-layer-soil-moisture.jsonld @@ -57,45 +57,70 @@ } ], "isAccessibleForFree": true, - "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Global land areas", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, - "spatialResolution": "0.5 degree", - "temporalResolution": "P1M", "variableMeasured": [ { "@type": "PropertyValue", "name": "Soil moisture (0–10 cm)", "description": "Monthly soil moisture for the 0–10 cm layer.", "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "0.5 degree" }, { "@type": "PropertyValue", "name": "Soil moisture (10–30 cm)", "description": "Monthly soil moisture for the 10–30 cm layer.", "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 
180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "0.5 degree" }, { "@type": "PropertyValue", "name": "Soil moisture (30–50 cm)", "description": "Monthly soil moisture for the 30–50 cm layer.", "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "0.5 degree" }, { "@type": "PropertyValue", "name": "Soil moisture (50–100 cm)", "description": "Monthly soil moisture for the 50–100 cm layer.", "temporalCoverage": "1970-01-01/2016-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "0.5 degree" } ], "encodingFormat": [ diff --git a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld index 9cc20ef..2aa5ea0 100644 --- a/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld +++ b/data/objects/summoned/generated/Global_Tree_Density/global-tree-density.jsonld @@ -67,16 +67,6 @@ } }, "datePublished": "2015-09-02", - "temporalCoverage": "2015-09-02", - "spatialCoverage": { - "@type": "Place", - "name": "Global", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, - "spatialResolution": "Best suited for country-scale (or larger) summaries; pixel-level precision is less reliable per creators' guidance.", "keywords": [ "tree density", "forest", @@ -96,14 +86,32 @@ "name": "Tree density (biome-level model)", "description": "Biome-level regression model predictions applied at biome scale.", "temporalCoverage": "2015-09-02", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + 
"name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "Best suited for country-scale (or larger) summaries; pixel-level precision is less reliable per creators' guidance." }, { "@type": "PropertyValue", "name": "Tree density (ecoregion-level model)", "description": "Ecoregion-level regression model predictions applied at ecoregion scale.", "temporalCoverage": "2015-09-02", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "not detected", + "spatialResolution": "Best suited for country-scale (or larger) summaries; pixel-level precision is less reliable per creators' guidance." } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld b/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld index 0954cc1..4fe7a98 100644 --- a/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld +++ b/data/objects/summoned/generated/HydroSHEDS/hydrosheds.jsonld @@ -50,78 +50,150 @@ } ], "isAccessibleForFree": true, - "spatialCoverage": { - "@type": "Place", - "name": "Global and regional hydrographic products; product-specific coverage varies" - }, - "spatialResolution": [ - "3 arc-second", - "15 arc-second", - "30 arc-second" - ], "variableMeasured": [ { "@type": "PropertyValue", "name": "Catchment and Sub-Basin Boundaries", "description": "Vector catchment and sub-basin boundary products derived from HydroSHEDS hydrography.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] }, { "@type": "PropertyValue", "name": "River 
networks", "description": "Vector river/stream network products derived from HydroSHEDS hydrography.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] }, { "@type": "PropertyValue", "name": "Lakes and water bodies", "description": "Lake and water body products distributed as part of the HydroSHEDS product suite.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] }, { "@type": "PropertyValue", "name": "Void-filled DEM", "description": "Digital elevation model underpinning HydroSHEDS core layers.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] }, { "@type": "PropertyValue", "name": "Conditioned DEM", "description": "Hydrologically conditioned DEM used to derive flow products.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] }, { "@type": "PropertyValue", "name": "Flow direction", "description": "Drainage direction grid derived from the conditioned DEM.", "temporalCoverage": "Static", - 
"spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] }, { "@type": "PropertyValue", "name": "Flow accumulation", "description": "Upstream contributing area / upstream cell count derived from flow direction.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] }, { "@type": "PropertyValue", "name": "Flow length", "description": "Upstream and/or downstream flow length derived from flow direction.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] }, { "@type": "PropertyValue", "name": "Land mask and sinks", "description": "Land/ocean mask and coastal/inland sink indicators used in HydroSHEDS processing.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global and regional hydrographic products; product-specific coverage varies" + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second", + "15 arc-second", + "30 arc-second" + ] } ], "encodingFormat": [ diff --git a/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld b/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld index 8aec239..5cba286 100644 --- a/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld +++ 
b/data/objects/summoned/generated/Hydrography90m/hydrography90m.jsonld @@ -82,67 +82,139 @@ ], "datePublished": "2022-08-09", "isAccessibleForFree": true, - "spatialCoverage": { - "@type": "Place", - "name": "Global", - "geo": { - "@type": "GeoShape", - "box": "-180,-60 180,90" - } - }, - "spatialResolution": [ - "3 arc-second (~90 m at the equator)", - "90 m" - ], "variableMeasured": [ { "@type": "PropertyValue", "name": "Stream channels", "description": "Global stream channel network with unique segment identifiers and topology attributes.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second (~90 m at the equator)", + "90 m" + ] }, { "@type": "PropertyValue", "name": "Drainage basins", "description": "Global drainage basin delineations derived from flow routing.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second (~90 m at the equator)", + "90 m" + ] }, { "@type": "PropertyValue", "name": "Sub-catchments", "description": "Sub-catchment polygons linked to individual stream segments.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second (~90 m at the equator)", + "90 m" + ] }, { "@type": "PropertyValue", "name": "Network topology", "description": "Upstream/downstream connectivity and routing attributes for stream segments.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + 
"spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second (~90 m at the equator)", + "90 m" + ] }, { "@type": "PropertyValue", "name": "Stream order", "description": "Stream order metrics computed for the network.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second (~90 m at the equator)", + "90 m" + ] }, { "@type": "PropertyValue", "name": "Stream slope", "description": "Slope metrics computed along stream segments.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second (~90 m at the equator)", + "90 m" + ] }, { "@type": "PropertyValue", "name": "Distance metrics", "description": "In-stream and among-stream distance measures for network analysis.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": [ + "3 arc-second (~90 m at the equator)", + "90 m" + ] } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld index 6116161..ccdfe5d 100644 --- a/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld +++ b/data/objects/summoned/generated/MERIT_DEM/merit-dem.jsonld @@ -56,14 +56,6 @@ "url": "https://opendatacommons.org/licenses/odbl/1-0/" } ], - "spatialCoverage": { - 
"@type": "Place", - "name": "Global land areas between 90°N and 60°S", - "geo": { - "@type": "GeoShape", - "box": "-180,-60 180,90" - } - }, "variableMeasured": [ { "@type": "PropertyValue", @@ -71,7 +63,16 @@ "unitText": "meter", "description": "Terrain elevation in meters referenced to WGS84 and the EGM96 geoid.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global land areas between 90°N and 60°S", + "geo": { + "@type": "GeoShape", + "box": "-180,-60 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld index bfaac60..fb2888c 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/annual-nlcd.jsonld @@ -17,20 +17,6 @@ "url": "https://www.mrlc.gov/" }, "version": "Collection 1.1 (ver. 
1.1, June 2025)", - "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Conterminous United States (CONUS)", - "geo": { - "@type": "GeoShape", - "box": "-125,24 -66,50" - } - }, - "spatialResolution": { - "@type": "QuantitativeValue", - "value": 30, - "unitText": "m" - }, "keywords": [ "Annual NLCD", "NLCD", @@ -46,37 +32,115 @@ "@type": "PropertyValue", "name": "Land Cover", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Conterminous United States (CONUS)", + "geo": { + "@type": "GeoShape", + "box": "-125,24 -66,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Land Cover Change", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Conterminous United States (CONUS)", + "geo": { + "@type": "GeoShape", + "box": "-125,24 -66,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Land Cover Confidence", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Conterminous United States (CONUS)", + "geo": { + "@type": "GeoShape", + "box": "-125,24 -66,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Fractional Impervious Surface", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Conterminous United States (CONUS)", + "geo": { + "@type": "GeoShape", + "box": "-125,24 -66,50" + } + }, + 
"temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Impervious Descriptor", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Conterminous United States (CONUS)", + "geo": { + "@type": "GeoShape", + "box": "-125,24 -66,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Spectral Change Day of Year", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Conterminous United States (CONUS)", + "geo": { + "@type": "GeoShape", + "box": "-125,24 -66,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld index 8785233..771b77a 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/exotic-annual-grass.jsonld @@ -16,20 +16,6 @@ "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", "url": "https://www.mrlc.gov/" }, - "temporalCoverage": "2016-01-01/2025-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Western United States (arid and semi-arid rangelands)", - "geo": { - "@type": "GeoShape", - "box": "-130,30 -95,50" - } - }, - "spatialResolution": { - "@type": "QuantitativeValue", - "value": 30, - "unitText": "m" - }, "keywords": [ "Exotic Annual Grass", "EAG", @@ -45,37 +31,115 @@ "@type": "PropertyValue", "name": "EAG fractional cover (multiple species group)", "temporalCoverage": 
"2016-01-01/2025-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western United States (arid and semi-arid rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,30 -95,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Cheatgrass (Bromus tectorum) fractional cover", "temporalCoverage": "2016-01-01/2025-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western United States (arid and semi-arid rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,30 -95,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Other Bromus spp. fractional cover (Field brome + Japanese brome)", "temporalCoverage": "2016-01-01/2025-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western United States (arid and semi-arid rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,30 -95,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Medusahead (Taeniatherum caput-medusae) fractional cover", "temporalCoverage": "2016-01-01/2025-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western United States (arid and semi-arid rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,30 -95,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Sandberg bluegrass (Poa secunda) fractional cover", "temporalCoverage": "2016-01-01/2025-12-31", - 
"spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western United States (arid and semi-arid rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,30 -95,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Confidence maps (per target)", "temporalCoverage": "2016-01-01/2025-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western United States (arid and semi-arid rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,30 -95,50" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld index a15f489..8d4690d 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/legacy-nlcd.jsonld @@ -16,19 +16,6 @@ "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", "url": "https://www.mrlc.gov/" }, - "spatialCoverage": { - "@type": "Place", - "name": "United States (nationwide coverage; includes Alaska, Hawaii, and other U.S. territories depending on product)", - "geo": { - "@type": "GeoShape", - "box": "-180,15 -50,72" - } - }, - "spatialResolution": { - "@type": "QuantitativeValue", - "value": 30, - "unitText": "m" - }, "keywords": [ "NLCD", "legacy NLCD", @@ -44,25 +31,77 @@ "@type": "PropertyValue", "name": "Land cover class (16-class legend)", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States (nationwide coverage; includes Alaska, Hawaii, and other U.S. 
territories depending on product)", + "geo": { + "@type": "GeoShape", + "box": "-180,15 -50,72" + } + }, + "temporalResolution": "Static", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Land cover change (varies by product)", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States (nationwide coverage; includes Alaska, Hawaii, and other U.S. territories depending on product)", + "geo": { + "@type": "GeoShape", + "box": "-180,15 -50,72" + } + }, + "temporalResolution": "Static", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Percent impervious surface (selected releases)", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States (nationwide coverage; includes Alaska, Hawaii, and other U.S. territories depending on product)", + "geo": { + "@type": "GeoShape", + "box": "-180,15 -50,72" + } + }, + "temporalResolution": "Static", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Tree canopy cover (selected releases)", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States (nationwide coverage; includes Alaska, Hawaii, and other U.S. 
territories depending on product)", + "geo": { + "@type": "GeoShape", + "box": "-180,15 -50,72" + } + }, + "temporalResolution": "Static", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld index acc7836..4ee9d8a 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/nalcms.jsonld @@ -33,20 +33,6 @@ "name": "U.S. Geological Survey (USGS)", "url": "https://www.usgs.gov/" }, - "temporalCoverage": "2019-01-01/2021-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "North America (Canada, United States, Mexico)", - "geo": { - "@type": "GeoShape", - "box": "-180,15 -50,85" - } - }, - "spatialResolution": { - "@type": "QuantitativeValue", - "value": 30, - "unitText": "m" - }, "keywords": [ "NALCMS", "North America", @@ -60,7 +46,20 @@ "@type": "PropertyValue", "name": "Land cover class", "temporalCoverage": "2019-01-01/2021-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "North America (Canada, United States, Mexico)", + "geo": { + "@type": "GeoShape", + "box": "-180,15 -50,85" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld index 7dff542..a7cc90e 100644 --- a/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld +++ b/data/objects/summoned/generated/MRLC_NLCD/rcmap.jsonld @@ -16,20 +16,6 @@ "name": "Multi-Resolution Land Characteristics (MRLC) Consortium", "url": "https://www.mrlc.gov/" }, - "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Western North 
America (rangelands)", - "geo": { - "@type": "GeoShape", - "box": "-130,25 -95,60" - } - }, - "spatialResolution": { - "@type": "QuantitativeValue", - "value": 30, - "unitText": "m" - }, "keywords": [ "RCMAP", "rangeland", @@ -45,37 +31,115 @@ "@type": "PropertyValue", "name": "Bare ground cover (percent)", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western North America (rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,25 -95,60" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Herbaceous cover (percent)", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western North America (rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,25 -95,60" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Litter cover (percent)", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western North America (rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,25 -95,60" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Shrub cover (percent)", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western North America (rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,25 -95,60" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": 
"m" + } }, { "@type": "PropertyValue", "name": "Sagebrush cover (percent)", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western North America (rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,25 -95,60" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } }, { "@type": "PropertyValue", "name": "Tree cover (percent)", "temporalCoverage": "1985-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Western North America (rangelands)", + "geo": { + "@type": "GeoShape", + "box": "-130,25 -95,60" + } + }, + "temporalResolution": "not detected", + "spatialResolution": { + "@type": "QuantitativeValue", + "value": 30, + "unitText": "m" + } } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld index f9a1da0..9b6188b 100644 --- a/data/objects/summoned/generated/Shale_Network/shale-network.jsonld +++ b/data/objects/summoned/generated/Shale_Network/shale-network.jsonld @@ -56,46 +56,66 @@ } ], "isAccessibleForFree": true, - "temporalCoverage": "2011-01-01/..", - "spatialCoverage": { - "@type": "Place", - "name": "Energy production regions, primarily the northeastern United States; see dataset portals for exact site coverage" - }, "variableMeasured": [ { "@type": "PropertyValue", "name": "Common water quality measurements", "description": "Common field and laboratory measurements such as pH, major ions (Na, K, Mg, Ca, sulfate, chloride, bromide), nutrients (ammonium, nitrate, nitrite, total N), alkalinity/acidity, hardness, TDS, and related parameters.", "temporalCoverage": "2011-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Energy production regions, 
primarily the northeastern United States; see dataset portals for exact site coverage" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Trace elements", "description": "Trace element concentrations such as Al, As, Ba, B, Cd, Cr, Co, Cu, Fe, Pb, Li, Mn, Hg, Mo, Ni, Se, Ag, Sr, Th, U, Zn (availability varies by site and dataset).", "temporalCoverage": "2011-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Energy production regions, primarily the northeastern United States; see dataset portals for exact site coverage" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Naturally occurring radioactive material (NORM)", "description": "Radiological measurements such as gross alpha/beta, Ra-226, and Ra-228 (availability varies).", "temporalCoverage": "2011-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Energy production regions, primarily the northeastern United States; see dataset portals for exact site coverage" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Organic constituents", "description": "Organic compounds and indicators such as benzene, toluene, ethylbenzene, xylenes, naphthalene, oil and grease, phenolics, and related constituents (availability varies).", "temporalCoverage": "2011-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Energy production regions, primarily the northeastern United States; see dataset portals for exact site coverage" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Water quantity (where available)", "description": "Water quantity observations associated with monitoring sites where 
contributed and published through HIS services.", "temporalCoverage": "2011-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Energy production regions, primarily the northeastern United States; see dataset portals for exact site coverage" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld b/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld index 7c72dd4..3689bb4 100644 --- a/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld +++ b/data/objects/summoned/generated/SoilGrids2/soilgrids2.jsonld @@ -46,15 +46,6 @@ "url": "https://www.isric.org/" } ], - "spatialCoverage": { - "@type": "Place", - "name": "Global soil property maps", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, - "spatialResolution": "Approximately 250 m", "variableMeasured": [ { "@type": "PropertyValue", @@ -63,7 +54,16 @@ "unitText": "cg/cm^3", "description": "Predicted bulk density maps for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -72,7 +72,16 @@ "unitText": "mmol(c)/kg", "description": "Predicted cation exchange capacity buffered at pH 7 for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -81,7 +90,16 @@ "unitText": "cm^3/dm^3", 
"description": "Predicted volumetric fraction of coarse fragments for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -90,7 +108,16 @@ "unitText": "g/kg", "description": "Predicted clay content for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -99,7 +126,16 @@ "unitText": "cg/kg", "description": "Predicted total nitrogen for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -108,7 +144,16 @@ "unitText": "hg/m^3", "description": "Predicted organic carbon density for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -117,7 +162,16 @@ "unitText": "t/ha", "description": "Predicted organic carbon stocks for SoilGrids soil layers.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + 
"spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -126,7 +180,16 @@ "unitText": "dg/kg", "description": "Predicted soil organic carbon concentration for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -135,7 +198,16 @@ "unitText": "pH x 10", "description": "Predicted soil pH in water for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -144,7 +216,16 @@ "unitText": "g/kg", "description": "Predicted sand content for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -153,7 +234,16 @@ "unitText": "g/kg", "description": "Predicted silt content for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + 
"temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -162,7 +252,16 @@ "unitText": "10^-3 cm^3 cm^-3", "description": "Predicted volumetric water content at 10 kPa for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -171,7 +270,16 @@ "unitText": "10^-3 cm^3 cm^-3", "description": "Predicted volumetric water content at 1500 kPa for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" }, { "@type": "PropertyValue", @@ -180,7 +288,16 @@ "unitText": "10^-3 cm^3 cm^-3", "description": "Predicted volumetric water content at 33 kPa for standard SoilGrids depth intervals.", "temporalCoverage": "Static", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global soil property maps", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "Static", + "spatialResolution": "Approximately 250 m" } ], "encodingFormat": [ diff --git a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld index ab8880f..a8a1de6 100644 --- a/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld +++ b/data/objects/summoned/generated/TerraClimate/terraclimate.jsonld @@ -78,20 +78,6 @@ "name": "CC0 1.0 Universal (Public Domain Dedication)", "url": 
"https://creativecommons.org/publicdomain/zero/1.0/" }, - "temporalCoverage": [ - "1950-01-01/..", - "1950-01-01/2025-12-31" - ], - "spatialCoverage": { - "@type": "Place", - "name": "Global terrestrial land surfaces", - "geo": { - "@type": "GeoShape", - "box": "-180,-90 180,90" - } - }, - "spatialResolution": "1/24 degree (~4 km)", - "temporalResolution": "P1M", "encodingFormat": [ "application/x-netcdf", "text/html" @@ -105,7 +91,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -115,7 +110,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -125,7 +129,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -135,7 +148,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -145,7 +167,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + 
"spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -155,7 +186,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -165,7 +205,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -175,7 +224,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -185,7 +243,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -195,7 +262,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + 
"temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -205,7 +281,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -215,7 +300,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -225,7 +319,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" }, { "@type": "PropertyValue", @@ -235,7 +338,16 @@ "1950-01-01/..", "1950-01-01/2025-12-31" ], - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Global terrestrial land surfaces", + "geo": { + "@type": "GeoShape", + "box": "-180,-90 180,90" + } + }, + "temporalResolution": "P1M", + "spatialResolution": "1/24 degree (~4 km)" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/WATERBASE/waterbase.jsonld b/data/objects/summoned/generated/WATERBASE/waterbase.jsonld index a0c6a53..c9d1dd3 100644 --- a/data/objects/summoned/generated/WATERBASE/waterbase.jsonld +++ b/data/objects/summoned/generated/WATERBASE/waterbase.jsonld @@ -51,32 +51,42 @@ "version": "01.00", "isAccessibleForFree": true, "license": "https://creativecommons.org/licenses/by/4.0/", - 
"temporalCoverage": "1900-01-01/2024-12-31", - "spatialCoverage": { - "@type": "Place", - "name": "Europe, EEA member countries, and cooperating reporting countries; see dataset documentation for exact country coverage" - }, "variableMeasured": [ { "@type": "PropertyValue", "name": "Water quality parameters", "description": "Measured and aggregated parameters describing chemical and physico-chemical water quality in inland and coastal/marine waters (including nutrients, organic matter, hazardous substances, pesticides and other chemicals).", "temporalCoverage": "1900-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Europe, EEA member countries, and cooperating reporting countries; see dataset documentation for exact country coverage" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Monitoring locations and water bodies", "description": "Spatial identifiers and associated attributes for monitoring sites and water bodies reported through WISE and WFD/WISE spatial reporting.", "temporalCoverage": "1900-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Europe, EEA member countries, and cooperating reporting countries; see dataset documentation for exact country coverage" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Ecological and chemical status (where applicable)", "description": "Reported status and classification attributes associated with monitored waters and water bodies, as provided in reporting streams.", "temporalCoverage": "1900-01-01/2024-12-31", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "Europe, EEA member countries, and cooperating reporting countries; see dataset documentation for exact country coverage" + }, + "temporalResolution": "not 
detected", + "spatialResolution": "not detected" } ], "measurementTechnique": [ diff --git a/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld b/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld index 0d82f95..9507028 100644 --- a/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld +++ b/data/objects/summoned/generated/Water_Quality_Portal/water-quality-portal.jsonld @@ -69,60 +69,90 @@ } ], "isAccessibleForFree": true, - "temporalCoverage": "1900-01-01/..", - "spatialCoverage": { - "@type": "Place", - "name": "United States and beyond; coverage depends on contributing organizations and query parameters" - }, "variableMeasured": [ { "@type": "PropertyValue", "name": "Water quality results", "description": "Discrete sample results including measured values, units, methods, and qualifiers for physical, chemical, and biological characteristics.", "temporalCoverage": "1900-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States and beyond; coverage depends on contributing organizations and query parameters" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Station and Monitoring Location Metadata", "description": "Locations where samples and observations were collected, including identifiers, coordinates, and site descriptors.", "temporalCoverage": "1900-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States and beyond; coverage depends on contributing organizations and query parameters" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Nutrients", "description": "Nutrient-related characteristics such as nitrogen and phosphorus species.", "temporalCoverage": "1900-01-01/..", - "spatialCoverage": "not detected" + 
"spatialCoverage": { + "@type": "Place", + "name": "United States and beyond; coverage depends on contributing organizations and query parameters" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Metals and trace elements", "description": "Metals and trace elements measured in water, sediment, or related matrices.", "temporalCoverage": "1900-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States and beyond; coverage depends on contributing organizations and query parameters" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Organic contaminants and pesticides", "description": "Organic contaminants, pesticides, and related analytes reported by contributing organizations.", "temporalCoverage": "1900-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States and beyond; coverage depends on contributing organizations and query parameters" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Physical parameters", "description": "Physical characteristics such as temperature, specific conductance, turbidity, and dissolved oxygen (where available).", "temporalCoverage": "1900-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": "United States and beyond; coverage depends on contributing organizations and query parameters" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" }, { "@type": "PropertyValue", "name": "Biological data", "description": "Biological observations and metrics available through WQP services where reported by data providers.", "temporalCoverage": "1900-01-01/..", - "spatialCoverage": "not detected" + "spatialCoverage": { + "@type": "Place", + "name": 
"United States and beyond; coverage depends on contributing organizations and query parameters" + }, + "temporalResolution": "not detected", + "spatialResolution": "not detected" } ], "encodingFormat": [