Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions .zenodo.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"title": "GeneLab Benchmark: A Multi-Tissue Spaceflight Transcriptomics Benchmark for AI/ML Models",
"title": "SpaceBio-Bench / GeneLab Benchmark: Mission-Held-Out Spaceflight Transcriptomics Benchmark",
"upload_type": "software",
"version": "7.1.2",
"description": "GeneLab Benchmark provides standardized mission-held-out benchmark tasks for evaluating machine learning and foundation-model behavior on public NASA OSDR mouse spaceflight transcriptomics data. This archive metadata describes the current public documentation, card-pack, and release-candidate surface. v7.1.2 is a documentation, public-card, metadata, and evidence-visibility patch and does not introduce new benchmark result generation.",
"description": "SpaceBio-Bench / GeneLab Benchmark provides standardized mission-held-out benchmark tasks for evaluating machine-learning and foundation-model generalization on public NASA OSDR mouse spaceflight transcriptomics data. The v7.1.2 archive records the public-card and metadata patch over the canonical v7.1 result surface; it does not introduce new benchmark result generation.",
"creators": [
{
"name": "Kim, JangKeun",
Expand All @@ -21,7 +21,10 @@
"NASA OSDR",
"GeneLab",
"leave-one-mission-out",
"bioinformatics"
"bioinformatics",
"SpaceBio-Bench",
"space biology",
"mission-held-out"
],
"related_identifiers": [
{
Expand All @@ -40,5 +43,5 @@
"scheme": "url"
}
],
"notes": "Source biological data are derived from NASA OSDR public datasets and should be cited at the individual OSDR study level where applicable. This repository archive is not a clinical, crew-health, countermeasure, or intervention recommendation artifact. Final DOI publication should confirm the final manuscript author list, manuscript title, release tag, archive checksum, and upstream dataset citation metadata before deposition."
"notes": "Source biological data are derived from NASA OSDR public datasets and should be cited at the individual OSDR study level where applicable. Processed public fold packages are available from the linked Hugging Face dataset repository."
}
9 changes: 9 additions & 0 deletions scripts/validate_public_docs_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
README = REPO_ROOT / "README.md"
HF_CARD = REPO_ROOT / "docs" / "hf_dataset_card.md"
CITATION = REPO_ROOT / "CITATION.cff"
ZENODO = REPO_ROOT / ".zenodo.json"
RELEASE_MANIFEST = REPO_ROOT / "release" / "release_manifest.json"


Expand Down Expand Up @@ -61,6 +62,7 @@ def validate_public_docs() -> list[str]:
readme = README.read_text()
hf_card = HF_CARD.read_text()
citation = CITATION.read_text()
zenodo = load_json(ZENODO)
hf_front_matter = parse_front_matter(hf_card)

lanes = {lane["lane_id"]: lane for lane in manifest["release_lanes"]}
Expand Down Expand Up @@ -99,6 +101,13 @@ def validate_public_docs() -> list[str]:
require_contains(errors, "docs/hf_dataset_card.md", hf_card, "Public status: **v7.1.2 public-card/metadata patch")
require_contains(errors, "docs/hf_dataset_card.md", hf_card, "Dataset freeze: **2026-03-01**")
require_contains(errors, "docs/hf_dataset_card.md", hf_card, "repo_id = \"jang1563/genelab-benchmark\"")

zenodo_text = json.dumps(zenodo, sort_keys=True)
require_contains(errors, ".zenodo.json", zenodo.get("title", ""), "SpaceBio-Bench")
require_contains(errors, ".zenodo.json", zenodo.get("version", ""), "7.1.2")
require_contains(errors, ".zenodo.json", zenodo.get("description", ""), "public-card and metadata patch")
for forbidden in ("evidence-visibility", "release-candidate surface", "hiring-manager"):
require_absent(errors, ".zenodo.json", zenodo_text, forbidden)
if v7.get("public_label"):
require_contains(errors, "README.md", readme.lower(), v7["public_label"].lower())
require_contains(errors, "docs/hf_dataset_card.md", hf_card.lower(), v7["public_label"].lower())
Expand Down