From 87499230f963230d4e3274357761835771268725 Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Sun, 19 Apr 2026 14:33:44 -0700 Subject: [PATCH] Fix post-processing steps and add CMSO/ASMO --- ontologies.Makefile | 24 ++++++++- src/semsql/builder/builder.py | 10 ++-- src/semsql/builder/cli.py | 26 ++++++--- src/semsql/builder/prefixes/prefixes.csv | 4 ++ .../builder/prefixes/prefixes_local.csv | 4 ++ src/semsql/builder/registry/ontologies.yaml | 12 +++++ tests/test_builder/test_builder.py | 53 +++++++++++++++++++ tests/test_builder/test_cli.py | 13 ++++- 8 files changed, 135 insertions(+), 11 deletions(-) diff --git a/ontologies.Makefile b/ontologies.Makefile index 7885a6c..dbc2db0 100644 --- a/ontologies.Makefile +++ b/ontologies.Makefile @@ -1637,6 +1637,28 @@ db/minsysont.owl: download/minsysont.owl robot merge -i $< -o $@.tmp.owl && perl -npe 's/\[HSiO4\]/%5BHSiO4%5D/g' $@.tmp.owl > $@ && rm $@.tmp.owl +download/cmso.owl: STAMP + curl -L -s https://raw.githubusercontent.com/OCDO/cmso/v0.0.1/cmso.owl > $@.tmp + sha256sum -b $@.tmp > $@.sha256 + mv $@.tmp $@ + +.PRECIOUS: download/cmso.owl + +db/cmso.owl: download/cmso.owl + cp $< $@ + + +download/asmo.owl: STAMP + curl -L -s https://raw.githubusercontent.com/OCDO/asmo/v0.3.0/asmo.owl > $@.tmp + sha256sum -b $@.tmp > $@.sha256 + mv $@.tmp $@ + +.PRECIOUS: download/asmo.owl + +db/asmo.owl: download/asmo.owl + cp $< $@ + + download/sulo.owl: STAMP curl -L -s https://w3id.org/sulo/sulo.ttl > $@.tmp sha256sum -b $@.tmp > $@.sha256 @@ -1658,4 +1680,4 @@ download/%.owl: STAMP db/%.owl: download/%.owl robot merge -i $< -o $@ -EXTRA_ONTOLOGIES = swo chiro pcl chemessence ogco ncit fma maxo foodon chebiplus msio chemrof deb matpo panet phenx pride sosa emi npc modl phenio comploinc hba mba dmba dhba pba bero aio reacto xsmo bcio sio icd10who icd11f ordo gard icd10cm omim mondo-ingest oeo envthes wifire taxslim goldterms sdgio kin metpo d3o biovoices omop comet cco occo iof upa go go-lego go-amigo neo bao orcid ror cpont biolink biopax enanomapper mlo ito chemont molgenie cso obiws biopragmatics-reactome reactome-hs reactome-mm efo hcao hpinternational edam chr sweetAll oboe-core oboe-standards lov schema-dot-org prov dtype vaem qudtunit quantitykind cellosaurus cosmo gist gistBFO fhkb dbpendiaont uberoncm co_324 ppeo interpro pfam hgnc.genegroup hgnc sgd gtdb eccode uniprot uniprot.ptm credit rhea swisslipid drugbank drugcentral complexportal wikipathways pathbank kegg.genome drugmechdb rxnorm vccf ontobiotope nando ecso enigma_context cbo ontie pain como ecosim bervo valuesets micront nmdc_schema mixs kgcl fibo bfo2020 bfo2020_core bfo2020_notime bfo2020_time saref4ener saref4bldg hhearvs sdoho pathgo brick minsysont sulo +EXTRA_ONTOLOGIES = swo chiro pcl chemessence ogco ncit fma maxo foodon chebiplus msio chemrof deb matpo panet phenx pride sosa emi npc modl phenio comploinc hba mba dmba dhba pba bero aio reacto xsmo bcio sio icd10who icd11f ordo gard icd10cm omim mondo-ingest oeo envthes wifire taxslim goldterms sdgio kin metpo d3o biovoices omop comet cco occo iof upa go go-lego go-amigo neo bao orcid ror cpont biolink biopax enanomapper mlo ito chemont molgenie cso obiws biopragmatics-reactome reactome-hs reactome-mm efo hcao hpinternational edam chr sweetAll oboe-core oboe-standards lov schema-dot-org prov dtype vaem qudtunit quantitykind cellosaurus cosmo gist gistBFO fhkb dbpendiaont uberoncm co_324 ppeo interpro pfam hgnc.genegroup hgnc sgd gtdb eccode uniprot uniprot.ptm credit rhea swisslipid drugbank drugcentral complexportal wikipathways pathbank kegg.genome drugmechdb rxnorm vccf ontobiotope nando ecso enigma_context cbo ontie pain como ecosim bervo valuesets micront nmdc_schema mixs kgcl fibo bfo2020 bfo2020_core bfo2020_notime bfo2020_time saref4ener saref4bldg hhearvs sdoho pathgo brick minsysont cmso asmo sulo diff --git a/src/semsql/builder/builder.py b/src/semsql/builder/builder.py index 1ae09f1..73f1545 100644 --- a/src/semsql/builder/builder.py +++ b/src/semsql/builder/builder.py @@ -133,9 +133,13 @@ def get_postprocessing_steps( registry = yaml_loader.load( str(registry_path), target_class=registry_schema.Registry ) - # steps = [step.format(ont=ontology, db=db) for step in registry.ontologies.get(ontology, []).post_processing_steps] - steps = registry.ontologies.get(ontology, []) - return steps + ontology_entry = registry.ontologies.get(ontology) + if ontology_entry is None: + return [] + return [ + step.format(ont=ontology, ontology=ontology, db=db) + for step in ontology_entry.post_processing_steps + ] def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> str: diff --git a/src/semsql/builder/cli.py b/src/semsql/builder/cli.py index 6aa8fd5..dc6f180 100644 --- a/src/semsql/builder/cli.py +++ b/src/semsql/builder/cli.py @@ -1,6 +1,7 @@ import logging import subprocess from itertools import chain, combinations +from pathlib import Path import click from linkml_runtime import SchemaView @@ -24,6 +25,23 @@ def powerset(iterable): return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) +def ontology_from_target_path(path: str): + """ + Extract ontology ID from a standard db target path. + + >>> ontology_from_target_path("db/cmso.db") + 'cmso' + >>> ontology_from_target_path("db/reactome-mm.db") + 'reactome-mm' + >>> ontology_from_target_path("/tmp/cmso.db") is None + True + """ + target_path = Path(path) + if target_path.suffix != ".db" or target_path.parent.name != "db": + return None + return target_path.stem + + @click.group() @click.option("-v", "--verbose", count=True) @click.option("-q", "--quiet") @@ -63,12 +81,8 @@ def make(path, docker, **kwargs): else: docker_config = None builder.make(path, docker_config=docker_config, **kwargs) - # check if path is db/{foo}.db using regular expression - import re - - matches = re.match(r"db/(\w+).db", path) - if matches: - ontology = matches.group(1) + ontology = ontology_from_target_path(path) + if ontology: steps = builder.get_postprocessing_steps(ontology, path) for step in steps: print(f"RUNNING: {step}") diff --git a/src/semsql/builder/prefixes/prefixes.csv b/src/semsql/builder/prefixes/prefixes.csv index 4d34c7c..6410814 100644 --- a/src/semsql/builder/prefixes/prefixes.csv +++ b/src/semsql/builder/prefixes/prefixes.csv @@ -252,6 +252,10 @@ sh,http://www.w3.org/ns/shacl# vcard,http://www.w3.org/2006/vcard/ns# MinSysOnt,http://www.semanticweb.org/hbabaie/ontologies/2021/5/MinSysOnt# CMO.minerals,http://www.semanticweb.org/Davarpanah-Babaie/Ontologies/2022/CMO# +CMSO,http://purls.helmholtz-metadaten.de/cmso/ +CDCO,http://purls.helmholtz-metadaten.de/cdos/cdco/ +ASMO,https://purls.helmholtz-metadaten.de/asmo/ +MDO.calculation,https://w3id.org/mdo/calculation/ sulo,https://w3id.org/sulo/ RBO,http://purl.obolibrary.org/obo/RBO_ RBO,http://purl.obolibrary.org/obo/RBO_ diff --git a/src/semsql/builder/prefixes/prefixes_local.csv b/src/semsql/builder/prefixes/prefixes_local.csv index 2d95caa..174794d 100644 --- a/src/semsql/builder/prefixes/prefixes_local.csv +++ b/src/semsql/builder/prefixes/prefixes_local.csv @@ -189,4 +189,8 @@ sh,http://www.w3.org/ns/shacl# vcard,http://www.w3.org/2006/vcard/ns# MinSysOnt,http://www.semanticweb.org/hbabaie/ontologies/2021/5/MinSysOnt# CMO.minerals,http://www.semanticweb.org/Davarpanah-Babaie/Ontologies/2022/CMO# +CMSO,http://purls.helmholtz-metadaten.de/cmso/ +CDCO,http://purls.helmholtz-metadaten.de/cdos/cdco/ +ASMO,https://purls.helmholtz-metadaten.de/asmo/ +MDO.calculation,https://w3id.org/mdo/calculation/ sulo,https://w3id.org/sulo/ diff --git a/src/semsql/builder/registry/ontologies.yaml b/src/semsql/builder/registry/ontologies.yaml index 34ed7fe..ee1316f 100644 --- a/src/semsql/builder/registry/ontologies.yaml +++ b/src/semsql/builder/registry/ontologies.yaml @@ -764,6 +764,18 @@ ontologies: prefixmap: MinSysOnt: "http://www.semanticweb.org/hbabaie/ontologies/2021/5/MinSysOnt#" CMO.minerals: "http://www.semanticweb.org/Davarpanah-Babaie/Ontologies/2022/CMO#" + cmso: + description: Computational Materials Sample Ontology + url: https://raw.githubusercontent.com/OCDO/cmso/v0.0.1/cmso.owl + prefixmap: + CMSO: http://purls.helmholtz-metadaten.de/cmso/ + CDCO: http://purls.helmholtz-metadaten.de/cdos/cdco/ + asmo: + description: Atomistic Simulation Methods Ontology + url: https://raw.githubusercontent.com/OCDO/asmo/v0.3.0/asmo.owl + prefixmap: + ASMO: https://purls.helmholtz-metadaten.de/asmo/ + MDO.calculation: https://w3id.org/mdo/calculation/ sulo: description: Simplified Upper Level Ontology url: https://w3id.org/sulo/sulo.ttl diff --git a/tests/test_builder/test_builder.py b/tests/test_builder/test_builder.py index ec0950a..cc120ad 100644 --- a/tests/test_builder/test_builder.py +++ b/tests/test_builder/test_builder.py @@ -1,4 +1,6 @@ import os +import tempfile +import textwrap import unittest from semsql.builder import builder @@ -20,3 +22,54 @@ def setUp(self) -> None: def test_builder(self): mkfile = builder.compile_registry(self.registry) print(mkfile) + + def test_get_postprocessing_steps(self): + registry_text = textwrap.dedent( + """ + id: test-registry + license: CC0 + ontologies: + reactome-mm: + url: https://example.org/reactome-mm.owl + post_processing_steps: + - "sqlite3 {db} < views/reactome.sql" + - "echo {ont}" + """ + ).strip() + with tempfile.TemporaryDirectory() as tmpdir: + registry_path = os.path.join(tmpdir, "registry.yaml") + with open(registry_path, "w") as stream: + stream.write(registry_text) + steps = builder.get_postprocessing_steps( + "reactome-mm", + "db/reactome-mm.db", + registry_path=registry_path, + ) + self.assertEqual( + [ + "sqlite3 db/reactome-mm.db < views/reactome.sql", + "echo reactome-mm", + ], + steps, + ) + + def test_get_postprocessing_steps_missing_ontology(self): + registry_text = textwrap.dedent( + """ + id: test-registry + license: CC0 + ontologies: + cmso: + url: https://example.org/cmso.owl + """ + ).strip() + with tempfile.TemporaryDirectory() as tmpdir: + registry_path = os.path.join(tmpdir, "registry.yaml") + with open(registry_path, "w") as stream: + stream.write(registry_text) + steps = builder.get_postprocessing_steps( + "asmo", + "db/asmo.db", + registry_path=registry_path, + ) + self.assertEqual([], steps) diff --git a/tests/test_builder/test_cli.py b/tests/test_builder/test_cli.py index aaf4cdc..3adc21c 100644 --- a/tests/test_builder/test_cli.py +++ b/tests/test_builder/test_cli.py @@ -4,7 +4,7 @@ from click.testing import CliRunner -from semsql.builder.cli import main +from semsql.builder.cli import main, ontology_from_target_path cwd = os.path.abspath(os.path.dirname(__file__)) DB_DIR = os.path.join(cwd, "../inputs") @@ -35,6 +35,17 @@ def test_main_help(self): 0, self.runner.invoke(main, ["view2table", "--help"]).exit_code ) + def test_ontology_from_target_path(self): + cases = [ + ("db/cmso.db", "cmso"), + ("db/reactome-mm.db", "reactome-mm"), + ("./db/asmo.db", "asmo"), + ("/tmp/asmo.db", None), + ("db/asmo.owl", None), + ] + for path, expected in cases: + self.assertEqual(expected, ontology_from_target_path(path)) + def test_view2table(self): cases = [ (["--no-index"], "CREATE TABLE deprecated_node AS SELECT"),