Skip to content

Commit 452dead

Browse files
authored
Merge pull request #34 from OpenSemanticLab/rework-ontology-import
Rework ontology import BREAKING CHANGE: remove `import_ontology` from osw.core
2 parents 26775e7 + 6983cb3 commit 452dead

11 files changed

Lines changed: 1263 additions & 340 deletions

File tree

examples/data/example_ontology.ttl

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
@prefix : <http://example.com/> .
2+
@prefix owl: <http://www.w3.org/2002/07/owl#> .
3+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
4+
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
5+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
6+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
7+
@base <http://example.com/> .
8+
9+
<http://example.com> rdf:type owl:Ontology .
10+
11+
#################################################################
12+
# Annotation properties
13+
#################################################################
14+
15+
### http://example.com/R7k3ssL7gUxsfWuVsXWDXYF
16+
<http://example.com/R7k3ssL7gUxsfWuVsXWDXYF> rdf:type owl:AnnotationProperty ;
17+
rdfs:label "description"@en .
18+
19+
20+
#################################################################
21+
# Object Properties
22+
#################################################################
23+
24+
### http://example.com/R9avr2pWFWEML712PSKDfcq
25+
<http://example.com/R9avr2pWFWEML712PSKDfcq> rdf:type owl:ObjectProperty ;
26+
rdfs:subPropertyOf <http://example.com/REh2qNSARmKpPuwrJmr5Pu> ;
27+
rdfs:domain <http://example.com/RDDfNZfAHDafrgYXW6rtT14> ;
28+
rdfs:range <http://example.com/RDnVhTMcRkWFpWWnAprFlO0> ;
29+
rdfs:label "SubpropertyA"@en .
30+
31+
32+
### http://example.com/REh2qNSARmKpPuwrJmr5Pu
33+
<http://example.com/REh2qNSARmKpPuwrJmr5Pu> rdf:type owl:ObjectProperty ;
34+
rdfs:subPropertyOf owl:topObjectProperty ;
35+
rdfs:domain <http://example.com/RBfJambxhZvFDQYeKK2zzeH> ;
36+
rdfs:range <http://example.com/RDnVhTMcRkWFpWWnAprFlO0> ;
37+
rdfs:label "ObjectPropertyA"@en .
38+
39+
40+
#################################################################
41+
# Data properties
42+
#################################################################
43+
44+
### http://example.com/RqSw2tmyIfMbLNbk0NPkKa
45+
<http://example.com/RqSw2tmyIfMbLNbk0NPkKa> rdf:type owl:DatatypeProperty ;
46+
rdfs:subPropertyOf owl:topDataProperty ;
47+
rdfs:label "DataPropertyA"@en .
48+
49+
50+
#################################################################
51+
# Classes
52+
#################################################################
53+
54+
### http://example.com/RBfJambxhZvFDQYeKK2zzeH
55+
<http://example.com/RBfJambxhZvFDQYeKK2zzeH> rdf:type owl:Class ;
56+
rdfs:subClassOf [ rdf:type owl:Restriction ;
57+
owl:onProperty <http://example.com/REh2qNSARmKpPuwrJmr5Pu> ;
58+
owl:someValuesFrom <http://example.com/RDnVhTMcRkWFpWWnAprFlO0>
59+
] ;
60+
<http://example.com/R7k3ssL7gUxsfWuVsXWDXYF> "a test class"^^xsd:string ;
61+
rdfs:label "ClassA"@en .
62+
63+
64+
### http://example.com/RDDfNZfAHDafrgYXW6rtT14
65+
<http://example.com/RDDfNZfAHDafrgYXW6rtT14> rdf:type owl:Class ;
66+
rdfs:subClassOf <http://example.com/RBfJambxhZvFDQYeKK2zzeH> ,
67+
[ rdf:type owl:Restriction ;
68+
owl:onProperty <http://example.com/REh2qNSARmKpPuwrJmr5Pu> ;
69+
owl:someValuesFrom <http://example.com/RDnVhTMcRkWFpWWnAprFlO0>
70+
] ;
71+
rdfs:label "SubclassA"@en .
72+
73+
74+
### http://example.com/RDnVhTMcRkWFpWWnAprFlO0
75+
<http://example.com/RDnVhTMcRkWFpWWnAprFlO0> rdf:type owl:Class ;
76+
rdfs:label "ClassB"@en .
77+
78+
79+
#################################################################
80+
# Individuals
81+
#################################################################
82+
83+
### http://example.com/RD2X6TQT0bKVpXehgObBb7O
84+
<http://example.com/RD2X6TQT0bKVpXehgObBb7O> rdf:type owl:NamedIndividual ,
85+
<http://example.com/RDDfNZfAHDafrgYXW6rtT14> ;
86+
rdfs:label "IndividualA"@en .

examples/ontology_import.py

Lines changed: 50 additions & 226 deletions
Original file line numberDiff line numberDiff line change
@@ -1,238 +1,62 @@
1-
import json
1+
import os
22

3-
# import os
4-
import re
5-
from uuid import UUID
6-
7-
import mwclient
8-
from pyld import jsonld
9-
from rdflib import Graph
10-
11-
import osw.model.entity as model
12-
13-
# from osw.auth import CredentialManager
14-
from osw.core import OSW
3+
from osw.auth import CredentialManager
4+
from osw.core import OSW, model
5+
from osw.ontology import ImportConfig, OntologyImporter
156
from osw.wtsite import WtSite
167

17-
# create/update the password file under examples/accounts.pwd.yaml
18-
# pwd_file_path = os.path.join(
19-
# os.path.dirname(os.path.abspath(__file__)), "accounts.pwd.yaml"
20-
# )
21-
# cm = CredentialManager(cred_filepath=pwd_file_path)
22-
# wtsite = WtSite(WtSite.WtSiteConfig(
23-
# iri="http://localhost:18081",
24-
# cred_mngr=cm
25-
# ))
26-
27-
# or use a hardcoded login
28-
site = mwclient.Site(scheme="http", host="localhost:18081", path="/w/")
29-
site.login("Admin", "change_me123123")
30-
wtsite = WtSite(WtSite.WtSiteLegacyConfig(site=site))
31-
32-
osw = OSW(site=wtsite)
33-
34-
35-
# load the EmmoTerm schema => run the script a second time after the schema was loaded
36-
try:
37-
model.EmmoTerm
38-
except AttributeError:
39-
# name 'EmmoTerm' is not defined
40-
osw.fetch_schema(
41-
osw.FetchSchemaParam(
42-
schema_title="Category:OSW57beed5e1294434ba77bb6516e461456",
43-
mode="replace", # EmmoTerm
44-
)
8+
# Use credentials from file. If none are found, the user will be prompted to enter them.
9+
cm = CredentialManager(
10+
cred_filepath=os.path.join(
11+
os.path.dirname(os.path.abspath(__file__)), "accounts.pwd.yaml"
4512
)
46-
47-
# load the ontology
48-
g = Graph()
49-
g.parse(
50-
"https://raw.githubusercontent.com/emmo-repo/domain-battery/master/battery.ttl",
51-
format="n3",
5213
)
53-
# g.parse("https://github.com/Battery-Value-Chain-Ontology/ontology/releases/download/v0.3.0/BVCO_inferred.ttl", format="n3")
54-
# g.parse(r"BVCO_inferred.ttl")
55-
56-
# convert to json-ld dict
57-
g = json.loads(g.serialize(format="json-ld", auto_compact=True))
58-
59-
# define the context
60-
context = {
61-
"owl": "http://www.w3.org/2002/07/owl#",
62-
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
63-
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
64-
"xsd": "http://www.w3.org/2001/XMLSchema#",
65-
"skos": "http://www.w3.org/2004/02/skos/core#",
66-
"dc": "http://purl.org/dc/terms/",
67-
"emmo": "http://emmo.info/emmo#", # keep values with full uri
68-
"uri": {"@id": "@id"},
69-
"rdf_type": {"@id": "@type"},
70-
# "label": "rdfs:label",
71-
"label": {"@id": "skos:prefLabel"},
72-
"altLabel": {"@id": "skos:altLabel"},
73-
"text": {"@id": "@value"},
74-
"lang": {"@id": "@language"},
75-
"subclass_of": {"@id": "rdfs:subClassOf", "@type": "@id"},
76-
"source": "dc:source",
77-
"disjointUnionOf": "owl:disjointUnionOf",
78-
"disjointWith": "owl:disjointWith",
79-
"equivalentClass": "owl:equivalentClass",
80-
"unionOf": {"@id": "owl:unionOf", "@container": "@list", "@type": "@id"},
81-
"comment": "rdfs:comment",
82-
"isDefinedBy": "rdfs:isDefinedBy",
83-
"seeAlso": "rdfs:seeAlso",
84-
# shorten properties
85-
"qudtReference": "http://emmo.info/emmo#EMMO_1f1b164d_ec6a_4faa_8d5e_88bda62316cc",
86-
"omReference": "http://emmo.info/emmo#EMMO_209ba1b3_149f_4ff0_b672_941610eafd72",
87-
"wikidataReference": "http://emmo.info/emmo#EMMO_26bf1bef_d192_4da6_b0eb_d2209698fb54",
88-
"ISO9000Reference": "http://emmo.info/emmo#EMMO_3aa37f92_8dc5_4ee4_8438_e41e6ae20c62",
89-
"IEVReference": "http://emmo.info/emmo#EMMO_50c298c2_55a2_4068_b3ac_4e948c33181f",
90-
"dbpediaReference": "http://emmo.info/emmo#EMMO_6dd685dd_1895_46e4_b227_be9f7d643c25",
91-
"etymology": "http://emmo.info/emmo#EMMO_705f27ae_954c_4f13_98aa_18473fc52b25",
92-
"definition": "http://emmo.info/emmo#EMMO_70fe84ff_99b6_4206_a9fc_9a8931836d84",
93-
"ISO80000Reference": "http://emmo.info/emmo#EMMO_8de5d5bf_db1c_40ac_b698_095ba3b18578",
94-
"ISO14040Reference": "http://emmo.info/emmo#EMMO_964568dd_64d2_454b_a12f_ac389f1c5e7f",
95-
"description": "http://emmo.info/emmo#EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9", # elucidation
96-
"example": "http://emmo.info/emmo#EMMO_b432d2d5_25f4_4165_99c5_5935a7763c1a",
97-
"VIMTerm": "http://emmo.info/emmo#EMMO_bb49844b_45d7_4f0d_8cae_8e552cbc20d6",
98-
"emmo_comment": "http://emmo.info/emmo#EMMO_c7b62dd7_063a_4c2a_8504_42f7264ba83f",
99-
"wikipediaReference": "http://emmo.info/emmo#EMMO_c84c6752_6d64_48cc_9500_e54a3c34898d",
100-
"iupacReference": "http://emmo.info/emmo#EMMO_fe015383_afb3_44a6_ae86_043628697aa2",
101-
}
10214

103-
# compact the json-ld (replace IRIs defined in the context with plain properties)
104-
compacted = jsonld.compact(g, context)
15+
# create the site object
16+
wtsite = WtSite(
17+
WtSite.WtSiteConfig(iri="https://wiki-dev.open-semantic-lab.org", cred_mngr=cm)
18+
)
19+
osw = OSW(site=wtsite)
10520

106-
# define postprocessed properties
107-
ensure_multilang = ["label", "prefLabel", "altLabel", "comment", "description"]
108-
ensure_array = [
109-
"label",
110-
"prefLabel",
111-
"altLabel",
112-
"comment",
113-
"description",
114-
"subclass_of",
21+
list_of_schemas = [
22+
"Category:OSW725a3cf5458f4daea86615fcbd0029f8", # OwlClass
23+
"Category:OSW6b9ef2784a934b8ab96523366e23e906", # OwlIndividual
24+
"Category:Item",
25+
"Category:ObjectProperty",
26+
"Category:DataProperty",
27+
"Category:AnnotationProperty",
11528
]
116-
map_uuid_uri = ["subclass_of"]
117-
remove_unnamed = ["subclass_of"] # , 'equivalentClass']
118-
119-
# postprocess json-ld
120-
for node in compacted["@graph"]:
121-
for key in ensure_multilang:
122-
if key in node:
123-
if isinstance(node[key], str):
124-
node[key] = {"text": node[key], "lang": "en"}
125-
elif "text" in node[key] and "lang" not in node[key]:
126-
node[key]["lang"] = "en"
127-
elif isinstance(node[key], list):
128-
for i, val in enumerate(node[key]):
129-
if isinstance(node[key][i], str):
130-
node[key][i] = {"text": node[key][i], "lang": "en"}
131-
elif "text" in node[key][i] and "lang" not in node[key][i]:
132-
node[key][i]["lang"] = "en"
133-
for key in ensure_array:
134-
if key in node and not isinstance(node[key], list):
135-
node[key] = [node[key]]
136-
for key in remove_unnamed:
137-
if key in node:
138-
if isinstance(node[key], list):
139-
node[key] = [value for value in node[key] if not value.startswith("_:")]
140-
elif isinstance(node[key], str) and node[key].startswith("_:"):
141-
del node[key]
142-
for key in map_uuid_uri:
143-
if key in node:
144-
if isinstance(node[key], list):
145-
for i, val in enumerate(node[key]):
146-
node[key][i] = "Category:OSW" + str(
147-
UUID(re.sub(r"[^A-Fa-f0-9]", "", node[key][i])[-32:])
148-
).replace("-", "")
149-
if isinstance(node[key], str):
150-
node[key][i] = "Category:OSW" + str(
151-
UUID(re.sub(r"[^A-Fa-f0-9]", "", node[key][i])[-32:])
152-
).replace("-", "")
153-
154-
if (
155-
"rdf_type" in node
156-
and node["rdf_type"] == "owl:Class"
157-
and not node["uri"].startswith("_:")
158-
):
159-
node["uuid"] = str(UUID(re.sub(r"[^A-Fa-f0-9]", "", node["uri"])[-32:]))
160-
161-
if "prefLabel" in node:
162-
node["name"] = node["prefLabel"][0]["text"]
163-
elif "label" in node:
164-
node["name"] = node["label"][0]["text"]
165-
else:
166-
print("No label: ", node["uri"])
167-
168-
# store the json-ld serialization on disk
169-
with open("BVCO.compacted.jsonld", "w", encoding="utf-8") as f:
170-
json.dump(compacted, f, indent=4, ensure_ascii=False)
171-
172-
# optional: also serialize as ttl
173-
g2 = Graph()
174-
g2.parse("BVCO.compacted.jsonld")
175-
g2.serialize(destination="BVCO.jsonld.ttl", format="ttl")
176-
177-
# create OSW entities
178-
limit = 3000 # choose a smaller number for tests
179-
counter = 0
180-
max_index = len(compacted["@graph"])
181-
entities = []
182-
for index, node in enumerate(compacted["@graph"]):
183-
if "rdf_type" in node and node["rdf_type"] == "owl:Class":
184-
if "label" in node:
185-
if counter < limit:
186-
e = model.EmmoTerm(**node)
187-
entities.append(e)
188-
counter += 1
189-
190-
# define ontology metadata
191-
emmo = model.Ontology(
192-
name="EMMO",
193-
iri="http://emmo.info/emmo",
194-
prefix="http://emmo.info/emmo#",
195-
prefix_name="emmo",
196-
link="https://github.com/emmo-repo/EMMO",
197-
)
198-
battinfo = model.Ontology(
199-
name="EMMO BattINFO",
200-
iri="http://emmo.info/battery",
201-
prefix="http://emmo.info/battery#",
202-
prefix_name="battinfo",
203-
link="https://github.com/BIG-MAP/BattINFO",
204-
)
205-
electrochemistry = model.Ontology(
206-
name="EMMO Electrochemistry",
207-
iri="http://emmo.info/electrochemistry",
208-
prefix="http://emmo.info/electrochemistry#",
209-
prefix_name="electrochemistry",
210-
link="https://github.com/emmo-repo/EMMO",
211-
)
212-
periodictable = model.Ontology(
213-
name="EMMO Periodic Table",
214-
iri="http://emmo.info/emmo/domain/periodic-table",
215-
prefix="http://emmo.info/emmo/domain/periodic-table#",
216-
prefix_name="periodictable",
217-
link="https://github.com/emmo-repo/EMMO",
218-
)
219-
gpo = model.Ontology(
220-
name="GPO",
221-
iri="https://gpo.ontology.link",
222-
prefix="https://gpo.ontology.link/",
223-
prefix_name="gpo",
224-
link="https://github.com/General-Process-Ontology/ontology",
225-
)
226-
bvco = model.Ontology(
227-
name="BVCO",
228-
iri="https://bvco.ontology.link",
229-
prefix="https://bvco.ontology.link/",
230-
prefix_name="bvco",
231-
link="https://github.com/Battery-Value-Chain-Ontology/ontology",
29+
for i, cat in enumerate(list_of_schemas):
30+
mode = "append"
31+
if i == 0:
32+
mode = "replace"
33+
osw.fetch_schema(OSW.FetchSchemaParam(schema_title=cat, mode=mode))
34+
35+
ontology_name = "example_ontology"
36+
37+
ex = model.Ontology(
38+
name="Example",
39+
iri="http://example.com",
40+
prefix="http://example.com/",
41+
prefix_name="example",
42+
link="http://example.com",
23243
)
23344

234-
ontologies = [emmo, battinfo, electrochemistry, periodictable, gpo, bvco]
235-
# ontologies = [battinfo]
45+
import_config = ImportConfig(
46+
ontology_name=ontology_name,
47+
ontologies=[ex],
48+
file=os.path.join(
49+
os.path.dirname(os.path.abspath(__file__)), "data", f"{ontology_name}.ttl"
50+
),
51+
base_class=model.OwlClass,
52+
base_class_title="Category:OSW725a3cf5458f4daea86615fcbd0029f8", # OwlClass
53+
dump_files=True,
54+
dump_path=os.path.dirname(os.path.abspath(__file__)),
55+
dry_run=False,
56+
)
23657

23758
# import ontologies
238-
osw.import_ontology(OSW.ImportOntologyParam(ontologies=ontologies, entities=entities))
59+
importer = OntologyImporter(
60+
osw=osw,
61+
)
62+
importer.import_ontology(import_config)

0 commit comments

Comments
 (0)