Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions src/zbmath_rest2oai/getAsXml.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ class EntryNotFoundException(Exception):
pass


def apply_zbmath_api_fixes(result, prefix):
def apply_zbmath_api_fixes(result, prefix_get_as_xml):
if result.get('datestamp'):
result['datestamp'] = (result['datestamp'].
replace('0001-01-01T00:00:00Z', '0001-01-01T00:00:00'))

if result.get('id'):
result['id'] = prefix + str(result['id'])
result['id'] = prefix_get_as_xml + str(result['id'])
old_states = result.get('states')
if old_states is None:
return
Expand Down Expand Up @@ -77,6 +77,7 @@ def extract_tags(result):
def add_references_to_software(api_uri, dict_res):
list_articles_ids_to_soft = []
list_articles_ids_and_alter_ids_to_soft = []
list_references_year_alt = []
if "software" in api_uri:
if api_uri.startswith("https://api.zbmath.org/v1/software/_all?start_after=")==False:
soft_id=api_uri.split("/")[-1]
Expand All @@ -101,6 +102,10 @@ def api_doc_endpoint(page):

list_ids_and_alter.append(";".join([str(entry["id"])]+list_links))

if "datestamp" in entry:
year = entry["datestamp"][:4]
list_references_year_alt.append(year)

list_articles_ids_to_soft.extend(list_ids)
list_articles_ids_and_alter_ids_to_soft.extend(list_ids_and_alter)

Expand All @@ -110,10 +115,11 @@ def api_doc_endpoint(page):
dict_res["references"] = list_articles_ids_to_soft
# Wrap it in a list to make it iterable for your existing loop
dict_res["references_alt"] = list_articles_ids_and_alter_ids_to_soft
dict_res["references_year_alt"] = list_references_year_alt
dict_res = [dict_res]

return dict_res
def final_xml2(api_source, prefix):
def final_xml2(api_source, prefix_final_xml2):
headers = {'Accept': 'application/json'}
r = requests.get(api_source, headers=headers, timeout=(10, 60))
if r.status_code == 404:
Expand All @@ -135,14 +141,14 @@ def final_xml2(api_source, prefix):
for result in json["result"]:
if isinstance(result, list):
result = result[0]
apply_zbmath_api_fixes(result, prefix)
apply_zbmath_api_fixes(result, prefix_final_xml2)
identifier = result["id"]
dict_math_entities[identifier] = _illegal_xml_chars_RE.sub("", Converter(wrap="root").build(
result,
closed_tags_for=[[], '', [None], None]))
tags[identifier] = extract_tags(result)
elif isinstance(result, dict):
apply_zbmath_api_fixes(result, prefix)
apply_zbmath_api_fixes(result, prefix_final_xml2)
identifier = result["id"]
dict_math_entities[identifier] = _illegal_xml_chars_RE.sub("", Converter(wrap="root").build(
result,
Expand All @@ -153,7 +159,7 @@ def final_xml2(api_source, prefix):

if __name__ == "__main__":
if "document" in sys.argv[1]:
prefix="oai:zbmath.org:"
prefix_final_xml2_main = "oai:zbmath.org:"
else:
prefix="oai:swmath.org:"
print(final_xml2(sys.argv[1], prefix))
prefix_final_xml2_main = "oai:swmath.org:"
print(final_xml2(sys.argv[1], prefix_final_xml2_main))
111 changes: 111 additions & 0 deletions test/data/software/plain_with_references.xml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,117 @@
<references_alt>6666873;10.1016/j.jcp.2016.06.039</references_alt>
<references_alt>5538352;10.1007/978-3-540-71992-2_17</references_alt>
<references_alt>2234457</references_alt>
<references_year_alt>2005</references_year_alt>
<references_year_alt>2012</references_year_alt>
<references_year_alt>2009</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2005</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2017</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2013</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2017</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2009</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2014</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2014</references_year_alt>
<references_year_alt>2013</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2012</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2014</references_year_alt>
<references_year_alt>2009</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2014</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2009</references_year_alt>
<references_year_alt>2005</references_year_alt>
<related_software>
<id>4013</id>
<name>MUMPS</name>
Expand Down
Loading