Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions src/zbmath_rest2oai/getAsXml.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,22 @@ def extract_tags(result):
def add_references_to_software(api_uri, dict_res):
list_articles_ids_to_soft = []
list_articles_ids_and_alter_ids_to_soft = []
list_references_year_alt = [] # New list to store years

if "software" in api_uri:
if api_uri.startswith("https://api.zbmath.org/v1/software/_all?start_after=")==False:
soft_id=api_uri.split("/")[-1]
if api_uri.startswith("https://api.zbmath.org/v1/software/_all?start_after=") == False:
soft_id = api_uri.split("/")[-1]

def api_doc_endpoint(page):
return requests.get("https://api.zbmath.org/v1/document/_structured_search?page={}&results_per_page=100&software%20id={}".format(page,soft_id))
page=0
return requests.get("https://api.zbmath.org/v1/document/_structured_search?page={}&results_per_page=100&software%20id={}".format(page, soft_id))

page = 0
while True:
data = api_doc_endpoint(page).json()
if data is None or "result" not in data or not data["result"]:
break

list_ids=[]
list_ids = []
list_ids_and_alter = []
for entry in data["result"]:
list_ids.append(entry["id"])
Expand All @@ -99,17 +103,22 @@ def api_doc_endpoint(page):
elif alt_dic["type"] == "arxiv":
list_links.append(alt_dic["identifier"])

list_ids_and_alter.append(";".join([str(entry["id"])]+list_links))
list_ids_and_alter.append(";".join([str(entry["id"])] + list_links))

# Extract the year from the datestamp
if "datestamp" in entry:
year = entry["datestamp"][:4] # Extract the first 4 characters (year)
list_references_year_alt.append(year)

list_articles_ids_to_soft.extend(list_ids)
list_articles_ids_and_alter_ids_to_soft.extend(list_ids_and_alter)

page+=1
page += 1

if isinstance(dict_res, dict):
dict_res["references"] = list_articles_ids_to_soft
# Wrap it in a list to make it iterable for your existing loop
dict_res["references_alt"] = list_articles_ids_and_alter_ids_to_soft
dict_res["references_year_alt"] = list_references_year_alt # Add the years to the dict
dict_res = [dict_res]

return dict_res
Expand Down Expand Up @@ -141,7 +150,7 @@ def final_xml2(api_source, prefix):
result,
closed_tags_for=[[], '', [None], None]))
tags[identifier] = extract_tags(result)
elif isinstance(result, dict):
elif isinstance(result, dict):
apply_zbmath_api_fixes(result, prefix)
identifier = result["id"]
dict_math_entities[identifier] = _illegal_xml_chars_RE.sub("", Converter(wrap="root").build(
Expand All @@ -156,4 +165,4 @@ def final_xml2(api_source, prefix):
prefix="oai:zbmath.org:"
else:
prefix="oai:swmath.org:"
print(final_xml2(sys.argv[1], prefix))
print(final_xml2(sys.argv[1], prefix))
111 changes: 111 additions & 0 deletions test/data/software/plain_with_references.xml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,117 @@
<references_alt>6666873;10.1016/j.jcp.2016.06.039</references_alt>
<references_alt>5538352;10.1007/978-3-540-71992-2_17</references_alt>
<references_alt>2234457</references_alt>
<references_year_alt>2005</references_year_alt>
<references_year_alt>2012</references_year_alt>
<references_year_alt>2009</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2005</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2017</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2013</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2017</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2009</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2014</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2015</references_year_alt>
<references_year_alt>2014</references_year_alt>
<references_year_alt>2013</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2012</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2008</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2014</references_year_alt>
<references_year_alt>2009</references_year_alt>
<references_year_alt>2007</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2014</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2022</references_year_alt>
<references_year_alt>2010</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2023</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2011</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2019</references_year_alt>
<references_year_alt>2020</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2021</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2024</references_year_alt>
<references_year_alt>2018</references_year_alt>
<references_year_alt>2016</references_year_alt>
<references_year_alt>2009</references_year_alt>
<references_year_alt>2005</references_year_alt>
<related_software>
<id>4013</id>
<name>MUMPS</name>
Expand Down
Loading