diff --git a/src/zbmath_rest2oai/getAsXml.py b/src/zbmath_rest2oai/getAsXml.py index 21148790..50555320 100644 --- a/src/zbmath_rest2oai/getAsXml.py +++ b/src/zbmath_rest2oai/getAsXml.py @@ -77,18 +77,22 @@ def extract_tags(result): def add_references_to_software(api_uri, dict_res): list_articles_ids_to_soft = [] list_articles_ids_and_alter_ids_to_soft = [] + list_references_year_alt = [] # New list to store years + if "software" in api_uri: - if api_uri.startswith("https://api.zbmath.org/v1/software/_all?start_after=")==False: - soft_id=api_uri.split("/")[-1] + if api_uri.startswith("https://api.zbmath.org/v1/software/_all?start_after=") == False: + soft_id = api_uri.split("/")[-1] + def api_doc_endpoint(page): - return requests.get("https://api.zbmath.org/v1/document/_structured_search?page={}&results_per_page=100&software%20id={}".format(page,soft_id)) - page=0 + return requests.get("https://api.zbmath.org/v1/document/_structured_search?page={}&results_per_page=100&software%20id={}".format(page, soft_id)) + + page = 0 while True: data = api_doc_endpoint(page).json() if data is None or "result" not in data or not data["result"]: break - list_ids=[] + list_ids = [] list_ids_and_alter = [] for entry in data["result"]: list_ids.append(entry["id"]) @@ -99,17 +103,22 @@ def api_doc_endpoint(page): elif alt_dic["type"] == "arxiv": list_links.append(alt_dic["identifier"]) - list_ids_and_alter.append(";".join([str(entry["id"])]+list_links)) + list_ids_and_alter.append(";".join([str(entry["id"])] + list_links)) + + # Extract the year from the datestamp + if "datestamp" in entry: + year = entry["datestamp"][:4] # Extract the first 4 characters (year) + list_references_year_alt.append(year) list_articles_ids_to_soft.extend(list_ids) list_articles_ids_and_alter_ids_to_soft.extend(list_ids_and_alter) - page+=1 + page += 1 if isinstance(dict_res, dict): dict_res["references"] = list_articles_ids_to_soft - # Wrap it in a list to make it iterable for your existing loop dict_res["references_alt"] = list_articles_ids_and_alter_ids_to_soft + dict_res["references_year_alt"] = list_references_year_alt # Add the years to the dict dict_res = [dict_res] return dict_res @@ -141,7 +150,7 @@ def final_xml2(api_source, prefix): result, closed_tags_for=[[], '', [None], None])) tags[identifier] = extract_tags(result) - elif isinstance(result, dict): + elif isinstance(result, dict): apply_zbmath_api_fixes(result, prefix) identifier = result["id"] dict_math_entities[identifier] = _illegal_xml_chars_RE.sub("", Converter(wrap="root").build( @@ -156,4 +165,4 @@ def final_xml2(api_source, prefix): prefix="oai:zbmath.org:" else: prefix="oai:swmath.org:" - print(final_xml2(sys.argv[1], prefix)) + print(final_xml2(sys.argv[1], prefix)) \ No newline at end of file diff --git a/test/data/software/plain_with_references.xml b/test/data/software/plain_with_references.xml index df1ff406..0964d626 100644 --- a/test/data/software/plain_with_references.xml +++ b/test/data/software/plain_with_references.xml @@ -250,6 +250,117 @@ 6666873;10.1016/j.jcp.2016.06.039 5538352;10.1007/978-3-540-71992-2_17 2234457 + 2005 + 2012 + 2009 + 2007 + 2023 + 2018 + 2007 + 2024 + 2008 + 2008 + 2005 + 2015 + 2015 + 2021 + 2008 + 2011 + 2010 + 2011 + 2007 + 2007 + 2008 + 2008 + 2017 + 2010 + 2019 + 2010 + 2019 + 2023 + 2007 + 2016 + 2011 + 2010 + 2013 + 2016 + 2017 + 2024 + 2019 + 2019 + 2023 + 2011 + 2008 + 2016 + 2023 + 2011 + 2010 + 2009 + 2019 + 2016 + 2015 + 2019 + 2018 + 2014 + 2019 + 2015 + 2018 + 2015 + 2014 + 2013 + 2007 + 2012 + 2021 + 2021 + 2020 + 2010 + 2022 + 2008 + 2007 + 2018 + 2014 + 2009 + 2007 + 2023 + 2011 + 2018 + 2011 + 2010 + 2024 + 2022 + 2010 + 2010 + 2021 + 2010 + 2022 + 2022 + 2022 + 2023 + 2011 + 2021 + 2021 + 2024 + 2014 + 2024 + 2022 + 2010 + 2020 + 2023 + 2016 + 2011 + 2020 + 2020 + 2019 + 2020 + 2024 + 2018 + 2021 + 2016 + 2024 + 2018 + 2016 + 2009 + 2005 4013 MUMPS