From 75e09cd7764a90f93adf57cd2581e83b2de84ea5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Feb 2026 15:35:48 +0000 Subject: [PATCH 1/4] Initial plan From ea408cd2938b4bbb7656cc70da1944f3cb9c6f38 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Feb 2026 15:43:19 +0000 Subject: [PATCH 2/4] Add QuickStatements JSON output support for software exports Co-authored-by: physikerwelt <2777736+physikerwelt@users.noreply.github.com> --- README.md | 43 ++++ .../run_software_quickstatements.py | 184 ++++++++++++++++++ .../software_quickstatements.py | 156 +++++++++++++++ test/data/software/sagemath_825_metadata.json | 16 ++ test/data/software/sagemath_825_raw.json | 54 +++++ .../software/sagemath_825_references.json | 12 ++ test/test_software_quickstatements.py | 184 ++++++++++++++++++ 7 files changed, 649 insertions(+) create mode 100644 src/zbmath_rest2oai/run_software_quickstatements.py create mode 100644 src/zbmath_rest2oai/software_quickstatements.py create mode 100644 test/data/software/sagemath_825_metadata.json create mode 100644 test/data/software/sagemath_825_raw.json create mode 100644 test/data/software/sagemath_825_references.json create mode 100644 test/test_software_quickstatements.py diff --git a/README.md b/README.md index ce280bd2..044a1cb7 100644 --- a/README.md +++ b/README.md @@ -19,5 +19,48 @@ Hint for a proper installation: ## Deployment The project is containerized and managed via [Portainer](https://portainer.portal.mardi4nfdi.de/#!/home). +## QuickStatements JSON export for software records + +The module `zbmath_rest2oai.software_quickstatements` converts zbMath software +records into a custom JSON format consumed by the +[MathSearch QuickStatements job](https://github.com/MathSearch/MathSearch). 
+ +Two JSON files are produced, matching the two update phases described in +[MaRDIRoadmap issue #173](https://github.com/MaRDI4NFDI/MaRDIRoadmap/issues/173): + +| File | Contents | +|------|----------| +| `software_quickstatements_metadata.json` | `qP13` (swMath id lookup), `Len` (label), `P29` (homepage), `P339` (source code), `P226_*` (MSC classifications), `P1458q13_*` (related software), `P286q1459_*` (standard articles) | +| `software_quickstatements_references.json` | `qP1451` (citing article lookup), `P1463q13` (software id) | + +### Key conventions + +- `qP<N>` – find the item whose property `P<N>` equals the value. +- `P<N>q<M>` – value is an external id looked up via property `P<M>`; result + stored as a wikibase-item value of property `P<N>`. +- `L<lang>` / `D<lang>` – label / description in language `lang`. +- Multi-valued fields use `_1`, `_2`, … suffixes (the job strips `_N` suffixes where N is a digit sequence). + +### CLI usage + +```bash +# Export all software (paged, from id 0) +python -m zbmath_rest2oai.run_software_quickstatements + +# Export a single software record (e.g. SageMath, swMath id 825) +python -m zbmath_rest2oai.run_software_quickstatements --id 825 + +# Only produce the metadata file +python -m zbmath_rest2oai.run_software_quickstatements --phase metadata + +# Only produce the references file +python -m zbmath_rest2oai.run_software_quickstatements --phase references + +# Write to a specific directory +python -m zbmath_rest2oai.run_software_quickstatements --output-dir /tmp/out +``` + +The existing XML/XSLT-based OAI-PMH pipeline is unaffected. + ## Support For inquiries, contact **[support@zbmath.org](mailto:support@zbmath.org)**. 
\ No newline at end of file diff --git a/src/zbmath_rest2oai/run_software_quickstatements.py b/src/zbmath_rest2oai/run_software_quickstatements.py new file mode 100644 index 00000000..d65d3932 --- /dev/null +++ b/src/zbmath_rest2oai/run_software_quickstatements.py @@ -0,0 +1,184 @@ +"""CLI: export software records from zbMath REST API as QuickStatements JSON. + +Produces two output files (by default in the current directory): + - software_quickstatements_metadata.json + - software_quickstatements_references.json + +Usage examples:: + + # Export all software (paged, starting from id 0) + python -m zbmath_rest2oai.run_software_quickstatements + + # Export a single software record by swMath id + python -m zbmath_rest2oai.run_software_quickstatements --id 825 + + # Only produce the metadata file + python -m zbmath_rest2oai.run_software_quickstatements --phase metadata + + # Only produce the references file + python -m zbmath_rest2oai.run_software_quickstatements --phase references + + # Write to a specific directory + python -m zbmath_rest2oai.run_software_quickstatements --output-dir /tmp/out +""" + +from __future__ import annotations + +import argparse +import json +import os +import sys + +import requests + +from zbmath_rest2oai.software_quickstatements import build_metadata_json, build_references_json + + +_API_BASE = "https://api.zbmath.org/v1/software" +_DEFAULT_PAGE_SIZE = 500 + + +def _fetch_software_by_id(sw_id: int) -> dict: + """Fetch a single software record from the zbMath REST API.""" + url = f"{_API_BASE}/{sw_id}" + headers = {"Accept": "application/json"} + r = requests.get(url, headers=headers, timeout=(10, 60)) + r.raise_for_status() + data = r.json() + result = data.get("result") + if isinstance(result, list) and result: + return result[0] + if isinstance(result, dict): + return result + raise ValueError(f"Unexpected result format for id {sw_id}: {data!r}") + + +def _iter_all_software(start_after: int = 0, page_size: int = _DEFAULT_PAGE_SIZE): + 
"""Yield software result dicts from the zbMath REST API (paged).""" + cursor = start_after + while True: + url = f"{_API_BASE}/_all?start_after={cursor}&results_per_request={page_size}" + headers = {"Accept": "application/json"} + r = requests.get(url, headers=headers, timeout=(10, 120)) + r.raise_for_status() + data = r.json() + results = data.get("result", []) + if not results: + break + for item in results: + if isinstance(item, dict): + yield item + cursor = item.get("id", cursor) + if len(results) < page_size: + break + + +def _add_references(result: dict) -> dict: + """Augment a software result dict with its citing articles. + + Mirrors the logic in :func:`zbmath_rest2oai.getAsXml.add_references_to_software`. + """ + sw_id = result.get("id") + if sw_id is None: + return result + + references: list[int] = [] + page = 0 + while True: + url = ( + f"https://api.zbmath.org/v1/document/_structured_search" + f"?page={page}&results_per_page=100&software%20id={sw_id}" + ) + r = requests.get(url, headers={"Accept": "application/json"}, timeout=(10, 60)) + r.raise_for_status() + data = r.json() + page_results = data.get("result", []) + if not page_results: + break + for entry in page_results: + references.append(entry["id"]) + page += 1 + + result["references"] = references + return result + + +def main(argv: list[str] | None = None) -> None: + parser = argparse.ArgumentParser( + description="Export zbMath software records as QuickStatements JSON." + ) + parser.add_argument( + "--id", + type=int, + metavar="SWMATH_ID", + help="Export a single software record by its swMath id.", + ) + parser.add_argument( + "--start-after", + type=int, + default=0, + metavar="ID", + help="Start exporting records after this id (for full-dump mode). Default: 0.", + ) + parser.add_argument( + "--phase", + choices=["metadata", "references", "all"], + default="all", + help=( + "Which output file(s) to produce: 'metadata', 'references', or 'all'. " + "Default: all." 
+ ), + ) + parser.add_argument( + "--output-dir", + default=".", + metavar="DIR", + help="Directory to write output JSON files to. Default: current directory.", + ) + parser.add_argument( + "--no-references", + action="store_true", + help=( + "Skip fetching citing articles for each software record. " + "Useful when running with --phase metadata only." + ), + ) + args = parser.parse_args(argv) + + os.makedirs(args.output_dir, exist_ok=True) + + if args.id is not None: + print(f"Fetching software id={args.id} …", file=sys.stderr) + record = _fetch_software_by_id(args.id) + if args.phase in ("references", "all") and not args.no_references: + record = _add_references(record) + results = [record] + else: + print("Fetching all software records …", file=sys.stderr) + results = [] + for record in _iter_all_software(start_after=args.start_after): + if args.phase in ("references", "all") and not args.no_references: + record = _add_references(record) + results.append(record) + if len(results) % 100 == 0: + print(f" … {len(results)} records fetched", file=sys.stderr) + + print(f"Building JSON for {len(results)} record(s) …", file=sys.stderr) + + if args.phase in ("metadata", "all"): + meta_path = os.path.join(args.output_dir, "software_quickstatements_metadata.json") + meta_json = build_metadata_json(results) + with open(meta_path, "w", encoding="utf-8") as f: + json.dump(meta_json, f, indent=2, ensure_ascii=False) + print(f"Wrote {meta_path}", file=sys.stderr) + + if args.phase in ("references", "all"): + refs_path = os.path.join(args.output_dir, "software_quickstatements_references.json") + refs_json = build_references_json(results) + with open(refs_path, "w", encoding="utf-8") as f: + json.dump(refs_json, f, indent=2, ensure_ascii=False) + print(f"Wrote {refs_path}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/src/zbmath_rest2oai/software_quickstatements.py b/src/zbmath_rest2oai/software_quickstatements.py new file mode 100644 index 
00000000..3fe80353 --- /dev/null +++ b/src/zbmath_rest2oai/software_quickstatements.py @@ -0,0 +1,156 @@ +"""Convert zbMath software records to QuickStatements JSON for MaRDI portal. + +Produces two JSON output files: +- software_quickstatements_metadata.json: metadata rows (swmathID, label, + homepage, source code, classifications, related software, standard articles) +- software_quickstatements_references.json: reference rows (one per article + citing the software) + +JSON schema:: + + {"rows": [{"qP13": "825", "Len": "SageMath", "P29": "...", ...}, ...]} + +Key conventions (matching MathSearch QuickStatements job): +- ``qP`` – look up item whose property P equals the value. +- ``Pq`` – value is an external id looked up via property P; result + stored as wikibase-item value of property P. +- ``L`` / ``D`` – label / description in that language. +- ``qal`` – qualifier. +- Multi-valued fields use ``_1``, ``_2``, … suffixes (the job strips ``_(\\d+)``). +""" + +from __future__ import annotations + +from typing import Any + + +def _add_multi(row: dict, key: str, values: list[Any]) -> None: + """Add multi-valued fields with ``_1``, ``_2``, … suffixes. + + Does nothing when *values* is empty. + """ + for i, val in enumerate(values, start=1): + row[f"{key}_{i}"] = str(val) + + +def to_metadata_row(result: dict) -> dict: + """Build a QuickStatements metadata row from a software result dict. + + Parameters + ---------- + result: + Raw software dict as returned by the zbMath REST API (the ``result`` + field), before or after ``apply_zbmath_api_fixes``. If an OAI prefix + has already been applied to ``id`` (e.g. ``"oai:swmath.org:825"``), the + numeric part is extracted automatically. + + Returns + ------- + dict + A single row suitable for the ``rows`` list in the metadata JSON file. + """ + raw_id = result.get("id", "") + # Strip OAI prefix if present (e.g. 
"oai:swmath.org:825" → "825") + sw_id = str(raw_id).split(":")[-1] + + row: dict = {"qP13": sw_id} + + name = result.get("name", "") + if name: + row["Len"] = str(name) + + homepage = result.get("homepage", "") + if isinstance(homepage, str) and homepage.strip(): + row["P29"] = homepage.strip() + + source_code = result.get("source_code", "") + if isinstance(source_code, str) and source_code.strip(): + row["P339"] = source_code.strip() + + classifications = result.get("classification", []) + if isinstance(classifications, str): + classifications = [classifications] + _add_multi(row, "P226", classifications) + + related = result.get("related_software", []) + if isinstance(related, dict): + related = [related] + rel_ids = [ + str(r["id"]) for r in related if isinstance(r, dict) and r.get("id") is not None + ] + _add_multi(row, "P1458q13", rel_ids) + + std_articles = result.get("standard_articles", []) + if isinstance(std_articles, dict): + std_articles = [std_articles] + std_ids = [ + str(a["id"]) + for a in std_articles + if isinstance(a, dict) and a.get("id") is not None + ] + _add_multi(row, "P286q1459", std_ids) + + return row + + +def to_reference_rows(result: dict) -> list[dict]: + """Build QuickStatements reference rows from a software result dict. + + One row is emitted per article that cites (or is cited by) the software. + + Parameters + ---------- + result: + Raw software dict (same format as for :func:`to_metadata_row`). + + Returns + ------- + list[dict] + List of rows for the references JSON file. + """ + raw_id = result.get("id", "") + sw_id = str(raw_id).split(":")[-1] + + references = result.get("references", []) + if isinstance(references, (int, str)): + references = [references] + + rows = [] + for ref_id in references: + rows.append({"qP1451": str(ref_id), "P1463q13": sw_id}) + return rows + + +def build_metadata_json(results: list[dict]) -> dict: + """Build the metadata QuickStatements JSON structure. 
+ + Parameters + ---------- + results: + List of raw software result dicts. + + Returns + ------- + dict + ``{"rows": [...]}`` ready to be serialised with ``json.dumps``. + """ + return {"rows": [to_metadata_row(r) for r in results]} + + +def build_references_json(results: list[dict]) -> dict: + """Build the references QuickStatements JSON structure. + + Parameters + ---------- + results: + List of raw software result dicts. + + Returns + ------- + dict + ``{"rows": [...]}`` ready to be serialised with ``json.dumps``. + """ + rows: list[dict] = [] + for r in results: + rows.extend(to_reference_rows(r)) + return {"rows": rows} diff --git a/test/data/software/sagemath_825_metadata.json b/test/data/software/sagemath_825_metadata.json new file mode 100644 index 00000000..302e5294 --- /dev/null +++ b/test/data/software/sagemath_825_metadata.json @@ -0,0 +1,16 @@ +{ + "rows": [ + { + "qP13": "825", + "Len": "SageMath", + "P29": "https://www.sagemath.org/", + "P339": "https://github.com/sagemath/sage", + "P226_1": "68", + "P226_2": "11", + "P226_3": "14", + "P1458q13_1": "1", + "P1458q13_2": "2", + "P286q1459_1": "2187000" + } + ] +} diff --git a/test/data/software/sagemath_825_raw.json b/test/data/software/sagemath_825_raw.json new file mode 100644 index 00000000..3925fc86 --- /dev/null +++ b/test/data/software/sagemath_825_raw.json @@ -0,0 +1,54 @@ +{ + "articles_count": 42, + "authors": [ + "Stein, William" + ], + "classification": [ + "68", + "11", + "14" + ], + "dependencies": "", + "description": "SageMath is a free open-source mathematics software system.", + "homepage": "https://www.sagemath.org/", + "id": 825, + "keywords": "orms", + "license_terms": "GPL", + "name": "SageMath", + "operating_systems": "Linux, macOS", + "orms_id": 825, + "programming_languages": "Python", + "related_software": [ + { + "id": 1, + "name": "Mathematica" + }, + { + "id": 2, + "name": "PARI/GP" + } + ], + "source_code": "https://github.com/sagemath/sage", + "standard_articles": [ + { + 
"authors": "Stein, William; Joyner, David", + "id": 2187000, + "source": "ACM SIGSAM Bull. 39, No. 2, 61-64 (2005).", + "title": "SAGE: system for algebra and geometry experimentation", + "year": 2005 + } + ], + "references": [ + 3000001, + 3000002 + ], + "references_alt": [ + "3000001;10.1000/test.001", + "3000002" + ], + "references_year_alt": [ + "2010", + "2012" + ], + "zbmath_url": "https://zbmath.org/software/825" +} diff --git a/test/data/software/sagemath_825_references.json b/test/data/software/sagemath_825_references.json new file mode 100644 index 00000000..6dd4740b --- /dev/null +++ b/test/data/software/sagemath_825_references.json @@ -0,0 +1,12 @@ +{ + "rows": [ + { + "qP1451": "3000001", + "P1463q13": "825" + }, + { + "qP1451": "3000002", + "P1463q13": "825" + } + ] +} diff --git a/test/test_software_quickstatements.py b/test/test_software_quickstatements.py new file mode 100644 index 00000000..cb7cd9dc --- /dev/null +++ b/test/test_software_quickstatements.py @@ -0,0 +1,184 @@ +"""Tests for software_quickstatements JSON output.""" + +import json +import os +import unittest + +from zbmath_rest2oai.software_quickstatements import ( + build_metadata_json, + build_references_json, + to_metadata_row, + to_reference_rows, +) + +DATA_DIR = os.path.join(os.path.dirname(__file__), "data", "software") + + +def load_fixture(name): + with open(os.path.join(DATA_DIR, name)) as f: + return json.load(f) + + +class TestToMetadataRow(unittest.TestCase): + """Unit tests for to_metadata_row.""" + + def test_basic_fields(self): + result = {"id": 825, "name": "SageMath", "homepage": "https://www.sagemath.org/"} + row = to_metadata_row(result) + self.assertEqual(row["qP13"], "825") + self.assertEqual(row["Len"], "SageMath") + self.assertEqual(row["P29"], "https://www.sagemath.org/") + + def test_source_code(self): + result = {"id": 1, "name": "Foo", "source_code": "https://github.com/foo/bar"} + row = to_metadata_row(result) + self.assertEqual(row["P339"], 
"https://github.com/foo/bar") + + def test_empty_source_code_omitted(self): + result = {"id": 1, "name": "Foo", "source_code": ""} + row = to_metadata_row(result) + self.assertNotIn("P339", row) + + def test_empty_homepage_omitted(self): + result = {"id": 1, "name": "Foo", "homepage": ""} + row = to_metadata_row(result) + self.assertNotIn("P29", row) + + def test_classifications_multi(self): + result = {"id": 1, "name": "Foo", "classification": ["68", "11", "14"]} + row = to_metadata_row(result) + self.assertEqual(row["P226_1"], "68") + self.assertEqual(row["P226_2"], "11") + self.assertEqual(row["P226_3"], "14") + + def test_related_software_multi(self): + result = { + "id": 1, + "name": "Foo", + "related_software": [{"id": 10, "name": "A"}, {"id": 20, "name": "B"}], + } + row = to_metadata_row(result) + self.assertEqual(row["P1458q13_1"], "10") + self.assertEqual(row["P1458q13_2"], "20") + + def test_standard_articles_multi(self): + result = { + "id": 1, + "name": "Foo", + "standard_articles": [{"id": 100, "title": "T1"}, {"id": 200, "title": "T2"}], + } + row = to_metadata_row(result) + self.assertEqual(row["P286q1459_1"], "100") + self.assertEqual(row["P286q1459_2"], "200") + + def test_standard_articles_as_dict(self): + """single standard_article returned as dict, not list.""" + result = { + "id": 1, + "name": "Foo", + "standard_articles": {"id": 100, "title": "T1"}, + } + row = to_metadata_row(result) + self.assertEqual(row["P286q1459_1"], "100") + + def test_oai_prefix_stripped(self): + result = {"id": "oai:swmath.org:825", "name": "SageMath"} + row = to_metadata_row(result) + self.assertEqual(row["qP13"], "825") + + def test_no_classifications_produces_no_p226(self): + result = {"id": 1, "name": "Foo"} + row = to_metadata_row(result) + self.assertNotIn("P226_1", row) + + +class TestToReferenceRows(unittest.TestCase): + """Unit tests for to_reference_rows.""" + + def test_basic(self): + result = {"id": 825, "references": [3000001, 3000002]} + rows = 
to_reference_rows(result) + self.assertEqual(len(rows), 2) + self.assertEqual(rows[0], {"qP1451": "3000001", "P1463q13": "825"}) + self.assertEqual(rows[1], {"qP1451": "3000002", "P1463q13": "825"}) + + def test_empty_references(self): + result = {"id": 1, "references": []} + rows = to_reference_rows(result) + self.assertEqual(rows, []) + + def test_single_reference_as_int(self): + result = {"id": 2, "references": 999} + rows = to_reference_rows(result) + self.assertEqual(rows, [{"qP1451": "999", "P1463q13": "2"}]) + + def test_oai_prefix_stripped(self): + result = {"id": "oai:swmath.org:5", "references": [42]} + rows = to_reference_rows(result) + self.assertEqual(rows[0]["P1463q13"], "5") + + +class TestBuildMetadataJson(unittest.TestCase): + """Unit tests for build_metadata_json.""" + + def test_structure(self): + results = [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}] + output = build_metadata_json(results) + self.assertIn("rows", output) + self.assertEqual(len(output["rows"]), 2) + self.assertEqual(output["rows"][0]["qP13"], "1") + self.assertEqual(output["rows"][1]["qP13"], "2") + + +class TestBuildReferencesJson(unittest.TestCase): + """Unit tests for build_references_json.""" + + def test_structure(self): + results = [ + {"id": 1, "references": [10, 11]}, + {"id": 2, "references": [20]}, + ] + output = build_references_json(results) + self.assertIn("rows", output) + self.assertEqual(len(output["rows"]), 3) + + +class TestSageMath825Fixture(unittest.TestCase): + """Integration test: verify deterministic JSON output for SageMath (id=825). + + The raw fixture (sagemath_825_raw.json) is compared against the expected + output fixtures (sagemath_825_metadata.json, sagemath_825_references.json). 
+ """ + + def setUp(self): + self.raw = load_fixture("sagemath_825_raw.json") + self.expected_meta = load_fixture("sagemath_825_metadata.json") + self.expected_refs = load_fixture("sagemath_825_references.json") + + def test_metadata_matches_fixture(self): + result = build_metadata_json([self.raw]) + self.assertEqual(result, self.expected_meta) + + def test_references_match_fixture(self): + result = build_references_json([self.raw]) + self.assertEqual(result, self.expected_refs) + + def test_metadata_qp13_is_825(self): + result = build_metadata_json([self.raw]) + self.assertEqual(result["rows"][0]["qP13"], "825") + + def test_metadata_label_is_sagemath(self): + result = build_metadata_json([self.raw]) + self.assertEqual(result["rows"][0]["Len"], "SageMath") + + def test_metadata_homepage_present(self): + result = build_metadata_json([self.raw]) + self.assertIn("P29", result["rows"][0]) + + def test_metadata_source_code_present(self): + result = build_metadata_json([self.raw]) + self.assertIn("P339", result["rows"][0]) + + +if __name__ == "__main__": + unittest.main() From 6c969e37ba61e0cbe9604271d34cc94f9cd13bc4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 05:43:04 +0000 Subject: [PATCH 3/4] Update SageMath fixture with real zbMATH API data and correct expected outputs Co-authored-by: physikerwelt <2777736+physikerwelt@users.noreply.github.com> --- test/data/software/sagemath_825_metadata.json | 91 ++++++++- test/data/software/sagemath_825_raw.json | 188 +++++++++++++++--- .../software/sagemath_825_references.json | 8 +- 3 files changed, 251 insertions(+), 36 deletions(-) diff --git a/test/data/software/sagemath_825_metadata.json b/test/data/software/sagemath_825_metadata.json index 302e5294..987a0f31 100644 --- a/test/data/software/sagemath_825_metadata.json +++ b/test/data/software/sagemath_825_metadata.json @@ -5,12 +5,95 @@ "Len": "SageMath", "P29": "https://www.sagemath.org/", 
"P339": "https://github.com/sagemath/sage", - "P226_1": "68", + "P226_1": "05", "P226_2": "11", "P226_3": "14", - "P1458q13_1": "1", - "P1458q13_2": "2", - "P286q1459_1": "2187000" + "P226_4": "20", + "P226_5": "68", + "P226_6": "00", + "P226_7": "01", + "P226_8": "03", + "P226_9": "06", + "P226_10": "08", + "P226_11": "12", + "P226_12": "13", + "P226_13": "15", + "P226_14": "16", + "P226_15": "17", + "P226_16": "18", + "P226_17": "19", + "P226_18": "22", + "P226_19": "26", + "P226_20": "28", + "P226_21": "30", + "P226_22": "31", + "P226_23": "32", + "P226_24": "33", + "P226_25": "34", + "P226_26": "35", + "P226_27": "37", + "P226_28": "39", + "P226_29": "40", + "P226_30": "41", + "P226_31": "42", + "P226_32": "43", + "P226_33": "44", + "P226_34": "46", + "P226_35": "47", + "P226_36": "49", + "P226_37": "51", + "P226_38": "52", + "P226_39": "53", + "P226_40": "54", + "P226_41": "55", + "P226_42": "57", + "P226_43": "58", + "P226_44": "60", + "P226_45": "62", + "P226_46": "65", + "P226_47": "70", + "P226_48": "74", + "P226_49": "76", + "P226_50": "78", + "P226_51": "80", + "P226_52": "81", + "P226_53": "82", + "P226_54": "83", + "P226_55": "85", + "P226_56": "86", + "P226_57": "90", + "P226_58": "91", + "P226_59": "92", + "P226_60": "93", + "P226_61": "94", + "P226_62": "97", + "P1458q13_1": "540", + "P1458q13_2": "23170", + "P1458q13_3": "320", + "P1458q13_4": "7248", + "P1458q13_5": "680", + "P1458q13_6": "866", + "P1458q13_7": "537", + "P1458q13_8": "554", + "P1458q13_9": "16448", + "P1458q13_10": "7249", + "P1458q13_11": "545", + "P1458q13_12": "14460", + "P1458q13_13": "27596", + "P1458q13_14": "611", + "P1458q13_15": "7823", + "P1458q13_16": "23728", + "P1458q13_17": "4968", + "P1458q13_18": "724", + "P1458q13_19": "560", + "P1458q13_20": "6874", + "P286q1459_1": "7045592", + "P286q1459_2": "6418360", + "P286q1459_3": "6532438", + "P286q1459_4": "6462232", + "P286q1459_5": "6114556", + "P286q1459_6": "5785538", + "P286q1459_7": "5380273" } ] } diff --git 
a/test/data/software/sagemath_825_raw.json b/test/data/software/sagemath_825_raw.json index 3925fc86..e995a1c9 100644 --- a/test/data/software/sagemath_825_raw.json +++ b/test/data/software/sagemath_825_raw.json @@ -1,54 +1,182 @@ { - "articles_count": 42, + "articles_count": 3439, "authors": [ - "Stein, William" + "The Sage Developers", + "Stein, William", + "Joyner, David", + "Kohel, David", + "Cremona, John", + "Eröcal, Burçin" ], "classification": [ - "68", + "05", "11", - "14" + "14", + "20", + "68", + "00", + "01", + "03", + "06", + "08", + "12", + "13", + "15", + "16", + "17", + "18", + "19", + "22", + "26", + "28", + "30", + "31", + "32", + "33", + "34", + "35", + "37", + "39", + "40", + "41", + "42", + "43", + "44", + "46", + "47", + "49", + "51", + "52", + "53", + "54", + "55", + "57", + "58", + "60", + "62", + "65", + "70", + "74", + "76", + "78", + "80", + "81", + "82", + "83", + "85", + "86", + "90", + "91", + "92", + "93", + "94", + "97" ], - "dependencies": "", - "description": "SageMath is a free open-source mathematics software system.", + "dependencies": null, + "description": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", "homepage": "https://www.sagemath.org/", "id": 825, - "keywords": "orms", - "license_terms": "GPL", + "keywords": [ + "orms", + "Python", + "Cython", + "Sage", + "Open Source", + "Interfaces" + ], + "license_terms": "GPL (≥ 3)", "name": "SageMath", - "operating_systems": "Linux, macOS", - "orms_id": 825, - "programming_languages": "Python", + "operating_systems": null, + "orms_id": "255", + "programming_languages": null, "related_software": [ - { - "id": 1, - "name": "Mathematica" - }, - { - "id": 2, - "name": "PARI/GP" - } + {"id": 540, "name": "Magma"}, + {"id": 23170, "name": "GitHub"}, + {"id": 320, "name": "GAP"}, + {"id": 7248, "name": "OEIS"}, + {"id": 680, "name": "PARI/GP"}, + {"id": 866, "name": "SINGULAR"}, + {"id": 537, "name": "Macaulay2"}, + {"id": 554, "name": "Mathematica"}, + 
{"id": 16448, "name": "Sage-Combinat"}, + {"id": 7249, "name": "LMFDB"}, + {"id": 545, "name": "Maple"}, + {"id": 14460, "name": "Python"}, + {"id": 27596, "name": "ecdata"}, + {"id": 611, "name": "nauty"}, + {"id": 7823, "name": "Traces"}, + {"id": 23728, "name": "MathOverflow"}, + {"id": 4968, "name": "DLMF"}, + {"id": 724, "name": "polymake"}, + {"id": 560, "name": "Maxima"}, + {"id": 6874, "name": "SnapPy"} ], "source_code": "https://github.com/sagemath/sage", "standard_articles": [ { - "authors": "Stein, William; Joyner, David", - "id": 2187000, - "source": "ACM SIGSAM Bull. 39, No. 2, 61-64 (2005).", - "title": "SAGE: system for algebra and geometry experimentation", - "year": 2005 + "authors": [], + "id": 7045592, + "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "year": "2019" + }, + { + "authors": [], + "id": 6418360, + "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "year": "2015" + }, + { + "authors": [], + "id": 6532438, + "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "year": "2013" + }, + { + "authors": [], + "id": 6462232, + "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "year": "2013" + }, + { + "authors": [], + "id": 6114556, + "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "year": "2012" + }, + { + "authors": [], + "id": 5785538, + "source": "zbMATH Open Web Interface contents unavailable due 
to conflicting licenses.", + "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "year": "2010" + }, + { + "authors": [], + "id": 5380273, + "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.", + "year": "2008" } ], "references": [ - 3000001, - 3000002 + 6383667, + 6414141, + 6462232 ], "references_alt": [ - "3000001;10.1000/test.001", - "3000002" + "6383667;10.1145/2930266", + "6414141", + "6462232" ], "references_year_alt": [ - "2010", - "2012" + "2016", + "2015", + "2013" ], "zbmath_url": "https://zbmath.org/software/825" } diff --git a/test/data/software/sagemath_825_references.json b/test/data/software/sagemath_825_references.json index 6dd4740b..9df59ac4 100644 --- a/test/data/software/sagemath_825_references.json +++ b/test/data/software/sagemath_825_references.json @@ -1,11 +1,15 @@ { "rows": [ { - "qP1451": "3000001", + "qP1451": "6383667", "P1463q13": "825" }, { - "qP1451": "3000002", + "qP1451": "6414141", + "P1463q13": "825" + }, + { + "qP1451": "6462232", "P1463q13": "825" } ] From e54393e638c17a30c73e981af7323e5b2837e361 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 05:51:29 +0000 Subject: [PATCH 4/4] Fix FLK-E501 line-too-long warnings in new Python files Co-authored-by: physikerwelt <2777736+physikerwelt@users.noreply.github.com> --- .../run_software_quickstatements.py | 47 ++++++++++++++----- .../software_quickstatements.py | 7 ++- test/test_software_quickstatements.py | 24 ++++++++-- 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/src/zbmath_rest2oai/run_software_quickstatements.py b/src/zbmath_rest2oai/run_software_quickstatements.py index d65d3932..0ced47e8 100644 --- a/src/zbmath_rest2oai/run_software_quickstatements.py +++ b/src/zbmath_rest2oai/run_software_quickstatements.py @@ -19,7 +19,8 
@@ python -m zbmath_rest2oai.run_software_quickstatements --phase references # Write to a specific directory - python -m zbmath_rest2oai.run_software_quickstatements --output-dir /tmp/out + python -m zbmath_rest2oai.run_software_quickstatements \ + --output-dir /tmp/out """ from __future__ import annotations @@ -31,7 +32,10 @@ import requests -from zbmath_rest2oai.software_quickstatements import build_metadata_json, build_references_json +from zbmath_rest2oai.software_quickstatements import ( + build_metadata_json, + build_references_json, +) _API_BASE = "https://api.zbmath.org/v1/software" @@ -53,11 +57,16 @@ def _fetch_software_by_id(sw_id: int) -> dict: raise ValueError(f"Unexpected result format for id {sw_id}: {data!r}") -def _iter_all_software(start_after: int = 0, page_size: int = _DEFAULT_PAGE_SIZE): +def _iter_all_software( + start_after: int = 0, page_size: int = _DEFAULT_PAGE_SIZE +): """Yield software result dicts from the zbMath REST API (paged).""" cursor = start_after while True: - url = f"{_API_BASE}/_all?start_after={cursor}&results_per_request={page_size}" + url = ( + f"{_API_BASE}/_all" + f"?start_after={cursor}&results_per_request={page_size}" + ) headers = {"Accept": "application/json"} r = requests.get(url, headers=headers, timeout=(10, 120)) r.raise_for_status() @@ -76,7 +85,8 @@ def _iter_all_software(start_after: int = 0, page_size: int = _DEFAULT_PAGE_SIZE def _add_references(result: dict) -> dict: """Augment a software result dict with its citing articles. - Mirrors the logic in :func:`zbmath_rest2oai.getAsXml.add_references_to_software`. + Mirrors the logic in + :func:`zbmath_rest2oai.getAsXml.add_references_to_software`. 
""" sw_id = result.get("id") if sw_id is None: @@ -89,7 +99,9 @@ def _add_references(result: dict) -> dict: f"https://api.zbmath.org/v1/document/_structured_search" f"?page={page}&results_per_page=100&software%20id={sw_id}" ) - r = requests.get(url, headers={"Accept": "application/json"}, timeout=(10, 60)) + r = requests.get( + url, headers={"Accept": "application/json"}, timeout=(10, 60) + ) r.raise_for_status() data = r.json() page_results = data.get("result", []) @@ -118,22 +130,29 @@ def main(argv: list[str] | None = None) -> None: type=int, default=0, metavar="ID", - help="Start exporting records after this id (for full-dump mode). Default: 0.", + help=( + "Start exporting records after this id" + " (for full-dump mode). Default: 0." + ), ) parser.add_argument( "--phase", choices=["metadata", "references", "all"], default="all", help=( - "Which output file(s) to produce: 'metadata', 'references', or 'all'. " - "Default: all." + "Which output file(s) to produce:" + " 'metadata', 'references', or 'all'." + " Default: all." ), ) parser.add_argument( "--output-dir", default=".", metavar="DIR", - help="Directory to write output JSON files to. Default: current directory.", + help=( + "Directory to write output JSON files to." + " Default: current directory." 
+ ), ) parser.add_argument( "--no-references", @@ -166,14 +185,18 @@ def main(argv: list[str] | None = None) -> None: print(f"Building JSON for {len(results)} record(s) …", file=sys.stderr) if args.phase in ("metadata", "all"): - meta_path = os.path.join(args.output_dir, "software_quickstatements_metadata.json") + meta_path = os.path.join( + args.output_dir, "software_quickstatements_metadata.json" + ) meta_json = build_metadata_json(results) with open(meta_path, "w", encoding="utf-8") as f: json.dump(meta_json, f, indent=2, ensure_ascii=False) print(f"Wrote {meta_path}", file=sys.stderr) if args.phase in ("references", "all"): - refs_path = os.path.join(args.output_dir, "software_quickstatements_references.json") + refs_path = os.path.join( + args.output_dir, "software_quickstatements_references.json" + ) refs_json = build_references_json(results) with open(refs_path, "w", encoding="utf-8") as f: json.dump(refs_json, f, indent=2, ensure_ascii=False) diff --git a/src/zbmath_rest2oai/software_quickstatements.py b/src/zbmath_rest2oai/software_quickstatements.py index 3fe80353..758e63de 100644 --- a/src/zbmath_rest2oai/software_quickstatements.py +++ b/src/zbmath_rest2oai/software_quickstatements.py @@ -16,7 +16,8 @@ stored as wikibase-item value of property P. - ``L`` / ``D`` – label / description in that language. - ``qal`` – qualifier. -- Multi-valued fields use ``_1``, ``_2``, … suffixes (the job strips ``_(\\d+)``). +- Multi-valued fields use ``_1``, ``_2``, … suffixes (the job strips + numeric suffixes). 
""" from __future__ import annotations @@ -76,7 +77,9 @@ def to_metadata_row(result: dict) -> dict: if isinstance(related, dict): related = [related] rel_ids = [ - str(r["id"]) for r in related if isinstance(r, dict) and r.get("id") is not None + str(r["id"]) + for r in related + if isinstance(r, dict) and r.get("id") is not None ] _add_multi(row, "P1458q13", rel_ids) diff --git a/test/test_software_quickstatements.py b/test/test_software_quickstatements.py index cb7cd9dc..8f50d9c2 100644 --- a/test/test_software_quickstatements.py +++ b/test/test_software_quickstatements.py @@ -23,14 +23,22 @@ class TestToMetadataRow(unittest.TestCase): """Unit tests for to_metadata_row.""" def test_basic_fields(self): - result = {"id": 825, "name": "SageMath", "homepage": "https://www.sagemath.org/"} + result = { + "id": 825, + "name": "SageMath", + "homepage": "https://www.sagemath.org/", + } row = to_metadata_row(result) self.assertEqual(row["qP13"], "825") self.assertEqual(row["Len"], "SageMath") self.assertEqual(row["P29"], "https://www.sagemath.org/") def test_source_code(self): - result = {"id": 1, "name": "Foo", "source_code": "https://github.com/foo/bar"} + result = { + "id": 1, + "name": "Foo", + "source_code": "https://github.com/foo/bar", + } row = to_metadata_row(result) self.assertEqual(row["P339"], "https://github.com/foo/bar") @@ -55,7 +63,10 @@ def test_related_software_multi(self): result = { "id": 1, "name": "Foo", - "related_software": [{"id": 10, "name": "A"}, {"id": 20, "name": "B"}], + "related_software": [ + {"id": 10, "name": "A"}, + {"id": 20, "name": "B"}, + ], } row = to_metadata_row(result) self.assertEqual(row["P1458q13_1"], "10") @@ -65,7 +76,10 @@ def test_standard_articles_multi(self): result = { "id": 1, "name": "Foo", - "standard_articles": [{"id": 100, "title": "T1"}, {"id": 200, "title": "T2"}], + "standard_articles": [ + {"id": 100, "title": "T1"}, + {"id": 200, "title": "T2"}, + ], } row = to_metadata_row(result) 
self.assertEqual(row["P286q1459_1"], "100") @@ -144,7 +158,7 @@ def test_structure(self): class TestSageMath825Fixture(unittest.TestCase): - """Integration test: verify deterministic JSON output for SageMath (id=825). + """Integration test: verify deterministic JSON output for SageMath. The raw fixture (sagemath_825_raw.json) is compared against the expected output fixtures (sagemath_825_metadata.json, sagemath_825_references.json).