From 75e09cd7764a90f93adf57cd2581e83b2de84ea5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Feb 2026 15:35:48 +0000
Subject: [PATCH 1/4] Initial plan


From ea408cd2938b4bbb7656cc70da1944f3cb9c6f38 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Feb 2026 15:43:19 +0000
Subject: [PATCH 2/4] Add QuickStatements JSON output support for software
 exports

Co-authored-by: physikerwelt <2777736+physikerwelt@users.noreply.github.com>
---
 README.md                                     |  43 ++++
 .../run_software_quickstatements.py           | 184 ++++++++++++++++++
 .../software_quickstatements.py               | 156 +++++++++++++++
 test/data/software/sagemath_825_metadata.json |  16 ++
 test/data/software/sagemath_825_raw.json      |  54 +++++
 .../software/sagemath_825_references.json     |  12 ++
 test/test_software_quickstatements.py         | 184 ++++++++++++++++++
 7 files changed, 649 insertions(+)
 create mode 100644 src/zbmath_rest2oai/run_software_quickstatements.py
 create mode 100644 src/zbmath_rest2oai/software_quickstatements.py
 create mode 100644 test/data/software/sagemath_825_metadata.json
 create mode 100644 test/data/software/sagemath_825_raw.json
 create mode 100644 test/data/software/sagemath_825_references.json
 create mode 100644 test/test_software_quickstatements.py
diff --git a/README.md b/README.md
index ce280bd2..044a1cb7 100644
--- a/README.md
+++ b/README.md
@@ -19,5 +19,48 @@ Hint for a proper installation:
 ## Deployment
 The project is containerized and managed via [Portainer](https://portainer.portal.mardi4nfdi.de/#!/home).
 
+## QuickStatements JSON export for software records
+
+The module `zbmath_rest2oai.software_quickstatements` converts zbMath software
+records into a custom JSON format consumed by the
+[MathSearch QuickStatements job](https://github.com/MathSearch/MathSearch).
+
+Two JSON files are produced, matching the two update phases described in
+[MaRDIRoadmap issue #173](https://github.com/MaRDI4NFDI/MaRDIRoadmap/issues/173):
+
+| File | Contents |
+|------|----------|
+| `software_quickstatements_metadata.json` | `qP13` (swMath id lookup), `Len` (label), `P29` (homepage), `P339` (source code), `P226_*` (MSC classifications), `P1458q13_*` (related software), `P286q1459_*` (standard articles) |
+| `software_quickstatements_references.json` | `qP1451` (citing article lookup), `P1463q13` (software id) |
+
+### Key conventions
+
+- `qP<id>` – find the item whose property `P<id>` equals the value.
+- `P<x>q<id>` – the value is an external id; the item whose property `P<id>`
+  equals it is looked up and stored as a wikibase-item value of property `P<x>`.
+- `L<lang>` / `D<lang>` – label / description in language `lang`.
+- Multi-valued fields use `_1`, `_2`, … suffixes (the job strips `_N` suffixes where N is a digit sequence).
+
+### CLI usage
+
+```bash
+# Export all software (paged, starting after id 0)
+python -m zbmath_rest2oai.run_software_quickstatements
+
+# Export a single software record (e.g. SageMath, swMath id 825)
+python -m zbmath_rest2oai.run_software_quickstatements --id 825
+
+# Only produce the metadata file
+python -m zbmath_rest2oai.run_software_quickstatements --phase metadata
+
+# Only produce the references file
+python -m zbmath_rest2oai.run_software_quickstatements --phase references
+
+# Write to a specific directory
+python -m zbmath_rest2oai.run_software_quickstatements --output-dir /tmp/out
+```
+
+The existing XML/XSLT-based OAI-PMH pipeline is unaffected.
+
 ## Support
 For inquiries, contact **[support@zbmath.org](mailto:support@zbmath.org)**.
\ No newline at end of file
diff --git a/src/zbmath_rest2oai/run_software_quickstatements.py b/src/zbmath_rest2oai/run_software_quickstatements.py
new file mode 100644
index 00000000..d65d3932
--- /dev/null
+++ b/src/zbmath_rest2oai/run_software_quickstatements.py
@@ -0,0 +1,184 @@
+"""CLI: export software records from zbMath REST API as QuickStatements JSON.
+
+Produces two output files (by default in the current directory):
+  - software_quickstatements_metadata.json
+  - software_quickstatements_references.json
+
+Usage examples::
+
+    # Export all software (paged, starting after id 0)
+    python -m zbmath_rest2oai.run_software_quickstatements
+
+    # Export a single software record by swMath id
+    python -m zbmath_rest2oai.run_software_quickstatements --id 825
+
+    # Only produce the metadata file
+    python -m zbmath_rest2oai.run_software_quickstatements --phase metadata
+
+    # Only produce the references file
+    python -m zbmath_rest2oai.run_software_quickstatements --phase references
+
+    # Write to a specific directory
+    python -m zbmath_rest2oai.run_software_quickstatements --output-dir /tmp/out
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+
+import requests
+
+from zbmath_rest2oai.software_quickstatements import build_metadata_json, build_references_json
+
+
+_API_BASE = "https://api.zbmath.org/v1/software"
+_DEFAULT_PAGE_SIZE = 500
+
+
+def _fetch_software_by_id(sw_id: int) -> dict:
+    """Fetch a single software record from the zbMath REST API."""
+    url = f"{_API_BASE}/{sw_id}"
+    headers = {"Accept": "application/json"}
+    r = requests.get(url, headers=headers, timeout=(10, 60))
+    r.raise_for_status()
+    data = r.json()
+    result = data.get("result")
+    if isinstance(result, list) and result:
+        return result[0]
+    if isinstance(result, dict):
+        return result
+    raise ValueError(f"Unexpected result format for id {sw_id}: {data!r}")
+
+
+def _iter_all_software(start_after: int = 0, page_size: int = _DEFAULT_PAGE_SIZE):
+    """Yield software result dicts from the zbMath REST API (paged)."""
+    cursor = start_after
+    while True:
+        url = f"{_API_BASE}/_all?start_after={cursor}&results_per_request={page_size}"
+        headers = {"Accept": "application/json"}
+        r = requests.get(url, headers=headers, timeout=(10, 120))
+        r.raise_for_status()
+        data = r.json()
+        results = data.get("result", [])
+        if not results:
+            break
+        for item in results:
+            if isinstance(item, dict):
+                yield item
+                cursor = item.get("id", cursor)
+        if len(results) < page_size:
+            break
+
+
+def _add_references(result: dict) -> dict:
+    """Augment a software result dict with its citing articles.
+
+    Mirrors the logic in :func:`zbmath_rest2oai.getAsXml.add_references_to_software`.
+    """
+    sw_id = result.get("id")
+    if sw_id is None:
+        return result
+
+    references: list[int] = []
+    page = 0
+    while True:
+        url = (
+            f"https://api.zbmath.org/v1/document/_structured_search"
+            f"?page={page}&results_per_page=100&software%20id={sw_id}"
+        )
+        r = requests.get(url, headers={"Accept": "application/json"}, timeout=(10, 60))
+        r.raise_for_status()
+        data = r.json()
+        page_results = data.get("result", [])
+        if not page_results:
+            break
+        for entry in page_results:
+            references.append(entry["id"])
+        page += 1
+
+    result["references"] = references
+    return result
+
+
+def main(argv: list[str] | None = None) -> None:
+    parser = argparse.ArgumentParser(
+        description="Export zbMath software records as QuickStatements JSON."
+    )
+    parser.add_argument(
+        "--id",
+        type=int,
+        metavar="SWMATH_ID",
+        help="Export a single software record by its swMath id.",
+    )
+    parser.add_argument(
+        "--start-after",
+        type=int,
+        default=0,
+        metavar="ID",
+        help="Start exporting records after this id (for full-dump mode). Default: 0.",
+    )
+    parser.add_argument(
+        "--phase",
+        choices=["metadata", "references", "all"],
+        default="all",
+        help=(
+            "Which output file(s) to produce: 'metadata', 'references', or 'all'. "
+            "Default: all."
+        ),
+    )
+    parser.add_argument(
+        "--output-dir",
+        default=".",
+        metavar="DIR",
+        help="Directory to write output JSON files to. Default: current directory.",
+    )
+    parser.add_argument(
+        "--no-references",
+        action="store_true",
+        help=(
+            "Skip fetching citing articles for each software record. "
+            "Useful when running with --phase metadata only."
+        ),
+    )
+    args = parser.parse_args(argv)
+
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    if args.id is not None:
+        print(f"Fetching software id={args.id} …", file=sys.stderr)
+        record = _fetch_software_by_id(args.id)
+        if args.phase in ("references", "all") and not args.no_references:
+            record = _add_references(record)
+        results = [record]
+    else:
+        print("Fetching all software records …", file=sys.stderr)
+        results = []
+        for record in _iter_all_software(start_after=args.start_after):
+            if args.phase in ("references", "all") and not args.no_references:
+                record = _add_references(record)
+            results.append(record)
+            if len(results) % 100 == 0:
+                print(f"  … {len(results)} records fetched", file=sys.stderr)
+
+    print(f"Building JSON for {len(results)} record(s) …", file=sys.stderr)
+
+    if args.phase in ("metadata", "all"):
+        meta_path = os.path.join(args.output_dir, "software_quickstatements_metadata.json")
+        meta_json = build_metadata_json(results)
+        with open(meta_path, "w", encoding="utf-8") as f:
+            json.dump(meta_json, f, indent=2, ensure_ascii=False)
+        print(f"Wrote {meta_path}", file=sys.stderr)
+
+    if args.phase in ("references", "all"):
+        refs_path = os.path.join(args.output_dir, "software_quickstatements_references.json")
+        refs_json = build_references_json(results)
+        with open(refs_path, "w", encoding="utf-8") as f:
+            json.dump(refs_json, f, indent=2, ensure_ascii=False)
+        print(f"Wrote {refs_path}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/zbmath_rest2oai/software_quickstatements.py b/src/zbmath_rest2oai/software_quickstatements.py
new file mode 100644
index 00000000..3fe80353
--- /dev/null
+++ b/src/zbmath_rest2oai/software_quickstatements.py
@@ -0,0 +1,156 @@
+"""Convert zbMath software records to QuickStatements JSON for MaRDI portal.
+
+Produces two JSON output files:
+- software_quickstatements_metadata.json: metadata rows (swmathID, label,
+  homepage, source code, classifications, related software, standard articles)
+- software_quickstatements_references.json: reference rows (one per article
+  citing the software)
+
+JSON schema::
+
+    {"rows": [{"qP13": "825", "Len": "SageMath", "P29": "...", ...}, ...]}
+
+Key conventions (matching MathSearch QuickStatements job):
+- ``qP<id>`` – look up item whose property P<id> equals the value.
+- ``P<x>q<id>`` – value is an external id looked up via property P<id>; result
+  stored as wikibase-item value of property P<x>.
+- ``L<lang>`` / ``D<lang>`` – label / description in that language.
+- ``qal<P<id>`` – qualifier.
+- Multi-valued fields use ``_1``, ``_2``, … suffixes (the job strips ``_(\\d+)``).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def _add_multi(row: dict, key: str, values: list[Any]) -> None:
+    """Add multi-valued fields with ``_1``, ``_2``, … suffixes.
+
+    Does nothing when *values* is empty.
+    """
+    for i, val in enumerate(values, start=1):
+        row[f"{key}_{i}"] = str(val)
+
+
+def to_metadata_row(result: dict) -> dict:
+    """Build a QuickStatements metadata row from a software result dict.
+
+    Parameters
+    ----------
+    result:
+        Raw software dict as returned by the zbMath REST API (the ``result``
+        field), before or after ``apply_zbmath_api_fixes``.  If an OAI prefix
+        has already been applied to ``id`` (e.g. ``"oai:swmath.org:825"``), the
+        numeric part is extracted automatically.
+
+    Returns
+    -------
+    dict
+        A single row suitable for the ``rows`` list in the metadata JSON file.
+    """
+    raw_id = result.get("id", "")
+    # Strip OAI prefix if present (e.g. "oai:swmath.org:825" → "825")
+    sw_id = str(raw_id).split(":")[-1]
+
+    row: dict = {"qP13": sw_id}
+
+    name = result.get("name", "")
+    if name:
+        row["Len"] = str(name)
+
+    homepage = result.get("homepage", "")
+    if isinstance(homepage, str) and homepage.strip():
+        row["P29"] = homepage.strip()
+
+    source_code = result.get("source_code", "")
+    if isinstance(source_code, str) and source_code.strip():
+        row["P339"] = source_code.strip()
+
+    classifications = result.get("classification", [])
+    if isinstance(classifications, str):
+        classifications = [classifications]
+    _add_multi(row, "P226", classifications)
+
+    related = result.get("related_software", [])
+    if isinstance(related, dict):
+        related = [related]
+    rel_ids = [
+        str(r["id"]) for r in related if isinstance(r, dict) and r.get("id") is not None
+    ]
+    _add_multi(row, "P1458q13", rel_ids)
+
+    std_articles = result.get("standard_articles", [])
+    if isinstance(std_articles, dict):
+        std_articles = [std_articles]
+    std_ids = [
+        str(a["id"])
+        for a in std_articles
+        if isinstance(a, dict) and a.get("id") is not None
+    ]
+    _add_multi(row, "P286q1459", std_ids)
+
+    return row
+
+
+def to_reference_rows(result: dict) -> list[dict]:
+    """Build QuickStatements reference rows from a software result dict.
+
+    One row is emitted per article that cites the software.
+
+    Parameters
+    ----------
+    result:
+        Raw software dict (same format as for :func:`to_metadata_row`).
+
+    Returns
+    -------
+    list[dict]
+        List of rows for the references JSON file.
+    """
+    raw_id = result.get("id", "")
+    sw_id = str(raw_id).split(":")[-1]
+
+    references = result.get("references", [])
+    if isinstance(references, (int, str)):
+        references = [references]
+
+    rows = []
+    for ref_id in references:
+        rows.append({"qP1451": str(ref_id), "P1463q13": sw_id})
+    return rows
+
+
+def build_metadata_json(results: list[dict]) -> dict:
+    """Build the metadata QuickStatements JSON structure.
+
+    Parameters
+    ----------
+    results:
+        List of raw software result dicts.
+
+    Returns
+    -------
+    dict
+        ``{"rows": [...]}`` ready to be serialised with ``json.dumps``.
+    """
+    return {"rows": [to_metadata_row(r) for r in results]}
+
+
+def build_references_json(results: list[dict]) -> dict:
+    """Build the references QuickStatements JSON structure.
+
+    Parameters
+    ----------
+    results:
+        List of raw software result dicts.
+
+    Returns
+    -------
+    dict
+        ``{"rows": [...]}`` ready to be serialised with ``json.dumps``.
+    """
+    rows: list[dict] = []
+    for r in results:
+        rows.extend(to_reference_rows(r))
+    return {"rows": rows}
diff --git a/test/data/software/sagemath_825_metadata.json b/test/data/software/sagemath_825_metadata.json
new file mode 100644
index 00000000..302e5294
--- /dev/null
+++ b/test/data/software/sagemath_825_metadata.json
@@ -0,0 +1,16 @@
+{
+  "rows": [
+    {
+      "qP13": "825",
+      "Len": "SageMath",
+      "P29": "https://www.sagemath.org/",
+      "P339": "https://github.com/sagemath/sage",
+      "P226_1": "68",
+      "P226_2": "11",
+      "P226_3": "14",
+      "P1458q13_1": "1",
+      "P1458q13_2": "2",
+      "P286q1459_1": "2187000"
+    }
+  ]
+}
diff --git a/test/data/software/sagemath_825_raw.json b/test/data/software/sagemath_825_raw.json
new file mode 100644
index 00000000..3925fc86
--- /dev/null
+++ b/test/data/software/sagemath_825_raw.json
@@ -0,0 +1,54 @@
+{
+  "articles_count": 42,
+  "authors": [
+    "Stein, William"
+  ],
+  "classification": [
+    "68",
+    "11",
+    "14"
+  ],
+  "dependencies": "",
+  "description": "SageMath is a free open-source mathematics software system.",
+  "homepage": "https://www.sagemath.org/",
+  "id": 825,
+  "keywords": "orms",
+  "license_terms": "GPL",
+  "name": "SageMath",
+  "operating_systems": "Linux, macOS",
+  "orms_id": 825,
+  "programming_languages": "Python",
+  "related_software": [
+    {
+      "id": 1,
+      "name": "Mathematica"
+    },
+    {
+      "id": 2,
+      "name": "PARI/GP"
+    }
+  ],
+  "source_code": "https://github.com/sagemath/sage",
+  "standard_articles": [
+    {
+      "authors": "Stein, William; Joyner, David",
+      "id": 2187000,
+      "source": "ACM SIGSAM Bull. 39, No. 2, 61-64 (2005).",
+      "title": "SAGE: system for algebra and geometry experimentation",
+      "year": 2005
+    }
+  ],
+  "references": [
+    3000001,
+    3000002
+  ],
+  "references_alt": [
+    "3000001;10.1000/test.001",
+    "3000002"
+  ],
+  "references_year_alt": [
+    "2010",
+    "2012"
+  ],
+  "zbmath_url": "https://zbmath.org/software/825"
+}
diff --git a/test/data/software/sagemath_825_references.json b/test/data/software/sagemath_825_references.json
new file mode 100644
index 00000000..6dd4740b
--- /dev/null
+++ b/test/data/software/sagemath_825_references.json
@@ -0,0 +1,12 @@
+{
+  "rows": [
+    {
+      "qP1451": "3000001",
+      "P1463q13": "825"
+    },
+    {
+      "qP1451": "3000002",
+      "P1463q13": "825"
+    }
+  ]
+}
diff --git a/test/test_software_quickstatements.py b/test/test_software_quickstatements.py
new file mode 100644
index 00000000..cb7cd9dc
--- /dev/null
+++ b/test/test_software_quickstatements.py
@@ -0,0 +1,184 @@
+"""Tests for software_quickstatements JSON output."""
+
+import json
+import os
+import unittest
+
+from zbmath_rest2oai.software_quickstatements import (
+    build_metadata_json,
+    build_references_json,
+    to_metadata_row,
+    to_reference_rows,
+)
+
+DATA_DIR = os.path.join(os.path.dirname(__file__), "data", "software")
+
+
+def load_fixture(name):
+    with open(os.path.join(DATA_DIR, name)) as f:
+        return json.load(f)
+
+
+class TestToMetadataRow(unittest.TestCase):
+    """Unit tests for to_metadata_row."""
+
+    def test_basic_fields(self):
+        result = {"id": 825, "name": "SageMath", "homepage": "https://www.sagemath.org/"}
+        row = to_metadata_row(result)
+        self.assertEqual(row["qP13"], "825")
+        self.assertEqual(row["Len"], "SageMath")
+        self.assertEqual(row["P29"], "https://www.sagemath.org/")
+
+    def test_source_code(self):
+        result = {"id": 1, "name": "Foo", "source_code": "https://github.com/foo/bar"}
+        row = to_metadata_row(result)
+        self.assertEqual(row["P339"], "https://github.com/foo/bar")
+
+    def test_empty_source_code_omitted(self):
+        result = {"id": 1, "name": "Foo", "source_code": ""}
+        row = to_metadata_row(result)
+        self.assertNotIn("P339", row)
+
+    def test_empty_homepage_omitted(self):
+        result = {"id": 1, "name": "Foo", "homepage": ""}
+        row = to_metadata_row(result)
+        self.assertNotIn("P29", row)
+
+    def test_classifications_multi(self):
+        result = {"id": 1, "name": "Foo", "classification": ["68", "11", "14"]}
+        row = to_metadata_row(result)
+        self.assertEqual(row["P226_1"], "68")
+        self.assertEqual(row["P226_2"], "11")
+        self.assertEqual(row["P226_3"], "14")
+
+    def test_related_software_multi(self):
+        result = {
+            "id": 1,
+            "name": "Foo",
+            "related_software": [{"id": 10, "name": "A"}, {"id": 20, "name": "B"}],
+        }
+        row = to_metadata_row(result)
+        self.assertEqual(row["P1458q13_1"], "10")
+        self.assertEqual(row["P1458q13_2"], "20")
+
+    def test_standard_articles_multi(self):
+        result = {
+            "id": 1,
+            "name": "Foo",
+            "standard_articles": [{"id": 100, "title": "T1"}, {"id": 200, "title": "T2"}],
+        }
+        row = to_metadata_row(result)
+        self.assertEqual(row["P286q1459_1"], "100")
+        self.assertEqual(row["P286q1459_2"], "200")
+
+    def test_standard_articles_as_dict(self):
+        """single standard_article returned as dict, not list."""
+        result = {
+            "id": 1,
+            "name": "Foo",
+            "standard_articles": {"id": 100, "title": "T1"},
+        }
+        row = to_metadata_row(result)
+        self.assertEqual(row["P286q1459_1"], "100")
+
+    def test_oai_prefix_stripped(self):
+        result = {"id": "oai:swmath.org:825", "name": "SageMath"}
+        row = to_metadata_row(result)
+        self.assertEqual(row["qP13"], "825")
+
+    def test_no_classifications_produces_no_p226(self):
+        result = {"id": 1, "name": "Foo"}
+        row = to_metadata_row(result)
+        self.assertNotIn("P226_1", row)
+
+
+class TestToReferenceRows(unittest.TestCase):
+    """Unit tests for to_reference_rows."""
+
+    def test_basic(self):
+        result = {"id": 825, "references": [3000001, 3000002]}
+        rows = to_reference_rows(result)
+        self.assertEqual(len(rows), 2)
+        self.assertEqual(rows[0], {"qP1451": "3000001", "P1463q13": "825"})
+        self.assertEqual(rows[1], {"qP1451": "3000002", "P1463q13": "825"})
+
+    def test_empty_references(self):
+        result = {"id": 1, "references": []}
+        rows = to_reference_rows(result)
+        self.assertEqual(rows, [])
+
+    def test_single_reference_as_int(self):
+        result = {"id": 2, "references": 999}
+        rows = to_reference_rows(result)
+        self.assertEqual(rows, [{"qP1451": "999", "P1463q13": "2"}])
+
+    def test_oai_prefix_stripped(self):
+        result = {"id": "oai:swmath.org:5", "references": [42]}
+        rows = to_reference_rows(result)
+        self.assertEqual(rows[0]["P1463q13"], "5")
+
+
+class TestBuildMetadataJson(unittest.TestCase):
+    """Unit tests for build_metadata_json."""
+
+    def test_structure(self):
+        results = [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}]
+        output = build_metadata_json(results)
+        self.assertIn("rows", output)
+        self.assertEqual(len(output["rows"]), 2)
+        self.assertEqual(output["rows"][0]["qP13"], "1")
+        self.assertEqual(output["rows"][1]["qP13"], "2")
+
+
+class TestBuildReferencesJson(unittest.TestCase):
+    """Unit tests for build_references_json."""
+
+    def test_structure(self):
+        results = [
+            {"id": 1, "references": [10, 11]},
+            {"id": 2, "references": [20]},
+        ]
+        output = build_references_json(results)
+        self.assertIn("rows", output)
+        self.assertEqual(len(output["rows"]), 3)
+
+
+class TestSageMath825Fixture(unittest.TestCase):
+    """Integration test: verify deterministic JSON output for SageMath (id=825).
+
+    The raw fixture (sagemath_825_raw.json) is compared against the expected
+    output fixtures (sagemath_825_metadata.json, sagemath_825_references.json).
+    """
+
+    def setUp(self):
+        self.raw = load_fixture("sagemath_825_raw.json")
+        self.expected_meta = load_fixture("sagemath_825_metadata.json")
+        self.expected_refs = load_fixture("sagemath_825_references.json")
+
+    def test_metadata_matches_fixture(self):
+        result = build_metadata_json([self.raw])
+        self.assertEqual(result, self.expected_meta)
+
+    def test_references_match_fixture(self):
+        result = build_references_json([self.raw])
+        self.assertEqual(result, self.expected_refs)
+
+    def test_metadata_qp13_is_825(self):
+        result = build_metadata_json([self.raw])
+        self.assertEqual(result["rows"][0]["qP13"], "825")
+
+    def test_metadata_label_is_sagemath(self):
+        result = build_metadata_json([self.raw])
+        self.assertEqual(result["rows"][0]["Len"], "SageMath")
+
+    def test_metadata_homepage_present(self):
+        result = build_metadata_json([self.raw])
+        self.assertIn("P29", result["rows"][0])
+
+    def test_metadata_source_code_present(self):
+        result = build_metadata_json([self.raw])
+        self.assertIn("P339", result["rows"][0])
+
+
+if __name__ == "__main__":
+    unittest.main()

From 6c969e37ba61e0cbe9604271d34cc94f9cd13bc4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 05:43:04 +0000
Subject: [PATCH 3/4] Update SageMath fixture with real zbMATH API data and
 correct expected outputs

Co-authored-by: physikerwelt <2777736+physikerwelt@users.noreply.github.com>
---
 test/data/software/sagemath_825_metadata.json |  91 ++++++++-
 test/data/software/sagemath_825_raw.json      | 188 +++++++++++++++---
 .../software/sagemath_825_references.json     |   8 +-
 3 files changed, 251 insertions(+), 36 deletions(-)

diff --git a/test/data/software/sagemath_825_metadata.json b/test/data/software/sagemath_825_metadata.json
index 302e5294..987a0f31 100644
--- a/test/data/software/sagemath_825_metadata.json
+++ b/test/data/software/sagemath_825_metadata.json
@@ -5,12 +5,95 @@
       "Len": "SageMath",
       "P29": "https://www.sagemath.org/",
       "P339": "https://github.com/sagemath/sage",
-      "P226_1": "68",
+      "P226_1": "05",
       "P226_2": "11",
       "P226_3": "14",
-      "P1458q13_1": "1",
-      "P1458q13_2": "2",
-      "P286q1459_1": "2187000"
+      "P226_4": "20",
+      "P226_5": "68",
+      "P226_6": "00",
+      "P226_7": "01",
+      "P226_8": "03",
+      "P226_9": "06",
+      "P226_10": "08",
+      "P226_11": "12",
+      "P226_12": "13",
+      "P226_13": "15",
+      "P226_14": "16",
+      "P226_15": "17",
+      "P226_16": "18",
+      "P226_17": "19",
+      "P226_18": "22",
+      "P226_19": "26",
+      "P226_20": "28",
+      "P226_21": "30",
+      "P226_22": "31",
+      "P226_23": "32",
+      "P226_24": "33",
+      "P226_25": "34",
+      "P226_26": "35",
+      "P226_27": "37",
+      "P226_28": "39",
+      "P226_29": "40",
+      "P226_30": "41",
+      "P226_31": "42",
+      "P226_32": "43",
+      "P226_33": "44",
+      "P226_34": "46",
+      "P226_35": "47",
+      "P226_36": "49",
+      "P226_37": "51",
+      "P226_38": "52",
+      "P226_39": "53",
+      "P226_40": "54",
+      "P226_41": "55",
+      "P226_42": "57",
+      "P226_43": "58",
+      "P226_44": "60",
+      "P226_45": "62",
+      "P226_46": "65",
+      "P226_47": "70",
+      "P226_48": "74",
+      "P226_49": "76",
+      "P226_50": "78",
+      "P226_51": "80",
+      "P226_52": "81",
+      "P226_53": "82",
+      "P226_54": "83",
+      "P226_55": "85",
+      "P226_56": "86",
+      "P226_57": "90",
+      "P226_58": "91",
+      "P226_59": "92",
+      "P226_60": "93",
+      "P226_61": "94",
+      "P226_62": "97",
+      "P1458q13_1": "540",
+      "P1458q13_2": "23170",
+      "P1458q13_3": "320",
+      "P1458q13_4": "7248",
+      "P1458q13_5": "680",
+      "P1458q13_6": "866",
+      "P1458q13_7": "537",
+      "P1458q13_8": "554",
+      "P1458q13_9": "16448",
+      "P1458q13_10": "7249",
+      "P1458q13_11": "545",
+      "P1458q13_12": "14460",
+      "P1458q13_13": "27596",
+      "P1458q13_14": "611",
+      "P1458q13_15": "7823",
+      "P1458q13_16": "23728",
+      "P1458q13_17": "4968",
+      "P1458q13_18": "724",
+      "P1458q13_19": "560",
+      "P1458q13_20": "6874",
+      "P286q1459_1": "7045592",
+      "P286q1459_2": "6418360",
+      "P286q1459_3": "6532438",
+      "P286q1459_4": "6462232",
+      "P286q1459_5": "6114556",
+      "P286q1459_6": "5785538",
+      "P286q1459_7": "5380273"
     }
   ]
 }
diff --git a/test/data/software/sagemath_825_raw.json b/test/data/software/sagemath_825_raw.json
index 3925fc86..e995a1c9 100644
--- a/test/data/software/sagemath_825_raw.json
+++ b/test/data/software/sagemath_825_raw.json
@@ -1,54 +1,182 @@
 {
-  "articles_count": 42,
+  "articles_count": 3439,
   "authors": [
-    "Stein, William"
+    "The Sage Developers",
+    "Stein, William",
+    "Joyner, David",
+    "Kohel, David",
+    "Cremona, John",
+    "Eröcal, Burçin"
   ],
   "classification": [
-    "68",
+    "05",
     "11",
-    "14"
+    "14",
+    "20",
+    "68",
+    "00",
+    "01",
+    "03",
+    "06",
+    "08",
+    "12",
+    "13",
+    "15",
+    "16",
+    "17",
+    "18",
+    "19",
+    "22",
+    "26",
+    "28",
+    "30",
+    "31",
+    "32",
+    "33",
+    "34",
+    "35",
+    "37",
+    "39",
+    "40",
+    "41",
+    "42",
+    "43",
+    "44",
+    "46",
+    "47",
+    "49",
+    "51",
+    "52",
+    "53",
+    "54",
+    "55",
+    "57",
+    "58",
+    "60",
+    "62",
+    "65",
+    "70",
+    "74",
+    "76",
+    "78",
+    "80",
+    "81",
+    "82",
+    "83",
+    "85",
+    "86",
+    "90",
+    "91",
+    "92",
+    "93",
+    "94",
+    "97"
   ],
-  "dependencies": "",
-  "description": "SageMath is a free open-source mathematics software system.",
+  "dependencies": null,
+  "description": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
   "homepage": "https://www.sagemath.org/",
   "id": 825,
-  "keywords": "orms",
-  "license_terms": "GPL",
+  "keywords": [
+    "orms",
+    "Python",
+    "Cython",
+    "Sage",
+    "Open Source",
+    "Interfaces"
+  ],
+  "license_terms": "GPL (≥ 3)",
   "name": "SageMath",
-  "operating_systems": "Linux, macOS",
-  "orms_id": 825,
-  "programming_languages": "Python",
+  "operating_systems": null,
+  "orms_id": "255",
+  "programming_languages": null,
   "related_software": [
-    {
-      "id": 1,
-      "name": "Mathematica"
-    },
-    {
-      "id": 2,
-      "name": "PARI/GP"
-    }
+    {"id": 540, "name": "Magma"},
+    {"id": 23170, "name": "GitHub"},
+    {"id": 320, "name": "GAP"},
+    {"id": 7248, "name": "OEIS"},
+    {"id": 680, "name": "PARI/GP"},
+    {"id": 866, "name": "SINGULAR"},
+    {"id": 537, "name": "Macaulay2"},
+    {"id": 554, "name": "Mathematica"},
+    {"id": 16448, "name": "Sage-Combinat"},
+    {"id": 7249, "name": "LMFDB"},
+    {"id": 545, "name": "Maple"},
+    {"id": 14460, "name": "Python"},
+    {"id": 27596, "name": "ecdata"},
+    {"id": 611, "name": "nauty"},
+    {"id": 7823, "name": "Traces"},
+    {"id": 23728, "name": "MathOverflow"},
+    {"id": 4968, "name": "DLMF"},
+    {"id": 724, "name": "polymake"},
+    {"id": 560, "name": "Maxima"},
+    {"id": 6874, "name": "SnapPy"}
   ],
   "source_code": "https://github.com/sagemath/sage",
   "standard_articles": [
     {
-      "authors": "Stein, William; Joyner, David",
-      "id": 2187000,
-      "source": "ACM SIGSAM Bull. 39, No. 2, 61-64 (2005).",
-      "title": "SAGE: system for algebra and geometry experimentation",
-      "year": 2005
+      "authors": [],
+      "id": 7045592,
+      "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "year": "2019"
+    },
+    {
+      "authors": [],
+      "id": 6418360,
+      "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "year": "2015"
+    },
+    {
+      "authors": [],
+      "id": 6532438,
+      "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "year": "2013"
+    },
+    {
+      "authors": [],
+      "id": 6462232,
+      "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "year": "2013"
+    },
+    {
+      "authors": [],
+      "id": 6114556,
+      "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "year": "2012"
+    },
+    {
+      "authors": [],
+      "id": 5785538,
+      "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "year": "2010"
+    },
+    {
+      "authors": [],
+      "id": 5380273,
+      "source": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "title": "zbMATH Open Web Interface contents unavailable due to conflicting licenses.",
+      "year": "2008"
     }
   ],
   "references": [
-    3000001,
-    3000002
+    6383667,
+    6414141,
+    6462232
   ],
   "references_alt": [
-    "3000001;10.1000/test.001",
-    "3000002"
+    "6383667;10.1145/2930266",
+    "6414141",
+    "6462232"
   ],
   "references_year_alt": [
-    "2010",
-    "2012"
+    "2016",
+    "2015",
+    "2013"
   ],
   "zbmath_url": "https://zbmath.org/software/825"
 }
diff --git a/test/data/software/sagemath_825_references.json b/test/data/software/sagemath_825_references.json
index 6dd4740b..9df59ac4 100644
--- a/test/data/software/sagemath_825_references.json
+++ b/test/data/software/sagemath_825_references.json
@@ -1,11 +1,15 @@
 {
   "rows": [
     {
-      "qP1451": "3000001",
+      "qP1451": "6383667",
       "P1463q13": "825"
     },
     {
-      "qP1451": "3000002",
+      "qP1451": "6414141",
+      "P1463q13": "825"
+    },
+    {
+      "qP1451": "6462232",
       "P1463q13": "825"
     }
   ]

From e54393e638c17a30c73e981af7323e5b2837e361 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 05:51:29 +0000
Subject: [PATCH 4/4] Fix FLK-E501 line-too-long warnings in new Python files

Co-authored-by: physikerwelt <2777736+physikerwelt@users.noreply.github.com>
---
 .../run_software_quickstatements.py           | 47 ++++++++++++++-----
 .../software_quickstatements.py               |  7 ++-
 test/test_software_quickstatements.py         | 24 ++++++++--
 3 files changed, 59 insertions(+), 19 deletions(-)

diff --git a/src/zbmath_rest2oai/run_software_quickstatements.py b/src/zbmath_rest2oai/run_software_quickstatements.py
index d65d3932..0ced47e8 100644
--- a/src/zbmath_rest2oai/run_software_quickstatements.py
+++ b/src/zbmath_rest2oai/run_software_quickstatements.py
@@ -19,7 +19,8 @@
     python -m zbmath_rest2oai.run_software_quickstatements --phase references
 
     # Write to a specific directory
-    python -m zbmath_rest2oai.run_software_quickstatements --output-dir /tmp/out
+    python -m zbmath_rest2oai.run_software_quickstatements \
+        --output-dir /tmp/out
 """
 
 from __future__ import annotations
@@ -31,7 +32,10 @@
 
 import requests
 
-from zbmath_rest2oai.software_quickstatements import build_metadata_json, build_references_json
+from zbmath_rest2oai.software_quickstatements import (
+    build_metadata_json,
+    build_references_json,
+)
 
 
 _API_BASE = "https://api.zbmath.org/v1/software"
@@ -53,11 +57,16 @@ def _fetch_software_by_id(sw_id: int) -> dict:
     raise ValueError(f"Unexpected result format for id {sw_id}: {data!r}")
 
 
-def _iter_all_software(start_after: int = 0, page_size: int = _DEFAULT_PAGE_SIZE):
+def _iter_all_software(
+    start_after: int = 0, page_size: int = _DEFAULT_PAGE_SIZE
+):
     """Yield software result dicts from the zbMath REST API (paged)."""
     cursor = start_after
     while True:
-        url = f"{_API_BASE}/_all?start_after={cursor}&results_per_request={page_size}"
+        url = (
+            f"{_API_BASE}/_all"
+            f"?start_after={cursor}&results_per_request={page_size}"
+        )
         headers = {"Accept": "application/json"}
         r = requests.get(url, headers=headers, timeout=(10, 120))
         r.raise_for_status()
@@ -76,7 +85,8 @@ def _iter_all_software(start_after: int = 0, page_size: int = _DEFAULT_PAGE_SIZE
 def _add_references(result: dict) -> dict:
     """Augment a software result dict with its citing articles.
 
-    Mirrors the logic in :func:`zbmath_rest2oai.getAsXml.add_references_to_software`.
+    Mirrors the logic in
+    :func:`zbmath_rest2oai.getAsXml.add_references_to_software`.
     """
     sw_id = result.get("id")
     if sw_id is None:
@@ -89,7 +99,9 @@ def _add_references(result: dict) -> dict:
             f"https://api.zbmath.org/v1/document/_structured_search"
             f"?page={page}&results_per_page=100&software%20id={sw_id}"
         )
-        r = requests.get(url, headers={"Accept": "application/json"}, timeout=(10, 60))
+        r = requests.get(
+            url, headers={"Accept": "application/json"}, timeout=(10, 60)
+        )
         r.raise_for_status()
         data = r.json()
         page_results = data.get("result", [])
@@ -118,22 +130,29 @@ def main(argv: list[str] | None = None) -> None:
         type=int,
         default=0,
         metavar="ID",
-        help="Start exporting records after this id (for full-dump mode). Default: 0.",
+        help=(
+            "Start exporting records after this id"
+            " (for full-dump mode). Default: 0."
+        ),
     )
     parser.add_argument(
         "--phase",
         choices=["metadata", "references", "all"],
         default="all",
         help=(
-            "Which output file(s) to produce: 'metadata', 'references', or 'all'. "
-            "Default: all."
+            "Which output file(s) to produce:"
+            " 'metadata', 'references', or 'all'."
+            " Default: all."
         ),
     )
     parser.add_argument(
         "--output-dir",
         default=".",
         metavar="DIR",
-        help="Directory to write output JSON files to. Default: current directory.",
+        help=(
+            "Directory to write output JSON files to."
+            " Default: current directory."
+        ),
     )
     parser.add_argument(
         "--no-references",
@@ -166,14 +185,18 @@ def main(argv: list[str] | None = None) -> None:
     print(f"Building JSON for {len(results)} record(s) …", file=sys.stderr)
 
     if args.phase in ("metadata", "all"):
-        meta_path = os.path.join(args.output_dir, "software_quickstatements_metadata.json")
+        meta_path = os.path.join(
+            args.output_dir, "software_quickstatements_metadata.json"
+        )
         meta_json = build_metadata_json(results)
         with open(meta_path, "w", encoding="utf-8") as f:
             json.dump(meta_json, f, indent=2, ensure_ascii=False)
         print(f"Wrote {meta_path}", file=sys.stderr)
 
     if args.phase in ("references", "all"):
-        refs_path = os.path.join(args.output_dir, "software_quickstatements_references.json")
+        refs_path = os.path.join(
+            args.output_dir, "software_quickstatements_references.json"
+        )
         refs_json = build_references_json(results)
         with open(refs_path, "w", encoding="utf-8") as f:
             json.dump(refs_json, f, indent=2, ensure_ascii=False)
diff --git a/src/zbmath_rest2oai/software_quickstatements.py b/src/zbmath_rest2oai/software_quickstatements.py
index 3fe80353..758e63de 100644
--- a/src/zbmath_rest2oai/software_quickstatements.py
+++ b/src/zbmath_rest2oai/software_quickstatements.py
@@ -16,7 +16,8 @@
   stored as wikibase-item value of property P<x>.
 - ``L<lang>`` / ``D<lang>`` – label / description in that language.
 - ``qal<P<id>`` – qualifier.
-- Multi-valued fields use ``_1``, ``_2``, … suffixes (the job strips ``_(\\d+)``).
+- Multi-valued fields use ``_1``, ``_2``, … suffixes (the job strips
+  the ``_<digits>`` suffix).
 """
 
 from __future__ import annotations
@@ -76,7 +77,9 @@ def to_metadata_row(result: dict) -> dict:
     if isinstance(related, dict):
         related = [related]
     rel_ids = [
-        str(r["id"]) for r in related if isinstance(r, dict) and r.get("id") is not None
+        str(r["id"])
+        for r in related
+        if isinstance(r, dict) and r.get("id") is not None
     ]
     _add_multi(row, "P1458q13", rel_ids)
 
diff --git a/test/test_software_quickstatements.py b/test/test_software_quickstatements.py
index cb7cd9dc..8f50d9c2 100644
--- a/test/test_software_quickstatements.py
+++ b/test/test_software_quickstatements.py
@@ -23,14 +23,22 @@ class TestToMetadataRow(unittest.TestCase):
     """Unit tests for to_metadata_row."""
 
     def test_basic_fields(self):
-        result = {"id": 825, "name": "SageMath", "homepage": "https://www.sagemath.org/"}
+        result = {
+            "id": 825,
+            "name": "SageMath",
+            "homepage": "https://www.sagemath.org/",
+        }
         row = to_metadata_row(result)
         self.assertEqual(row["qP13"], "825")
         self.assertEqual(row["Len"], "SageMath")
         self.assertEqual(row["P29"], "https://www.sagemath.org/")
 
     def test_source_code(self):
-        result = {"id": 1, "name": "Foo", "source_code": "https://github.com/foo/bar"}
+        result = {
+            "id": 1,
+            "name": "Foo",
+            "source_code": "https://github.com/foo/bar",
+        }
         row = to_metadata_row(result)
         self.assertEqual(row["P339"], "https://github.com/foo/bar")
 
@@ -55,7 +63,10 @@ def test_related_software_multi(self):
         result = {
             "id": 1,
             "name": "Foo",
-            "related_software": [{"id": 10, "name": "A"}, {"id": 20, "name": "B"}],
+            "related_software": [
+                {"id": 10, "name": "A"},
+                {"id": 20, "name": "B"},
+            ],
         }
         row = to_metadata_row(result)
         self.assertEqual(row["P1458q13_1"], "10")
@@ -65,7 +76,10 @@ def test_standard_articles_multi(self):
         result = {
             "id": 1,
             "name": "Foo",
-            "standard_articles": [{"id": 100, "title": "T1"}, {"id": 200, "title": "T2"}],
+            "standard_articles": [
+                {"id": 100, "title": "T1"},
+                {"id": 200, "title": "T2"},
+            ],
         }
         row = to_metadata_row(result)
         self.assertEqual(row["P286q1459_1"], "100")
@@ -144,7 +158,7 @@ def test_structure(self):
 
 
 class TestSageMath825Fixture(unittest.TestCase):
-    """Integration test: verify deterministic JSON output for SageMath (id=825).
+    """Integration test: verify deterministic JSON output for SageMath.
 
     The raw fixture (sagemath_825_raw.json) is compared against the expected
     output fixtures (sagemath_825_metadata.json, sagemath_825_references.json).