From 4821062de1e2f18240227db0b9b359ee5fcae4cd Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 25 Aug 2025 12:45:35 -0500 Subject: [PATCH 1/3] Fix: PMCID extraction --- src/pubget/_utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/pubget/_utils.py b/src/pubget/_utils.py index a1c88a0..b94fe97 100644 --- a/src/pubget/_utils.py +++ b/src/pubget/_utils.py @@ -136,12 +136,14 @@ def get_pmcid(article: Union[etree.ElementTree, etree.Element]) -> int: pmcid = article.find( "front/article-meta/article-id[@pub-id-type='pmcid']" ) - if pmc is None and pmcid is None: - raise ValueError("No PMC ID found in the article XML.") - if pmc: + + val = None + if pmc is not None: val = pmc.text - else: - val = pmcid.text.replace("PMC", "") + elif pmcid is not None: + val = pmcid.text + if val.startswith("PMC"): + val = val[3:] return int(val) From 755d92eac5e779ec961a9354e8a636d7252fb5e6 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 25 Aug 2025 12:51:50 -0500 Subject: [PATCH 2/3] Raise error if val is still None --- src/pubget/_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pubget/_utils.py b/src/pubget/_utils.py index b94fe97..a50140d 100644 --- a/src/pubget/_utils.py +++ b/src/pubget/_utils.py @@ -145,6 +145,9 @@ def get_pmcid(article: Union[etree.ElementTree, etree.Element]) -> int: if val.startswith("PMC"): val = val[3:] + if val is None or not val.isdigit(): + raise ValueError("No valid PMCID found in article XML.") + return int(val) From 1df08d227455ed9b66912a10e99d9e9dd09936a8 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Fri, 29 Aug 2025 14:49:17 -0500 Subject: [PATCH 3/3] Fix PMCID extraction from text --- src/pubget/_metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pubget/_metadata.py b/src/pubget/_metadata.py index 8728f2e..abbdcab 100644 --- a/src/pubget/_metadata.py +++ b/src/pubget/_metadata.py @@ -90,7 +90,7 @@ def _add_license(article: etree.Element, metadata: Dict[str, Any]) -> None: def _add_id(article_id: etree.Element, metadata: Dict[str, Any]) -> None: id_type = article_id.get("pub-id-type") - if id_type not in ["pmc", "pmid", "doi"]: + if id_type not in ["pmc", "pmid", "doi", "pmcid"]: return if id_type == "pmc": id_type = "pmcid"