diff --git a/src/pubget/_metadata.py b/src/pubget/_metadata.py
index 8728f2e..abbdcab 100644
--- a/src/pubget/_metadata.py
+++ b/src/pubget/_metadata.py
@@ -90,7 +90,7 @@ def _add_license(article: etree.Element, metadata: Dict[str, Any]) -> None:
 
 def _add_id(article_id: etree.Element, metadata: Dict[str, Any]) -> None:
     id_type = article_id.get("pub-id-type")
-    if id_type not in ["pmc", "pmid", "doi"]:
+    if id_type not in ["pmc", "pmid", "doi", "pmcid"]:
         return
     if id_type == "pmc":
         id_type = "pmcid"
diff --git a/src/pubget/_utils.py b/src/pubget/_utils.py
index a1c88a0..a50140d 100644
--- a/src/pubget/_utils.py
+++ b/src/pubget/_utils.py
@@ -136,12 +136,21 @@ def get_pmcid(article: Union[etree.ElementTree, etree.Element]) -> int:
     pmcid = article.find(
         "front/article-meta/article-id[@pub-id-type='pmcid']"
     )
-    if pmc is None and pmcid is None:
-        raise ValueError("No PMC ID found in the article XML.")
-    if pmc:
+
+    val = None
+    if pmc is not None:
         val = pmc.text
-    else:
-        val = pmcid.text.replace("PMC", "")
+    elif pmcid is not None:
+        val = pmcid.text
+
+    # An empty <article-id/> has ``text`` None, and int() used to accept
+    # surrounding whitespace, so normalise before validating.
+    val = (val or "").strip()
+    if val.startswith("PMC"):
+        val = val[3:]
+
+    if not val.isdigit():
+        raise ValueError("No valid PMCID found in article XML.")
     return int(val)
 
 