diff --git a/src/swmath2swh/staging_deposit_v2.py b/src/swmath2swh/staging_deposit_v2.py
index 3c8b1ee3..0bccdbe5 100644
--- a/src/swmath2swh/staging_deposit_v2.py
+++ b/src/swmath2swh/staging_deposit_v2.py
@@ -1,9 +1,7 @@
-from swmath2swh.restApi_software_Json import process_metadata
-from swmath2swh.convertSoftware_from_json_toXml import convert_json_to_xml
+
 import defusedxml.ElementTree as DET # Using defusedxml for safe XML parsing
 from defusedxml.lxml import fromstring
 import lxml.etree as ET
-import pandas as pd
 import subprocess
 import time
 import tempfile
@@ -15,7 +13,7 @@
 env['SWMATH_USER_DEPOSIT'] = os.getenv('SWMATH_USER_DEPOSIT')
 env['SWMATH_PWD_DEPOSIT'] = os.getenv('SWMATH_PWD_DEPOSIT')
 
-xsl_filename = '../xslt/software/xslt_SWH_deposit.xslt'
+xsl_filename = '../../xslt/software/xslt_SWH_deposit.xslt'
 
 # Fetch XML data
 r = requests.get("https://oai.portal.mardi4nfdi.de/oai/OAIHandler?verb=GetRecord&metadataPrefix=codemeta&identifier=oai:swmath.org:4532")
@@ -34,8 +32,11 @@
 transform = ET.XSLT(xslt)
 newdom = transform(lxml_dom)
 formatted_newdom = ET.tostring(newdom, pretty_print=True, encoding='unicode')
+formatted_newdom = re.sub(r'xmlns:xsi="[^"]+"', '', formatted_newdom)  # strip the xmlns:xsi declaration from the serialized output
 formatted_newdom = re.sub(r'xmlns:ns\d+="[^"]+"', '', formatted_newdom)
 formatted_newdom = re.sub(r'ns\d+:', 'codemeta:', formatted_newdom)
+formatted_newdom = re.sub(r'<\s*([^>]+?)\s*>', r'<\1>', formatted_newdom)  # trim whitespace just inside the tag brackets (left behind by the removed xmlns attributes)
+formatted_newdom = re.sub(r'<\s*/\s*([^>]+?)\s*>', r'</\1>', formatted_newdom)  # normalise closing tags to </name>; an empty replacement here would delete every end tag
 print(formatted_newdom)
 
 # Write transformed XML to a temporary file