CERNDocumentServer · zubeydecivelek · May 5, 2026 · kpsherva · May 8, 2026 · zubeydecivelek
diff --git a/cds_migrator_kit/rdm/records/transform/models/eco.py b/cds_migrator_kit/rdm/records/transform/models/eco.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2026 CERN.
+#
+# CDS-RDM is free software; you can redistribute it and/or modify it under
+# the terms of the MIT License; see LICENSE file for more details.
+
+"""CDS-RDM ECO model."""
+
+from cds_migrator_kit.rdm.records.transform.models.base_record import (
+    rdm_base_record_model,
+)
+from cds_migrator_kit.transform.overdo import CdsOverdo
+
+
+class ECOModel(CdsOverdo):
+    """Translation model for ECO records.
+
+    The query below matches POSTER records, BROCHURE records tagged
+    CERNOFFICIALPRESSBROCHURE or CERNEXPERIMENTBROCHURE, CMSOUTREACH records
+    carrying a "Brochure" keyword, and NOTE records with ``710__.5:IR``.
+    """
+
+    # Search expression in MARC tag syntax; presumably used by the base class
+    # to select the records handled by this model (confirm in CdsOverdo).
+    # Records matching 595__a:Press, 980__:LHCb_Misc or 690C_a:PRIVATLAS are
+    # excluded by the trailing negated clauses.
+    __query__ = """
+        (
+            980__:POSTER
+            OR (980__:BROCHURE AND 690C_:CERNOFFICIALPRESSBROCHURE)
+            OR (
+                (980__:BROCHURE AND 690C_:CERNEXPERIMENTBROCHURE)
+                OR (
+                    980__:CMSOUTREACH
+                    AND (
+                        6531_.a:Brochure
+                        OR 6531_.a:brochure
+                        OR 6531_a:Brochure
+                        OR 6531_a:brochure
+                    )
+                )
+            )
+            OR (980__:NOTE AND 710__.5:IR)
+        )
+        AND -595__a:Press
+        AND -980__:LHCb_Misc
+        AND -690C_a:PRIVATLAS
+    """
+
+    # MARC field/subfield keys that need no translation rule; presumably the
+    # base class skips them instead of reporting them as unhandled (confirm
+    # in CdsOverdo). Entries marked TODO are still open questions.
+    __ignore_keys__ = {
+        "0247_9",  # source of pid, only value: OSTI, 2948638, 2853279
+        "0248_a",
+        "0248_p",
+        "0248_q",
+        "035__d",  # oai harvest tag
+        "035__h",  # oai harvest tag
+        "035__m",  # oai harvest tag
+        "100__m",  # email of contributor
+        "245__9",  # source of title, only value: submitter
+        "270__m",  # email of contact person - TODO: is it okay to ignore? example: 2908973
+        "270__p",  # contact person name - TODO: is it okay to ignore?
+        "300__a",  # number of pages
+        "340__a",  # Physical medium
+        "520__9",  # abstract provenance
+        "541__e",  # Original source poster https://cds.cern.ch/record/2695195/export/hm
+        "594__a",  # PUB: 2749806, 2749822
+        "6531_9",  # scheme of keywords
+        "700__m",  # email of contributor
+        "773__p",  # display name of the related link TODO: is it okay to ignore?
+        "773__y",  # year, TODO: is it okay to ignore? https://cds.cern.ch/record/1452204/export/xm
+        "773__v",  # TODO: is it okay to ignore? https://cds.cern.ch/record/1452204/export/xm
+        "852__c",
+        "852__h",
+        "8560_f",  # contact email
+        "8564_8",  # file id
+        "8564_s",  # bibdoc id
+        "8564_x",  # icon thumbnails sizes
+        "8564_y",  # file description - handled by files dump
+        "8564_z",  # DM metadata
+        "937__c",  # last modified by
+        "937__s",  # last modification date
+        "960__a",  # base number
+        "961__a",  # CDS modification tag
+        "961__b",  # CDS modification tag
+        "961__c",  # CDS modification tag
+        "961__h",  # CDS modification tag
+        "961__l",  # CDS modification tag
+        "961__x",  # CDS modification tag
+        "981__a",  # duplicate record id
+    }
+
+    # Initial values for the translated record; presumably applied by the base
+    # class when a record does not provide these fields (confirm in
+    # CdsOverdo). The creator entry is the organisation "CERN".
+    _default_fields = {
+        "custom_fields": {},
+        "languages": [],
+        "related_identifiers": [],
+        "creators": [{"person_or_org": {"type": "organizational", "name": "CERN"}}],
+    }
+
+
+# Module-level model instance imported by the ECO rules module. It is built on
+# top of the base RDM record model and given the ECO rules entry point group.
+eco_model = ECOModel(
+    bases=(rdm_base_record_model,),
+    entry_point_group="cds_migrator_kit.migrator.rules.eco",
+)
diff --git a/cds_migrator_kit/rdm/records/transform/models/it.py b/cds_migrator_kit/rdm/records/transform/models/it.py
@@ -32,6 +32,8 @@ class ITModel(CdsOverdo):
                     -980__:BOOK
                     -690C_:YELLOWREPORT
                     -690C_:"YELLOW REPORT"
+                    -690C_:CERNOFFICIALPRESSBROCHURE
+                    -690C_:CERNEXPERIMENTBROCHURE
                     -980__:THESIS
                     -980__:INTNOTECMSPUBL
                     """

diff --git a/cds_migrator_kit/rdm/records/transform/transform.py b/cds_migrator_kit/rdm/records/transform/transform.py
@@ -472,7 +472,7 @@ def field_experiments(record_json, custom_fields_dict):
                 "cern:experiments", []
             )
             for experiment in experiments:
-                if experiment.lower().strip() == "not applicable":
+                if experiment.lower().strip() in ["not applicable", "select:"]:
                     continue
                 result = search_vocabulary(experiment, "experiments")
 

diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py
@@ -73,8 +73,8 @@ def created(self, key, value):
         source = clean_val("s", value, str)
         # h = human catalogued
         # n = script catalogued or via submission
-        if source not in ["n", "h", "m", "r"]:
-            raise UnexpectedValue(subfield="s", field=key, value=value)
+        if source not in ["n", "h", "m", "r", "d"]:
+            raise UnexpectedValue(subfield="s", field=key, value=source)
     date_values = value.get("w")
     if not date_values or not date_values[0]:
         return datetime.date.today().isoformat()
@@ -801,6 +801,10 @@ def related_identifiers_787(self, key, value):
             "relation_type": {"id": "references"},
             "resource_type": {"id": "publication-conferencepaper"},
         },
+        "paper": {
+            "relation_type": {"id": "references"},
+            "resource_type": {"id": "publication-article"},
+        },
     }
 
     if recid:

diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/bulletin_issue.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/bulletin_issue.py
@@ -197,7 +197,7 @@ def subjects_bulletin(self, key, value):
 @for_each_value
 def urls_bulletin(self, key, value):
     content_type = value.get("x", "")
-    if content_type == "icon":
+    if "icon" in content_type:
         # ignore icon urls (conditionally ignoring by accessing the value
         url_q = value.get("q", "")
         url_u = value.get("u", "")

diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/eco.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/eco.py
@@ -0,0 +1,244 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2026 CERN.
+#
+# CDS-RDM is free software; you can redistribute it and/or modify it under
+# the terms of the MIT License; see LICENSE file for more details.
+
+"""CDS-RDM ECO rules."""
+
+import re
+
+from dojson.utils import IgnoreKey, for_each_value, force_list
+
+from cds_migrator_kit.errors import UnexpectedValue
+from cds_migrator_kit.rdm.records.transform.xml_processing.rules.it import (
+    corporate_author,
+)
+from cds_migrator_kit.transform.xml_processing.quality.decorators import (
+    require,
+)
+from cds_migrator_kit.transform.xml_processing.quality.parsers import (
+    StringValue,
+)
+from cds_migrator_kit.transform.xml_processing.rules.base import (
+    languages as base_languages,
+)
+
+from ...models.eco import eco_model as model
+from .base import identifiers
+from .base import note as base_note
+from .base import report_number, urls
+from .bulletin_issue import (
+    additional_descriptions,
+    additional_titles_bulletin,
+    rel_identifiers,
+    translated_description,
+    urls_bulletin,
+)
+from .it import corporate_author
+from .publications import internal_notes, journal, organisation, related_identifiers
+
+# Register rules imported from other rule modules on the ECO model, each bound
+# to the MARC field pattern given as second argument.
+model.over("additional_titles", "(^246_[1_])", override=True)(
+    additional_titles_bulletin
+)
+model.over("additional_descriptions", "(^500__)")(additional_descriptions)
+model.over("additional_descriptions", "(^590__)")(translated_description)
+# NOTE: at this point `internal_notes` is the rule imported from
+# `.publications`; the name is rebound below by this module's own 595__ rule.
+model.over("internal_notes", "^562__")(internal_notes)
+model.over("contributors", "^901__")(organisation)
+model.over("creators", "(^110__)")(corporate_author)
+model.over("eco_urls", "^8564[1_]", override=True)(urls_bulletin)
+
+
+@model.over("internal_notes", "^595__")
+@for_each_value
+def internal_notes(self, key, value):
+    """Translates internal notes.
+
+    Every non-empty ``s`` subfield is appended to ``subjects``; the field is
+    then passed to the base ``note`` rule.
+
+    :param key: MARC key that matched (``595__``).
+    :param value: subfields of the matched field.
+    :raises IgnoreKey: always, nothing is returned for the key itself.
+    """
+    # Drop empty entries: a missing ``s`` must not produce an empty subject
+    # (``force_list("")`` is ``[""]``, which is truthy).
+    subject_notes = [note for note in force_list(value.get("s")) or [] if note]
+    if subject_notes:
+        # add them as subjects
+        subjects = self.get("subjects", [])
+        subjects.extend({"subject": note} for note in subject_notes)
+        self["subjects"] = subjects
+    # The return value is not used here; presumably the base rule records the
+    # note on the record itself (confirm in the base rules module).
+    base_note(self, key, value)
+    raise IgnoreKey("internal_notes")
+
+
+@model.over("eco_report_number", "(^037__)|(^088__)", override=True)
+@for_each_value
+def eco_report_number(self, key, value):
+    """Translates report number.
+
+    Delegates to the base ``report_number`` rule and merges its result into
+    ``identifiers``, skipping entries that are already present.
+
+    :param key: MARC key that matched (``037__`` or ``088__``).
+    :param value: subfields of the matched field.
+    :raises IgnoreKey: always, results are stored under ``identifiers``.
+    """
+    identifier = value.get("a", "")
+    # Check it's email TODO: how to handle?
+    if key == "088__" and "@" in identifier:
+        # 088__ values that look like an e-mail address are skipped for now.
+        raise IgnoreKey("eco_report_number")
+
+    new_identifiers = report_number(self, key, value)
+    existing = self.get("identifiers", [])
+    # Compare entry by entry: testing the whole result list for membership
+    # never matches an existing entry, so duplicates would not be filtered.
+    for new_id in force_list(new_identifiers) or []:
+        if new_id and new_id not in existing:
+            existing.append(new_id)
+    self["identifiers"] = existing
+    raise IgnoreKey("eco_report_number")
+
+
+@model.over("eco_related_identifiers", "(^962__)", override=True)
+@for_each_value
+def eco_related_identifiers(self, key, value):
+    """Translates related identifiers.
+
+    Fields carrying an ``l`` subfield are handed to ``rel_identifiers``; all
+    others go through ``related_identifiers`` and the first result is added
+    to ``related_identifiers`` unless it is already there.
+
+    :raises IgnoreKey: always, nothing is returned for the key itself.
+    """
+    if value.get("l", ""):
+        rel_identifiers(self, key, value)
+    else:
+        translated = related_identifiers(self, key, value)
+        if translated:
+            candidate = translated[0]
+            known = self.get("related_identifiers", [])
+            if candidate not in known:
+                known.append(candidate)
+                self["related_identifiers"] = known
+    raise IgnoreKey("eco_related_identifiers")
+
+
+@model.over("eco_identifiers", "^035__", override=True)
+@for_each_value
+def eco_identifiers(self, key, value):
+    """Translates identifiers.
+
+    Everything except the ``phopho`` scheme is delegated to the base
+    ``identifiers`` rule.
+
+    :raises IgnoreKey: always, nothing is returned for the key itself.
+    """
+    scheme = StringValue(value.get("9", "")).parse().lower()
+
+    if scheme == "phopho":
+        # TODO: handle photo identifier. The value is parsed but not stored
+        # yet; intended shape: {"scheme": "photo", "identifier": <subfield a>}.
+        StringValue(value.get("a", "")).parse()
+    else:
+        identifiers(self, key, value)
+    raise IgnoreKey("eco_identifiers")
+
+
+@model.over("resource_type", "^980__", override=True)
+def resource_type(self, key, value):
+    """Translates resource_type.
+
+    Reads subfield ``a`` when present, otherwise ``b``, and maps the
+    lower-cased, stripped value to a resource type id.
+
+    :returns: dict with the resource type ``id``.
+    :raises UnexpectedValue: when the value has no mapping.
+    """
+    known_types = {
+        "poster": {"id": "poster"},
+        "brochure": {"id": "publication-brochure"},
+        "note": {"id": "publication-technicalnote"},
+        "conferencepaper": {"id": "publication-conferencepaper"},
+    }
+
+    raw_type = value.get("a" if "a" in value else "b")
+    normalized = raw_type.strip().lower() if raw_type else raw_type
+
+    translated = known_types.get(normalized)
+    if translated is None:
+        raise UnexpectedValue(
+            "Unknown resource type (ECO)", field=key, value=normalized
+        )
+    return translated
+
+
+@model.over("collection", "^690C_", override=True)
+@for_each_value
+def collection(self, key, value):
+    """Translates collection.
+
+    ``CERN`` is ignored, ``POSTER`` and ``PREPRINT`` are stored as
+    ``collection:<value>`` subjects, anything else is rejected.
+
+    NOTE(review): the ECO model query also selects 690C_ values such as
+    CERNOFFICIALPRESSBROCHURE, which this rule does not accept - confirm
+    whether those are meant to be reported as unexpected.
+
+    :raises UnexpectedValue: for any other collection value.
+    :raises IgnoreKey: otherwise, nothing is returned for the key itself.
+    """
+    raw_collection = value.get("a", "")
+    normalized = raw_collection.strip().upper()
+
+    if normalized == "CERN":
+        raise IgnoreKey("collection")
+    if normalized not in ("POSTER", "PREPRINT"):
+        raise UnexpectedValue(subfield="a", field=key, value=value)
+
+    # The subject keeps the value exactly as found in the record.
+    record_subjects = self.get("subjects", [])
+    record_subjects.append({"subject": f"collection:{raw_collection}"})
+    self["subjects"] = record_subjects
+    raise IgnoreKey("collection")
+
+
+@model.over("related_ids", "^773__")
+@for_each_value
+def related_ids(self, key, value):
+    """Translates related links.
+
+    Without a ``u`` subfield the field is passed to the ``journal`` rule and
+    its result stored as ``custom_fields``. Otherwise the field is translated
+    by the base ``urls`` rule and the first result is added to
+    ``related_identifiers`` unless already present.
+
+    :raises IgnoreKey: always, nothing is returned for the key itself.
+    """
+    if not value.get("u", ""):
+        self["custom_fields"] = journal(self, key, value)
+        raise IgnoreKey("related_ids")
+
+    # Transform like the base `urls` rule
+    translated = urls(self, key, value)
+    if translated:
+        first_link = translated[0]
+        known = self.get("related_identifiers", [])
+        if first_link not in known:
+            known.append(first_link)
+        self["related_identifiers"] = known
+
+    raise IgnoreKey("related_ids")
+
+
+@model.over("submitter_info", "^923__")
+@for_each_value
+def submitter_info(self, key, value):
+    """Translates submitter information.
+
+    Subfield ``r`` is read as ``"Family name, Given name"`` and added to
+    ``contributors`` with the ``contactperson`` role. Fields without a name
+    are skipped.
+
+    :param key: MARC key that matched (``923__``).
+    :param value: subfields of the matched field.
+    :raises IgnoreKey: always, nothing is returned for the key itself.
+    """
+    raw_name = value.get("r", "").strip()
+    if not raw_name:
+        # No name available: do not create a contributor with an empty name.
+        raise IgnoreKey("submitter_info")
+
+    # Split on the first comma only, so text after a second comma stays part
+    # of the given name instead of being dropped.
+    family_name, _, given_name = (part.strip() for part in raw_name.partition(","))
+    names = {"family_name": family_name}
+    if given_name:
+        names["given_name"] = given_name
+
+    contributor = {
+        "person_or_org": {
+            "type": "personal",
+            **names,
+        },
+        "role": {"id": "contactperson"},
+    }
+    contributors = self.get("contributors", [])
+    contributors.append(contributor)
+    self["contributors"] = contributors
+    raise IgnoreKey("submitter_info")
+
+
+@model.over("languages", "^041__", override=True)
+@for_each_value
+@require(["a"])
+def language(self, key, value):
+    """Translates languages fields.
+
+    Subfield ``a`` may hold several codes separated by ``-`` or ``/``; each
+    code is translated by the base ``languages`` rule and the results are
+    appended to ``languages``.
+
+    :raises IgnoreKey: always, nothing is returned for the key itself.
+    """
+    raw_codes = value.get("a")
+    collected = self.get("languages", [])
+    if "-" in raw_codes or "/" in raw_codes:
+        # https://cds.cern.ch/record/921930/export/xm
+        for code in filter(None, re.split(r"[-/]+", raw_codes)):
+            collected.extend(base_languages(self, key, {"a": code}))
+    else:
+        collected.extend(base_languages(self, key, value))
+    self["languages"] = collected
+    raise IgnoreKey("language")
+
+
+@model.over("field_993", "^993__", override=True)
+@for_each_value
+def field_993(self, key, value):
+    """Translates field 993 as a keyword.
+
+    Subfield ``q`` is added to ``subjects``. Fields without ``q`` are skipped.
+
+    :param key: MARC key that matched (``993__``).
+    :param value: subfields of the matched field.
+    :raises UnexpectedValue: when ``q`` holds a value other than the known
+        ones.
+    :raises IgnoreKey: otherwise, nothing is returned for the key itself.
+    """
+    keyword = value.get("q", "")
+    if not keyword:
+        # Nothing to translate: do not add an empty subject.
+        raise IgnoreKey("field_993")
+    if keyword not in ["Project Management"]:
+        # Report the subfield that was actually read.
+        raise UnexpectedValue(field=key, subfield="q", value=keyword)
+    _subjects = self.get("subjects", [])
+    _subjects.append({"subject": keyword})
+    self["subjects"] = _subjects
+    raise IgnoreKey("field_993")