From 5e9156f919d67f19c270893b1070af29b6820e4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?=
Date: Thu, 19 Mar 2026 17:03:12 +0100
Subject: [PATCH] add(staff-association): new model and transformation rules

---
Review notes (free text before the diffstat; not part of the commit message):
 * Line breaks and indentation were restored from a whitespace-collapsed copy
   of this patch. Whitespace of the context lines in base.py,
   bulletin_issue.py, streams.yaml and setup.cfg (including the position of
   blank context lines) is a best guess; regenerate with `git format-patch`
   if a hunk does not apply.
 * rules/base.py: the membership test now uses the dict directly instead of
   `relation_map.keys()`.
 * rules/base.py: "corresponding video" maps to resource type "audio";
   please confirm this is intended.
 * rules/staff_association.py: docstrings expanded (48 -> 59 lines). The
   result of the delegated `collection` rule is still discarded; see the
   NOTE(review) comment in the file.
 * Blob ids on the `index` lines were not recomputed for the edited files.

 .../transform/models/staff_association.py     | 103 ++++++++++++++++++
 .../transform/xml_processing/rules/base.py    |  18 +++
 .../xml_processing/rules/bulletin_issue.py    |   2 +-
 .../xml_processing/rules/staff_association.py |  59 ++++++++++
 cds_migrator_kit/rdm/streams.yaml             |  11 ++
 setup.cfg                                     |   5 +
 6 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 cds_migrator_kit/rdm/records/transform/models/staff_association.py
 create mode 100644 cds_migrator_kit/rdm/records/transform/xml_processing/rules/staff_association.py

diff --git a/cds_migrator_kit/rdm/records/transform/models/staff_association.py b/cds_migrator_kit/rdm/records/transform/models/staff_association.py
new file mode 100644
index 00000000..d790bfa5
--- /dev/null
+++ b/cds_migrator_kit/rdm/records/transform/models/staff_association.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2026 CERN.
+#
+# CDS-RDM is free software; you can redistribute it and/or modify it under
+# the terms of the MIT License; see LICENSE file for more details.
+
+"""CDS-RDM Staff Association model."""
+from cds_migrator_kit.rdm.records.transform.models.bulletin_issue import (
+    bull_issue_model,
+)
+from cds_migrator_kit.transform.overdo import CdsOverdo
+
+
+class StaffAssociationModel(CdsOverdo):
+    """Translation model for Staff Association."""
+
+    __query__ = """
+    (
+        980__:BULLETINSTAFF
+        -980__:CERN_BULLETIN_ARTICLE
+        -980__:CERN_BULLETIN_ISSUE
+    )
+    OR
+    (
+        980__:STAFFASSOCIATION
+        594__:PUB
+    )
+    """
+
+    # Copy-pasted from bulletin issue
+    __ignore_keys__ = {
+        "0248_a",
+        "0248_p",
+        "0248_q",
+        "100__m",  # email of contributor
+        "110__a",  # corporate author, always CERN, safe to ignore
+        "300__a",  # number of pages
+        "336__a",  # DM metadata
+        "5831_2",  # DM tags 1054836
+        "5831_5",  # DM tags
+        "5831_a",  # DM tags
+        "5831_c",  # DM tags
+        "5831_f",  # DM tags
+        "5831_i",  # DM tags
+        "5831_k",  # DM tags
+        "5831_u",  # DM tags
+        "5831_3",  # DM tags
+        "5831_6",  # DM tags
+        "5831_n",  # DM tags
+        "5831_b",  # DM tags
+        "5831_o",  # DM tags
+        "583__a",  # DM tags
+        "583__c",  # DM tags
+        "583__z",  # DM tags
+        "594__a",  # values: "no", "pub"
+        "650172",  # scheme of subjects
+        "6531_9",  # scheme of keywords
+        "691__a",  # draft/online values, redundant
+        "700__m",  # email of contributor
+        "773__p",  # title of the "CERN Bulletin" series
+        "773__t",  # CERN Bulletin value, redundant
+        "773__y",  # year, duplicate of 260
+        "8560_f",  # contact email
+        "8564_8",  # file id
+        "8564_s",  # bibdoc id
+        "8564_x",  # icon thumbnails sizes
+        "8564_y",  # file description - done by files dump
+        "8564_2",  # DM metadata
+        "8564_q",  # DM metadata
+        "8564_w",  # DM metadata
+        "8564_z",  # DM metadata
+        "8567_2",  # DM tags
+        "8567_q",  # DM tags
+        "8567_w",  # DM tags
+        "8567_d",  # DM tags
+        "859__a",  # TODO: Implement rule for this, 2595/3306 records have this field
+        "906__m",  # edit rights, will be granted by the community
+        "937__c",  # last modified by
+        "937__s",  # last modification date
+        "960__a",  # base number
+        "961__a",  # CDS modification tag # TODO
+        "961__b",  # CDS modification tag # TODO
+        "961__c",  # CDS modification tag # TODO
+        "961__h",  # CDS modification tag # TODO
+        "961__l",  # CDS modification tag # TODO
+        "961__x",  # CDS modification tag # TODO
+        "981__a",  # duplicate record id
+        # "246_1a",
+        # "690C_a",
+    }
+
+    _default_fields = {
+        # TODO should we keep this custom field?
+        "custom_fields": {"journal:journal": {"title": "CERN Bulletin"}},
+        "creators": [{"person_or_org": {"type": "organizational", "name": "CERN"}}],
+    }
+
+
+staff_association_model = StaffAssociationModel(
+    bases=(bull_issue_model,),
+    entry_point_group="cds_migrator_kit.migrator.rules.staff_association",
+)
diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py
index c0b4ee17..5a7eb007 100644
--- a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py
+++ b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py
@@ -801,10 +801,28 @@ def related_identifiers_787(self, key, value):
             "relation_type": {"id": "references"},
             "resource_type": {"id": "publication-conferencepaper"},
         },
+        "corresponding video": {
+            "relation_type": {"id": "references"},
+            "resource_type": {"id": "audio"},
+        },
+        "manuscript": {
+            "relation_type": {"id": "isderivedfrom"},
+            "resource_type": {"id": "publication-preprint"},
+        },
+        "bulletin article": {
+            "relation_type": {"id": "references"},
+            "resource_type": {"id": "publication-periodicalarticle"},
+        },
     }
 
     if recid:
         if description:
+            if description not in relation_map:
+                raise UnexpectedValue(
+                    f"Unexpected relation description {description}",
+                    field=key,
+                    value=value,
+                )
             new_id = {
                 "identifier": recid,
                 "scheme": "cds",
diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/bulletin_issue.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/bulletin_issue.py
index df373cb9..b405f654 100644
--- a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/bulletin_issue.py
+++ b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/bulletin_issue.py
@@ -229,7 +229,7 @@ def urls_bulletin_bis(self, key, value):
 
 
 @model.over("custom_fields_journal", "(^916__)", override=True)
-def issue_number(self, key, value):
+def custom_fields_journal(self, key, value):
     _custom_fields = self.get("custom_fields", {})
     issue = value.get("z")
 
diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/staff_association.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/staff_association.py
new file mode 100644
index 00000000..3fcd8f6f
--- /dev/null
+++ b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/staff_association.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2026 CERN.
+#
+# CDS-RDM is free software; you can redistribute it and/or modify it under
+# the terms of the MIT License; see LICENSE file for more details.
+
+"""CDS-RDM Staff Association rules."""
+
+from dojson.errors import IgnoreKey
+
+from cds_migrator_kit.errors import UnexpectedValue
+from cds_migrator_kit.rdm.records.transform.xml_processing.rules.base import (
+    additional_titles,
+)
+from cds_migrator_kit.transform.xml_processing.quality.decorators import for_each_value
+
+from ...models.staff_association import staff_association_model as model
+from .bulletin_issue import collection
+from .publications import internal_notes
+
+model.over("internal_notes", "^562__")(internal_notes)
+model.over("additional_titles", "(^242__)")(additional_titles)
+
+
+@model.over("resource_type", "^980__", override=True)
+def resource_type(self, key, value):
+    """Translate the 980 field into the record's resource type.
+
+    Reads subfield ``a``, or ``b`` when ``a`` is absent, and compares it
+    case-insensitively. Raises ``UnexpectedValue`` for any value other than
+    ``bulletinstaff`` or ``staffassociation``, including a missing one.
+    """
+    value = value.get("a") if "a" in value else value.get("b")
+    if value:
+        value = value.lower()
+    if value in ["bulletinstaff", "staffassociation"]:
+        # TODO what is the resource type?
+        return {"id": "publication-periodicalarticle"}
+    raise UnexpectedValue(
+        "Unknown resource type (STAFF ASSOCIATION)", field=key, value=value
+    )
+
+
+@model.over("collection", "^690C_", override=True)
+@for_each_value
+def staff_association_collection(self, key, value):
+    """Translate the 690C collection field.
+
+    Raises ``IgnoreKey("collection")`` for an ``SA documents`` value; any
+    other value is handed to the bulletin issue ``collection`` rule.
+    """
+    collection_a = value.get("a", "").strip().lower()
+    # Drop sa documents
+    if collection_a == "sa documents":
+        raise IgnoreKey("collection")
+    # NOTE(review): the delegated rule's result is not returned here;
+    # confirm it only validates/raises and never yields a value to keep.
+    collection(self, key, value)
diff --git a/cds_migrator_kit/rdm/streams.yaml b/cds_migrator_kit/rdm/streams.yaml
index 87e52a0a..893a66f6 100644
--- a/cds_migrator_kit/rdm/streams.yaml
+++ b/cds_migrator_kit/rdm/streams.yaml
@@ -122,3 +122,14 @@ records:
       missing_users: cds_migrator_kit/rdm/data/users
       communities_ids:
         - ""
+  staff_association:
+    data_dir: cds_migrator_kit/rdm/data/staff_association
+    tmp_dir: cds_migrator_kit/rdm/tmp/staff_association
+    log_dir: cds_migrator_kit/rdm/log/staff_association
+    extract:
+      dirpath: cds_migrator_kit/rdm/data/staff_association/dump/
+    transform:
+      files_dump_dir: cds_migrator_kit/rdm/data/staff_association/files/
+      missing_users: cds_migrator_kit/rdm/data/users
+      communities_ids:
+        - "9ab1f6bd-b213-4bb7-9249-13b9665e453a"
diff --git a/setup.cfg b/setup.cfg
index 493dceca..2b6abaec 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -83,6 +83,7 @@ cds_migrator_kit.migrator.models =
     en = cds_migrator_kit.rdm.records.transform.models.en:en_model
     annual_rep = cds_migrator_kit.rdm.records.transform.models.annual_report:annual_rep_model
     fap = cds_migrator_kit.rdm.records.transform.models.fap:fap_model
+    staff_association = cds_migrator_kit.rdm.records.transform.models.staff_association:staff_association_model
 cds_migrator_kit.migrator.rules.base =
     base = cds_migrator_kit.transform.xml_processing.rules.base
 cds_migrator_kit.migrator.rdm.rules.base =
@@ -164,6 +165,10 @@ cds_migrator_kit.migrator.rules.fap =
     base = cds_migrator_kit.transform.xml_processing.rules.base
     base_records = cds_migrator_kit.rdm.records.transform.xml_processing.rules.base
     fap = cds_migrator_kit.rdm.records.transform.xml_processing.rules.fap
+cds_migrator_kit.migrator.rules.staff_association =
+    base = cds_migrator_kit.transform.xml_processing.rules.base
+    base_records = cds_migrator_kit.rdm.records.transform.xml_processing.rules.base
+    staff_association = cds_migrator_kit.rdm.records.transform.xml_processing.rules.staff_association
 cds_migrator_kit.migrator.rules.people =
     people = cds_migrator_kit.rdm.users.transform.xml_processing.rules.people
 invenio_pidstore.minters =