Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions cds_migrator_kit/rdm/records/transform/models/staff_association.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2026 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""CDS-RDM Staff Association model."""
from cds_migrator_kit.rdm.records.transform.models.bulletin_issue import (
bull_issue_model,
)
from cds_migrator_kit.transform.overdo import CdsOverdo


class StaffAssociationModel(CdsOverdo):
"""Translation model for Staff Association."""

# Search query selecting the records handled by this model. It has two
# parenthesised groups joined by OR: one on 980__:BULLETINSTAFF with the
# CERN_BULLETIN_ARTICLE and CERN_BULLETIN_ISSUE tags prefixed by "-", and one
# on 980__:STAFFASSOCIATION together with 594__:PUB.
# NOTE(review): presumably terms inside a group are AND-ed and a leading "-"
# excludes matching records — confirm against the search syntax used to
# produce the dump.
__query__ = """
(
980__:BULLETINSTAFF
-980__:CERN_BULLETIN_ARTICLE
-980__:CERN_BULLETIN_ISSUE
)
OR
(
980__:STAFFASSOCIATION
594__:PUB
)
"""

# MARC keys listed for this model as ignored. The list was copy-pasted from
# the bulletin issue model.
# NOTE(review): presumably the CdsOverdo base class skips these keys instead
# of reporting them as unhandled — confirm against the base class.
__ignore_keys__ = {
"0248_a",
"0248_p",
"0248_q",
"100__m", # email of contributor
"110__a", # corporate author, always CERN, safe to ignore
"300__a", # number of pages
"336__a", # DM metadata
"5831_2", # DM tags (1054836: presumably an example record id — TODO confirm)
"5831_5", # DM tags
"5831_a", # DM tags
"5831_c", # DM tags
"5831_f", # DM tags
"5831_i", # DM tags
"5831_k", # DM tags
"5831_u", # DM tags
"5831_3", # DM tags
"5831_6", # DM tags
"5831_n", # DM tags
"5831_b", # DM tags
"5831_o", # DM tags
"583__a", # DM tags
"583__c", # DM tags
"583__z", # DM tags
"594__a", # values: "no", "pub"
"650172", # scheme of subjects
"6531_9", # scheme of keywords
"691__a", # draft/online values, redundant
"700__m", # email of contributor
"773__p", # title of the "CERN Bulletin" series
"773__t", # CERN Bulletin value, redundant
"773__y", # year, duplicate of 260
"8560_f", # contact email
"8564_8", # file id
"8564_s", # bibdoc id
"8564_x", # icon thumbnails sizes
"8564_y", # file description - done by files dump
"8564_2", # DM metadata
"8564_q", # DM metadata
"8564_w", # DM metadata
"8564_z", # DM metadata
"8567_2", # DM tags
"8567_q", # DM tags
"8567_w", # DM tags
"8567_d", # DM tags
"859__a", # TODO: Implement rule for this, 2595/3306 records have this field
"906__m", # edit rights, will be granted by the community
"937__c", # last modified by
"937__s", # last modification date
"960__a", # base number
"961__a", # CDS modification tag # TODO
"961__b", # CDS modification tag # TODO
"961__c", # CDS modification tag # TODO
"961__h", # CDS modification tag # TODO
"961__l", # CDS modification tag # TODO
"961__x", # CDS modification tag # TODO
"981__a", # duplicate record id
# NOTE(review): the two keys below are commented out, so this model does not
# ignore them. 690C_ has a dedicated rule (staff_association_collection);
# confirm that 246_1a is handled by a rule as well.
# "246_1a",
# "690C_a",
}

_default_fields = {
# TODO should we keep this custom field?
"custom_fields": {"journal:journal": {"title": "CERN Bulletin"}},
Comment on lines +93 to +95
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to keep this journal custom field here, as in the bulletin model?

"creators": [{"person_or_org": {"type": "organizational", "name": "CERN"}}],
}


# Module-level model instance for Staff Association records. It is built with
# the bulletin issue model as its base and the staff association rules entry
# point group.
staff_association_model = StaffAssociationModel(
    entry_point_group="cds_migrator_kit.migrator.rules.staff_association",
    bases=(bull_issue_model,),
)
Original file line number Diff line number Diff line change
Expand Up @@ -801,10 +801,28 @@ def related_identifiers_787(self, key, value):
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-conferencepaper"},
},
"corresponding video": {
"relation_type": {"id": "references"},
"resource_type": {"id": "audio"},
Comment thread
zubeydecivelek marked this conversation as resolved.
},
Comment thread
zubeydecivelek marked this conversation as resolved.
"manuscript": {
"relation_type": {"id": "isderivedfrom"},
"resource_type": {"id": "publication-preprint"},
},
Comment on lines +808 to +811
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the only record? I ask because this particular one is not a preprint.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes that's the only record

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then please check with the curators what it should be; this one seems incorrect.

"bulletin article": {
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-periodicalarticle"},
},
}

if recid:
if description:
if description not in relation_map.keys():
raise UnexpectedValue(
f"Unexpected relation description {description}",
field=key,
value=value,
)
new_id = {
"identifier": recid,
"scheme": "cds",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def urls_bulletin_bis(self, key, value):


@model.over("custom_fields_journal", "(^916__)", override=True)
def issue_number(self, key, value):
def custom_fields_journal(self, key, value):
_custom_fields = self.get("custom_fields", {})

issue = value.get("z")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2026 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""CDS-RDM Staff Association rules."""

from dojson.errors import IgnoreKey

from cds_migrator_kit.errors import UnexpectedValue
from cds_migrator_kit.rdm.records.transform.xml_processing.rules.base import (
additional_titles,
)
from cds_migrator_kit.transform.xml_processing.quality.decorators import for_each_value

from ...models.staff_association import staff_association_model as model
from .bulletin_issue import collection
from .publications import internal_notes

# Reuse rules implemented elsewhere by registering them on this model:
# internal_notes (imported from .publications) for 562__ and additional_titles
# (imported from the base rules) for 242__.
model.over("internal_notes", "^562__")(internal_notes)
model.over("additional_titles", "(^242__)")(additional_titles)


@model.over("resource_type", "^980__", override=True)
def resource_type(self, key, value):
    """Translate the 980 field into the record's resource type.

    Subfield ``a`` is read when present, otherwise subfield ``b``. The
    value is compared case-insensitively against the accepted tags.

    :param key: the MARC key being processed, reported on error.
    :param value: mapping of subfield codes to values for this field.
    :returns: the resource type dict for the accepted tags.
    :raises UnexpectedValue: if the subfield is empty or holds any other tag.
    """
    raw_tag = value.get("a" if "a" in value else "b")
    # Lowercase only non-empty values; empty or missing ones are reported as-is.
    tag = raw_tag.lower() if raw_tag else raw_tag
    if tag in ("bulletinstaff", "staffassociation"):
        # TODO what is the resource type?
        return {"id": "publication-periodicalarticle"}
    raise UnexpectedValue(
        "Unknown resource type (STAFF ASSOCIATION)", field=key, value=tag
    )


@model.over("collection", "^690C_", override=True)
@for_each_value
def staff_association_collection(self, key, value):
    """Translate one 690C collection entry for Staff Association records.

    An entry whose subfield ``a`` equals "sa documents" (ignoring case and
    surrounding whitespace) is dropped by raising ``IgnoreKey``. Any other
    entry is handed to the ``collection`` rule imported from the bulletin
    issue rules.

    :param key: the MARC key being processed.
    :param value: mapping of subfield codes to values for this entry.
    :raises IgnoreKey: for "sa documents" entries.
    """
    normalized = value.get("a", "").strip().lower()
    if normalized != "sa documents":
        # NOTE(review): the result of collection() is discarded, so this rule
        # itself returns None on this path — confirm that is intended.
        collection(self, key, value)
        return
    # Drop sa documents
    raise IgnoreKey("collection")
11 changes: 11 additions & 0 deletions cds_migrator_kit/rdm/streams.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,14 @@ records:
missing_users: cds_migrator_kit/rdm/data/users
communities_ids:
- ""
staff_association:
data_dir: cds_migrator_kit/rdm/data/staff_association
tmp_dir: cds_migrator_kit/rdm/tmp/staff_association
log_dir: cds_migrator_kit/rdm/log/staff_association
extract:
dirpath: cds_migrator_kit/rdm/data/staff_association/dump/
transform:
files_dump_dir: cds_migrator_kit/rdm/data/staff_association/files/
missing_users: cds_migrator_kit/rdm/data/users
communities_ids:
- "9ab1f6bd-b213-4bb7-9249-13b9665e453a"
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ cds_migrator_kit.migrator.models =
en = cds_migrator_kit.rdm.records.transform.models.en:en_model
annual_rep = cds_migrator_kit.rdm.records.transform.models.annual_report:annual_rep_model
fap = cds_migrator_kit.rdm.records.transform.models.fap:fap_model
staff_association = cds_migrator_kit.rdm.records.transform.models.staff_association:staff_association_model
cds_migrator_kit.migrator.rules.base =
base = cds_migrator_kit.transform.xml_processing.rules.base
cds_migrator_kit.migrator.rdm.rules.base =
Expand Down Expand Up @@ -164,6 +165,10 @@ cds_migrator_kit.migrator.rules.fap =
base = cds_migrator_kit.transform.xml_processing.rules.base
base_records = cds_migrator_kit.rdm.records.transform.xml_processing.rules.base
fap = cds_migrator_kit.rdm.records.transform.xml_processing.rules.fap
cds_migrator_kit.migrator.rules.staff_association =
base = cds_migrator_kit.transform.xml_processing.rules.base
base_records = cds_migrator_kit.rdm.records.transform.xml_processing.rules.base
staff_association = cds_migrator_kit.rdm.records.transform.xml_processing.rules.staff_association
cds_migrator_kit.migrator.rules.people =
people = cds_migrator_kit.rdm.users.transform.xml_processing.rules.people
invenio_pidstore.minters =
Expand Down
Loading