Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .beads/issues.jsonl

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
### Fixed

### Changed
- **Repeated control fields silently dropped** ([#77](https://github.com/dchud/mrrc/issues/77), [#79](https://github.com/dchud/mrrc/pull/79)): Control fields (e.g., multiple 007s) were stored in `IndexMap<String, String>`, causing later values to overwrite earlier ones. Changed to `IndexMap<String, Vec<String>>` across all three record types (`Record`, `AuthorityRecord`, `HoldingsRecord`). Updated all 7 serialization formats (ISO 2709, JSON, MARCJSON, MARCXML, CSV, Dublin Core/MODS, BIBFRAME) to emit all values. The pymarc-compatible `get_fields()` API now correctly returns all repeated control fields, matching pymarc behavior.

### Fixed
### Changed

### Documentation
- **Simplified control field API**: Removed `get_control_field_values()` accessor from `Record` — redundant with direct access to the public `control_fields` map. `get_control_field()` remains as a convenience for non-repeatable tags (001, 003, 005, 008).

## [0.7.5] - 2026-04-05

Expand Down
15 changes: 6 additions & 9 deletions mrrc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,8 +849,7 @@ def get_fields(self, *tags: str) -> List['Field']:
if not tags:
# Return all control fields, then all data fields
for tag in _CONTROL_TAGS:
value = self._inner.control_field(tag)
if value is not None:
for value in self._inner.control_field_values(tag):
result.append(Field(tag, data=value))
for field in self._inner.fields():
wrapper = Field.__new__(Field)
Expand All @@ -861,8 +860,7 @@ def get_fields(self, *tags: str) -> List['Field']:
# Return fields for specified tags
for tag in tags:
if _is_control_tag(tag):
value = self._inner.control_field(tag)
if value is not None:
for value in self._inner.control_field_values(tag):
result.append(Field(tag, data=value))
else:
for field in self._inner.get_fields(tag):
Expand Down Expand Up @@ -1453,11 +1451,10 @@ def __eq__(self, other: Any) -> bool:
return False

# Compare control fields
for code, value in self._inner.control_fields():
if self._inner.control_field(code) != value:
return False
if other._inner.control_field(code) != value:
return False
self_cfs = self._inner.control_fields()
other_cfs = other._inner.control_fields()
if self_cfs != other_cfs:
return False

return True

Expand Down
12 changes: 11 additions & 1 deletion mrrc/_mrrc.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,17 @@ class Record:
"""
...
def control_fields(self) -> List[tuple[str, str]]:
"""Get all control fields as (tag, value) tuples."""
"""Get all control fields as (tag, value) tuples.

Repeated tags (e.g., multiple 007 fields) produce multiple entries.
"""
...
def control_field_values(self, tag: str) -> List[str]:
"""Get all values for a control field tag.

Returns all values for tags that may be repeated (e.g., 006, 007).
Returns an empty list if the tag doesn't exist.
"""
...
def get_field(self, tag: str) -> Optional[Field]: ...
def get_fields(self, tag: str) -> List[Field]: ...
Expand Down
17 changes: 15 additions & 2 deletions src-python/src/wrappers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,15 +705,28 @@ impl PyRecord {
result
}

/// Get all control fields as a dict-like structure
/// Get all control fields as a list of (tag, value) tuples
///
/// Repeated tags (e.g., multiple 007 fields) produce multiple entries.
pub fn control_fields(&self) -> Vec<(String, String)> {
self.inner
.control_fields
.iter()
.map(|(tag, value)| (tag.clone(), value.clone()))
.flat_map(|(tag, values)| values.iter().map(move |value| (tag.clone(), value.clone())))
.collect()
}

/// Get all values for a control field tag
///
/// Returns all values for tags that may be repeated (e.g., 006, 007).
pub fn control_field_values(&self, tag: &str) -> Vec<String> {
self.inner
.control_fields
.get(tag)
.map(|values| values.iter().map(String::clone).collect())
.unwrap_or_default()
}

/// Remove all fields with a given tag
///
/// Returns the removed fields.
Expand Down
30 changes: 20 additions & 10 deletions src/authority_record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ pub struct AuthorityRecord {
/// Record leader (24 bytes)
pub leader: Leader,
/// Control fields (000-009) - preserves insertion order
pub control_fields: IndexMap<String, String>,
/// Multiple values per tag are supported (e.g., repeated 006/007 fields)
pub control_fields: IndexMap<String, Vec<String>>,
/// Variable fields (010+) - unified storage, preserves insertion order
pub fields: IndexMap<String, Vec<Field>>,
}
Expand Down Expand Up @@ -100,13 +101,16 @@ impl AuthorityRecord {

/// Add a control field
pub fn add_control_field(&mut self, tag: String, value: String) {
self.control_fields.insert(tag, value);
self.control_fields.entry(tag).or_default().push(value);
}

/// Get a control field value
/// Get the first control field value for a tag
#[must_use]
pub fn get_control_field(&self, tag: &str) -> Option<&str> {
self.control_fields.get(tag).map(String::as_str)
self.control_fields
.get(tag)
.and_then(|v| v.first())
.map(String::as_str)
}

/// Set the heading (1XX field)
Expand Down Expand Up @@ -327,19 +331,25 @@ impl MarcRecord for AuthorityRecord {
}

fn add_control_field(&mut self, tag: impl Into<String>, value: impl Into<String>) {
self.control_fields.insert(tag.into(), value.into());
self.control_fields
.entry(tag.into())
.or_default()
.push(value.into());
}

fn get_control_field(&self, tag: &str) -> Option<&str> {
self.control_fields.get(tag).map(String::as_str)
self.control_fields
.get(tag)
.and_then(|v| v.first())
.map(String::as_str)
}

fn control_fields_iter(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
Box::new(
self.control_fields
Box::new(self.control_fields.iter().flat_map(|(tag, values)| {
values
.iter()
.map(|(tag, value)| (tag.as_str(), value.as_str())),
)
.map(move |value| (tag.as_str(), value.as_str()))
}))
}

fn get_fields(&self, tag: &str) -> Option<&[Field]> {
Expand Down
6 changes: 4 additions & 2 deletions src/authority_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,10 @@ impl<W: Write> AuthorityMarcWriter<W> {
};

// Write control fields
for (tag, value) in &record.control_fields {
add_field(tag, None, Some(value), None, &mut directory, &mut data)?;
for (tag, values) in &record.control_fields {
for value in values {
add_field(tag, None, Some(value), None, &mut directory, &mut data)?;
}
}

// Write all variable fields (010+) in tag order
Expand Down
9 changes: 8 additions & 1 deletion src/bibframe/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ impl<'a> MarcToBibframeConverter<'a> {
self.record
.control_fields
.get("001")
.and_then(|v| v.first())
.map_or("unknown", String::as_str)
} else {
"unknown"
Expand Down Expand Up @@ -1637,6 +1638,7 @@ impl<'a> MarcToBibframeConverter<'a> {
self.record
.control_fields
.get("001")
.and_then(|v| v.first())
.map_or("unknown", String::as_str)
} else {
"unknown"
Expand Down Expand Up @@ -1674,7 +1676,12 @@ impl<'a> MarcToBibframeConverter<'a> {
}

// Add creation date from 008/00-05 if available
if let Some(field_008) = self.record.control_fields.get("008") {
if let Some(field_008) = self
.record
.control_fields
.get("008")
.and_then(|v| v.first())
{
if field_008.len() >= 6 {
let date_entered = &field_008[0..6];
self.graph.add(
Expand Down
18 changes: 11 additions & 7 deletions src/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,12 @@ pub fn records_to_csv(records: &[Record]) -> Result<String> {

for record in records {
// Write control fields
for (tag, value) in &record.control_fields {
// Escape value if needed
let escaped_value = escape_csv_value(value);
writeln!(output, "{tag},,,{escaped_value}").ok();
for (tag, values) in &record.control_fields {
for value in values {
// Escape value if needed
let escaped_value = escape_csv_value(value);
writeln!(output, "{tag},,,{escaped_value}").ok();
}
}

// Write data fields with subfields
Expand Down Expand Up @@ -188,10 +190,12 @@ where

for record in records {
// Write control fields
for (tag, value) in &record.control_fields {
for (tag, values) in &record.control_fields {
if filter(tag) {
let escaped_value = escape_csv_value(value);
writeln!(output, "{tag},,,{escaped_value}").ok();
for value in values {
let escaped_value = escape_csv_value(value);
writeln!(output, "{tag},,,{escaped_value}").ok();
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/dublin_core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ fn extract_identifiers(record: &Record, dc: &mut DublinCoreRecord) {
}

// Control number (001)
if let Some(control_001) = record.control_fields.get("001") {
if let Some(control_001) = record.control_fields.get("001").and_then(|v| v.first()) {
dc.identifier.push(format!("Control#: {control_001}"));
}
}
Expand Down
16 changes: 9 additions & 7 deletions src/encoding_validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,15 @@ impl EncodingValidator {
let mut inconsistent_field_count = 0usize;

// Check control fields
for value in record.control_fields.values() {
if Self::is_likely_different_encoding(value, primary_encoding) {
inconsistent_field_count += 1;
let detected = Self::detect_encoding_from_string(value);
if let Some(enc) = detected {
if enc != primary_encoding && !mixed_encodings.contains(&enc) {
mixed_encodings.push(enc);
for values in record.control_fields.values() {
for value in values {
if Self::is_likely_different_encoding(value, primary_encoding) {
inconsistent_field_count += 1;
let detected = Self::detect_encoding_from_string(value);
if let Some(enc) = detected {
if enc != primary_encoding && !mixed_encodings.contains(&enc) {
mixed_encodings.push(enc);
}
}
}
}
Expand Down
10 changes: 6 additions & 4 deletions src/holdings_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ impl<R: Read> HoldingsMarcReader<R> {

// Parse directory (entries are 12 bytes each: 3 bytes tag + 4 bytes length + 5 bytes start position)
let mut fields: indexmap::IndexMap<String, Vec<Field>> = indexmap::IndexMap::new();
let mut control_fields: indexmap::IndexMap<String, String> = indexmap::IndexMap::new();
let mut control_fields: indexmap::IndexMap<String, Vec<String>> = indexmap::IndexMap::new();

let mut i = 0;
while i + 12 <= directory_bytes.len() {
Expand Down Expand Up @@ -180,7 +180,7 @@ impl<R: Read> HoldingsMarcReader<R> {
MarcError::InvalidRecord("Invalid control field encoding".to_string())
})?
.to_string();
control_fields.insert(tag, value);
control_fields.entry(tag).or_default().push(value);
} else {
// Data field
if field_data.len() < 3 {
Expand Down Expand Up @@ -238,8 +238,10 @@ impl<R: Read> HoldingsMarcReader<R> {
let mut record = HoldingsRecord::new(leader);

// Add control fields
for (tag, value) in control_fields {
record.add_control_field(tag, value);
for (tag, values) in control_fields {
for value in values {
record.add_control_field(tag.clone(), value);
}
}

// Organize data fields by their functional role
Expand Down
30 changes: 20 additions & 10 deletions src/holdings_record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ pub struct HoldingsRecord {
/// Record leader (24 bytes)
pub leader: Leader,
/// Control fields (000-009) - preserves insertion order
pub control_fields: IndexMap<String, String>,
/// Multiple values per tag are supported (e.g., repeated 006/007 fields)
pub control_fields: IndexMap<String, Vec<String>>,
/// Variable fields (010+) - unified storage, preserves insertion order
pub fields: IndexMap<String, Vec<Field>>,
}
Expand Down Expand Up @@ -103,13 +104,16 @@ impl HoldingsRecord {

/// Add a control field
pub fn add_control_field(&mut self, tag: String, value: String) {
self.control_fields.insert(tag, value);
self.control_fields.entry(tag).or_default().push(value);
}

/// Get a control field value
/// Get the first control field value for a tag
#[must_use]
pub fn get_control_field(&self, tag: &str) -> Option<&str> {
self.control_fields.get(tag).map(String::as_str)
self.control_fields
.get(tag)
.and_then(|v| v.first())
.map(String::as_str)
}

/// Add a location field (852)
Expand Down Expand Up @@ -371,19 +375,25 @@ impl MarcRecord for HoldingsRecord {
}

fn add_control_field(&mut self, tag: impl Into<String>, value: impl Into<String>) {
self.control_fields.insert(tag.into(), value.into());
self.control_fields
.entry(tag.into())
.or_default()
.push(value.into());
}

fn get_control_field(&self, tag: &str) -> Option<&str> {
self.control_fields.get(tag).map(String::as_str)
self.control_fields
.get(tag)
.and_then(|v| v.first())
.map(String::as_str)
}

fn control_fields_iter(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
Box::new(
self.control_fields
Box::new(self.control_fields.iter().flat_map(|(tag, values)| {
values
.iter()
.map(|(tag, value)| (tag.as_str(), value.as_str())),
)
.map(move |value| (tag.as_str(), value.as_str()))
}))
}

fn get_fields(&self, tag: &str) -> Option<&[Field]> {
Expand Down
6 changes: 4 additions & 2 deletions src/holdings_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,10 @@ impl<W: Write> HoldingsMarcWriter<W> {
};

// Write control fields
for (tag, value) in &record.control_fields {
add_field(tag, None, Some(value), None, &mut directory, &mut data)?;
for (tag, values) in &record.control_fields {
for value in values {
add_field(tag, None, Some(value), None, &mut directory, &mut data)?;
}
}

// Write data fields (all organized in fields map)
Expand Down
10 changes: 6 additions & 4 deletions src/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,12 @@ pub fn record_to_json(record: &Record) -> Result<Value> {
}));

// Add control fields
for (tag, value) in &record.control_fields {
fields.push(json!({
tag: value
}));
for (tag, values) in &record.control_fields {
for value in values {
fields.push(json!({
tag: value
}));
}
}

// Add data fields
Expand Down
Loading
Loading