From 9ec3c31992dcfd91367d19a29255bbf72eb798ef Mon Sep 17 00:00:00 2001 From: Daniel Chudnov Date: Sun, 12 Apr 2026 17:46:12 -0400 Subject: [PATCH] Add to_unicode, permissive, and recovery_mode flags to MARCReader (closes #78) pymarc-compatible kwargs for MARCReader: - to_unicode: accepted for compat, warns if False (mrrc always converts) - permissive: yields None for bad records instead of raising (pymarc behavior) - recovery_mode: exposes mrrc's RecoveryMode for salvaging partial data Updated migration guide, reading tutorial, and quickstart with error handling documentation. Bead: bd-y331 Co-Authored-By: Claude Opus 4.6 (1M context) --- .beads/issues.jsonl | 4 +- docs/getting-started/quickstart-python.md | 2 + docs/guides/migration-from-pymarc.md | 52 ++++++ docs/tutorials/python/reading-records.md | 27 +++ mrrc/__init__.py | 56 +++++- mrrc/_mrrc.pyi | 2 +- src-python/src/readers.rs | 28 ++- tests/python/test_marcreader_flags.py | 212 ++++++++++++++++++++++ 8 files changed, 366 insertions(+), 17 deletions(-) create mode 100644 tests/python/test_marcreader_flags.py diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 189e40c..ffaa93b 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -3,7 +3,7 @@ {"id":"bd-264q","title":"Add __str__ and __repr__ to Python Record wrapper","description":"The Python Record wrapper class in mrrc/__init__.py does not define __str__ or __repr__. Python's data model skips __getattr__ for dunder methods, so str(rec) returns the default '' instead of delegating to the Rust PyRecord's __str__ (which returns 'Record(type=a)'). Need to add explicit __str__ and __repr__ methods that delegate to self._inner. 
Reported in https://github.com/dchud/mrrc/issues/57 comment.","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-03-04T21:06:29.006105Z","created_by":"dchud","updated_at":"2026-03-04T21:53:01.013120Z","closed_at":"2026-03-04T21:53:01.013107Z","close_reason":"Fixed in PR #60, merged","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-3kee","title":"Add Record.get(tag) alias for pymarc compatibility","description":"pymarc's Record.get(tag, default=None) returns the first field matching a tag, or a default value. mrrc has the equivalent as Record.get_field(tag) but lacks the .get() alias.\n\nAdd a Record.get() method to the Python wrapper in mrrc/__init__.py that delegates to get_field() with an optional default parameter:\n\n def get(self, tag: str, default=None):\n return self.get_field(tag) or default\n\nThis is a pure Python wrapper addition — no Rust changes needed. Update _mrrc.pyi stubs accordingly.\n\npymarc source: Record.get() delegates to Record.__getitem__ with a try/except KeyError fallback.","status":"closed","priority":3,"issue_type":"feature","created_at":"2026-03-04T21:16:19.327037Z","created_by":"dchud","updated_at":"2026-03-04T21:53:01.068246Z","closed_at":"2026-03-04T21:53:01.068233Z","close_reason":"Fixed in PR #60, merged","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-3peu","title":"Fix check.sh --quick: Python tests run against stale extension","status":"closed","priority":1,"issue_type":"bug","created_at":"2026-03-04T16:56:07.259492Z","created_by":"dchud","updated_at":"2026-03-04T17:35:12.613323Z","closed_at":"2026-03-04T17:35:12.613307Z","close_reason":"Merged in PR #56","source_repo":".","compaction_level":0,"original_size":0} -{"id":"bd-7dre","title":"Fix repeated control fields being dropped (e.g. 
multiple 007s)","status":"in_progress","priority":1,"issue_type":"bug","created_at":"2026-04-10T01:47:09.917469Z","created_by":"dchud","updated_at":"2026-04-10T01:48:10.983328Z","source_repo":".","compaction_level":0,"original_size":0,"comments":[{"id":18,"issue_id":"bd-7dre","author":"dchud","text":"Elevated to GitHub issue #77: https://github.com/dchud/mrrc/issues/77","created_at":"2026-04-10T01:48:10Z"}]} +{"id":"bd-7dre","title":"Fix repeated control fields being dropped (e.g. multiple 007s)","status":"closed","priority":1,"issue_type":"bug","created_at":"2026-04-10T01:47:09.917469Z","created_by":"dchud","updated_at":"2026-04-12T21:18:06.004792Z","closed_at":"2026-04-12T21:18:06.004656Z","close_reason":"Fixed in PR #79, merged to main","source_repo":".","compaction_level":0,"original_size":0,"comments":[{"id":18,"issue_id":"bd-7dre","author":"dchud","text":"Elevated to GitHub issue #77: https://github.com/dchud/mrrc/issues/77","created_at":"2026-04-10T01:48:10Z"}]} {"id":"bd-93qn","title":"record_to_dublin_core_xml not exported from mrrc Python wrapper","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-03-04T18:59:47.002714Z","created_by":"dchud","updated_at":"2026-03-04T19:45:29.894208Z","closed_at":"2026-03-04T19:45:29.894196Z","close_reason":"Fixed in PR #59, merged","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-b7mg","title":"record_to_json/xml fail on records from xml_to_record()","notes":"GitHub issue: https://github.com/dchud/mrrc/issues/57","status":"closed","priority":1,"issue_type":"bug","created_at":"2026-03-04T18:55:40.894350Z","created_by":"dchud","updated_at":"2026-03-04T19:45:29.002501Z","closed_at":"2026-03-04T19:45:29.002489Z","close_reason":"Fixed in PR #58, merged","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-qjjs","title":"Pymarc API compatibility: comprehensive gap analysis and implementation 
plan","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-03-22T20:43:50.852232Z","created_by":"dchud","updated_at":"2026-03-22T22:43:09.600381Z","closed_at":"2026-03-22T22:43:09.600049Z","close_reason":"Replaced by bd-14jh — clean consolidated version","source_repo":".","compaction_level":0,"original_size":0,"comments":[{"id":6,"issue_id":"bd-qjjs","author":"dchud","text":"## Pymarc API compatibility gaps identified from rmarc bench_deathmatch.py\n\nAnalysis of https://github.com/matthewdeanmartin/rmarc/blob/main/bench/bench_deathmatch.py reveals 4 API differences where mrrc diverges from pymarc's public API.\n\n### Gap 1: Record convenience accessors are methods, pymarc uses properties\n\nIn pymarc, `title`, `author`, `isbn`, `issn`, `subjects`, `notes`, and `location` are all `@property` decorated — accessed as `record.title`, not `record.title()`.\n\nIn mrrc, these are all plain methods requiring parentheses: `record.title()`.\n\nThe benchmark script has a special mrrc branch: `rec.title()` vs `rec.title` for pymarc/rmarc.\n\n**Fix:** Convert these to `@property` in `mrrc/__init__.py`. The underlying Rust `_inner` methods stay as-is; only the Python wrapper changes. This is a breaking change for existing mrrc users calling `record.title()` — could add a deprecation shim that detects callable usage.\n\n### Gap 2: Missing `Field.value()` method\n\npymarc's `Field.value()` returns all subfield values space-joined (or `data` for control fields). mrrc's Field wrapper has no `value()` method. The benchmark uses `str(field)` for mrrc vs `field.value()` for pymarc.\n\n**Fix:** Add `def value(self) -> str` to both `Field` and `ControlField` in the Python wrapper. For `Field`, return space-joined subfield values (matching pymarc). 
For `ControlField`, return `self.value` (already an attribute, but the method form is what pymarc uses).\n\n### Gap 3: Missing `Record.as_json()` method\n\npymarc uses `record.as_json(**kwargs)` which serializes via `json.dumps(record.as_dict(), **kwargs)`. mrrc has `record.to_json()` instead.\n\n**Fix:** Add `as_json()` as an alias or new method on `Record`. Ideally also add `as_dict()` for full compat. Keep `to_json()` as well since it's already in our public API.\n\n### Gap 4: Missing `parse_xml_to_array()` top-level function\n\npymarc exports `parse_xml_to_array(xml_file, strict=False, normalize_form=None)` as a top-level convenience function. mrrc uses `xml_to_records()` instead, with different argument conventions (takes a string, not a file path/handle).\n\n**Fix:** Add `parse_xml_to_array()` as a top-level function in `mrrc/__init__.py` that wraps `xml_to_records()`. Accept file paths and file-like objects for full pymarc compat.\n\n---\n\nElevated to GitHub issue #65: https://github.com/dchud/mrrc/issues/65","created_at":"2026-03-22T20:44:08Z"},{"id":7,"issue_id":"bd-qjjs","author":"dchud","text":"## Additional pymarc API compatibility gaps (deep comparison)\n\nFull API surface comparison reveals more gaps beyond the 4 from the benchmark.\n\n### Gap 5: Record.as_marc() / as_marc21() — serialize to ISO 2709 bytes\npymarc has `record.as_marc()` and `record.as_marc21()` returning `bytes`. mrrc has no equivalent for serializing a single Record back to raw MARC binary. This is critical for round-trip workflows (read, modify, write back).\n\n### Gap 6: Record.as_dict()\npymarc has `record.as_dict()` returning a plain dict. `as_json()` is built on top of it. mrrc has neither.\n\n### Gap 7: Record.add_field() signature mismatch\npymarc: `add_field(self, *fields)` — accepts multiple fields. 
mrrc: `add_field(self, field)` — accepts exactly one.\n\n### Gap 8: Record.remove_field() signature mismatch + missing remove_fields()\npymarc: `remove_field(self, *fields)` — accepts multiple. mrrc: one at a time. pymarc also has `remove_fields(self, *tags)` for bulk removal by tag string — mrrc has no equivalent.\n\n### Gap 9: Record.add_ordered_field() / add_grouped_field() missing\npymarc has both for inserting fields in tag-sorted or tag-grouped positions.\n\n### Gap 10: Naming mismatches (no underscore in pymarc)\n- pymarc: `record.physicaldescription` → mrrc: `record.physical_description()`\n- pymarc: `record.uniformtitle` → mrrc: `record.uniform_title()`\n- pymarc: `record.addedentries` → mrrc: missing entirely\n\n### Gap 11: Record.pubyear return type\npymarc returns `Optional[str]` (e.g. \"2024\"). mrrc returns `Optional[int]`. This breaks code doing string operations on pubyear.\n\n### Gap 12: Field.value() (already noted in Gap 2 above)\n\n### Gap 13: Field.format_field() missing\npymarc `field.format_field()` returns a human-readable string. Used internally by Record property helpers like `title`. Different from `value()`.\n\n### Gap 14: Field.add_subfield() missing pos parameter\npymarc: `add_subfield(self, code, value, pos=None)`. mrrc only supports append.\n\n### Gap 15: Record.decode_marc() missing\npymarc has `decode_marc(self, marc, to_unicode=True, ...)` to populate a Record from raw bytes. Used by MARCReader internally but also called directly by users.\n\n### Gap 16: Field.as_marc() / as_marc21() missing\npymarc can serialize individual fields to ISO 2709 bytes.\n\n---\n\n### GIL considerations for as_json() and parse_xml_to_array()\n\nNeither needs GIL release. 
Both operate on data already in Python memory:\n- as_json() serializes a single already-parsed Record — too fast to justify GIL overhead\n- parse_xml_to_array() wraps xml_to_records() which takes a &str — no Python I/O during Rust parsing\n\nThe GIL 3-phase model is only needed for MARCReader-style iterators that interleave Python file I/O with Rust parsing across many iterations.","created_at":"2026-03-22T20:48:58Z"},{"id":8,"issue_id":"bd-qjjs","author":"dchud","text":"## Correction: parse_xml_to_array() SHOULD follow MARCReader's input dispatch pattern\n\npymarc's parse_xml_to_array() accepts file paths, open file handles, and StringIO objects. If we implement it to match, it should follow the same input-dispatch pattern MARCReader already uses:\n\n- **String path / pathlib.Path** → read file in Rust (RustFile backend, no GIL)\n- **File-like object** → read content via Python (GIL held), then parse in Rust\n- **String content** → parse directly in Rust (no GIL needed)\n\nThe XML *parsing* itself is always GIL-free (pure Rust on &str). The GIL concern is only about the file-reading step. This is exactly the MARCReader pattern from readers.rs — detect input type, use native I/O when possible, fall back to Python I/O with proper GIL management when given a Python file object.\n\nThis means parse_xml_to_array() needs a Rust-side implementation (or at minimum a Python wrapper that handles the dispatch), not just a thin alias over xml_to_records().","created_at":"2026-03-22T20:49:19Z"},{"id":9,"issue_id":"bd-qjjs","author":"dchud","text":"## Clarification: GIL avoidance is the priority\n\nThe goal is to avoid holding the GIL as much as possible, not to replicate the MARCReader pattern specifically. 
For parse_xml_to_array():\n\n- **String path / pathlib.Path** → read + parse entirely in Rust, zero GIL\n- **String content** → parse entirely in Rust, zero GIL\n- **Python file object** → only case where GIL is unavoidable (need it to call .read())\n\nThe MARCReader's 3-phase model is a solution to a problem we may not have here. If we can handle the common cases (file paths, string content) entirely GIL-free, that's the right approach. Only add GIL management for the Python file object fallback, and even then minimize the window.","created_at":"2026-03-22T20:50:54Z"},{"id":10,"issue_id":"bd-qjjs","author":"dchud","text":"## Documentation: show preferred GIL-free usage with explicit examples\n\nWhen implementing parse_xml_to_array(), update docs (intro guide, API reference, examples) to show the preferred path-based approach with clear comments about why, the same way we document passing paths to MARCReader() as preferred. E.g.:\n\n```python\n# Preferred: file path lets mrrc read + parse entirely in Rust (no GIL)\nrecords = mrrc.parse_xml_to_array(\"collection.xml\")\n\n# Also good: string content parsed entirely in Rust (no GIL)\nrecords = mrrc.parse_xml_to_array(xml_string)\n\n# Works but slower: Python file object requires GIL for .read()\nwith open(\"collection.xml\") as f:\n records = mrrc.parse_xml_to_array(f)\n```\n\nThis pattern should be consistent with how MARCReader path-based usage is already documented across the docs.","created_at":"2026-03-22T20:52:05Z"},{"id":11,"issue_id":"bd-qjjs","author":"dchud","text":"## Consolidated revision: complete gap inventory (supersedes earlier comments)\n\nReview of earlier comments found errors, duplicates, and missing gaps. This comment is the authoritative reference.\n\n---\n\n### Tier 1: Structural / semantic divergences (highest impact)\n\n**A. Separate ControlField class (NEW)**\npymarc has NO ControlField class — both control and data fields are `pymarc.Field`, distinguished by `is_control_field()`. 
mrrc exposes a separate `ControlField` class. This breaks `isinstance(f, Field)` checks and means `get_fields()` returns mixed types in mrrc vs uniform Field in pymarc.\n\n**B. ControlField uses `.value` attribute; pymarc uses `.data` (NEW)**\npymarc control fields store content in `.data`. mrrc's ControlField uses `.value`. Code doing `field.data` on a control field will fail.\n\n**C. Record convenience accessors are methods, pymarc uses @property (Gap 1)**\n`title`, `author`, `isbn`, `issn`, `issn_title`, `issnl`, `subjects`, `notes`, `location`, `series`, `sudoc`, `publisher`, `pubyear`, `physicaldescription`, `uniformtitle`, `addedentries` are all `@property` in pymarc. mrrc has them as methods (and some are missing — see Tier 2).\n\n**D. Record.__getitem__ returns None for missing tags; pymarc raises KeyError (NEW)**\npymarc `record['999']` raises `KeyError`. mrrc returns `None`. This silently changes control flow for code using try/except KeyError.\n\n**E. Field.__str__ returns useless default repr (NEW)**\npymarc `str(field)` returns MARC display format: `=245 10$aThe Great Gatsby$cF. Scott Fitzgerald`. mrrc returns `` because the Python wrapper has no `__str__` and `__getattr__` doesn't intercept dunder methods. The Rust `_inner.__str__` returns `Field(245)` which is also not pymarc-compatible. Note: the rmarc benchmark uses `str(field)` for mrrc as a workaround for missing `value()`, but it doesn't actually produce useful output.\n\n**F. Record.as_marc() / as_marc21() missing (Gap 5)**\nNo way to serialize a Record to ISO 2709 bytes. MARCWriter can write to files, but there's no method to get bytes from a single Record. Critical for round-trip workflows.\n\n---\n\n### Tier 2: Missing methods and signature mismatches\n\n**G. Field.value() missing (Gap 2)**\npymarc `field.value()` returns space-joined subfield values (or `.data` for control fields). mrrc has nothing equivalent. 
NOTE: For ControlField, we can't just add a `value()` method because `.value` is already an attribute — resolve by renaming attribute to `.data` (matching pymarc) per Gap B above.\n\n**H. Field.format_field() missing (Gap 13)**\npymarc `field.format_field()` returns human-readable text (no indicators or subfield codes). Different from `value()` only in edge cases but used internally by Record property helpers like `title`.\n\n**I. Record.as_json(**kwargs) missing (Gap 3)**\npymarc `record.as_json(**kwargs)` wraps `json.dumps(record.as_dict(), **kwargs)`. mrrc only has `to_json()` with no kwargs support. Keep `to_json()` in our public API.\n\n**J. Record.as_dict() missing (Gap 6)**\npymarc builds `as_json()` on `as_dict()`. Should implement both.\n\n**K. parse_xml_to_array() missing (Gap 4)**\npymarc top-level function accepting file paths, file handles, StringIO. mrrc has `xml_to_records()` (string-only). Implementation should avoid holding the GIL: file paths and string content → entirely Rust, zero GIL. Python file objects → GIL only for `.read()`, then Rust parsing. Document preferred path-based usage with examples, consistent with MARCReader docs.\n\n**L. add_field(*fields) signature mismatch (Gap 7)**\npymarc accepts `*fields`. mrrc accepts exactly one.\n\n**M. remove_field(*fields) + remove_fields(*tags) (Gap 8)**\npymarc `remove_field` accepts multiple. mrrc accepts one (Field or str). pymarc also has `remove_fields(*tags)` for bulk removal by tag — mrrc has no equivalent.\n\n**N. add_ordered_field() / add_grouped_field() missing (Gap 9)**\npymarc has both for inserting fields in tag-sorted or tag-grouped positions.\n\n**O. Field.add_subfield() missing pos parameter (Gap 14)**\npymarc: `add_subfield(self, code, value, pos=None)`. mrrc only supports append.\n\n**P. Field.as_marc() / as_marc21() missing (Gap 16)**\npymarc can serialize individual fields to ISO 2709 bytes.\n\n---\n\n### Tier 3: Naming and return type mismatches\n\n**Q. 
Property name mismatches (Gap 10)**\n- pymarc `physicaldescription` → mrrc `physical_description` (underscore)\n- pymarc `uniformtitle` → mrrc `uniform_title` (underscore)\n- pymarc `addedentries` → mrrc: missing entirely (returns 700/710/711/730 fields)\n\n**R. pubyear return type (Gap 11)**\npymarc returns `Optional[str]` (e.g. `\"2020\"`). mrrc returns `Optional[int]` (`2020`). Confirmed: mrrc returns int.\n\n---\n\n### Tier 4: Lower-priority gaps\n\n**S. decode_marc() missing (Gap 15)** — semi-internal, used by MARCReader. Uncommon in user code.\n\n**T. Missing reader/writer variants** — JSONReader, JSONWriter, MARCMakerReader, XMLWriter, TextWriter.\n\n**U. No exception hierarchy** — pymarc has ~15 specific exception classes. mrrc exposes generic errors.\n\n**V. No MARC8 character encoding support.**\n\n**W. Missing convenience functions** — `map_records()`, `map_xml()`, `parse_json_to_array()`, `record_to_xml_node()`.\n\n**X. Missing constants** — `LEADER_LEN`, `DIRECTORY_ENTRY_LEN`, `END_OF_FIELD`, `END_OF_RECORD`, `SUBFIELD_INDICATOR`, etc.\n\n---\n\n### GIL policy (final, supersedes earlier comments)\n\nAvoid holding the GIL whenever possible.\n\n- `as_json()`: no GIL concern — single record, in-memory, fast.\n- `parse_xml_to_array()`: file paths and string content → entirely Rust, zero GIL. 
Python file objects → GIL only for `.read()`, minimize window.\n- `as_marc()` / `as_marc21()`: pure serialization, no GIL needed.\n\n---\n\n### Removed from earlier comments\n- Gap 12 was a duplicate of Gap 2 — removed.\n\nElevated to GitHub issue #65: https://github.com/dchud/mrrc/issues/65","created_at":"2026-03-22T21:00:17Z"}]} @@ -12,7 +12,7 @@ {"id":"bd-vc06","title":"Add negation flag to SubfieldValueQuery","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-03-18T23:57:42.630104Z","created_by":"dchud","updated_at":"2026-04-06T00:39:18.974586Z","closed_at":"2026-04-06T00:39:18.974381Z","close_reason":"Completed — merged in PR #74","source_repo":".","compaction_level":0,"original_size":0,"comments":[{"id":3,"issue_id":"bd-vc06","author":"dchud","text":"## Motivation\n\nFollow-up to bd-w9cm (negation on SubfieldPatternQuery). The same use case applies to exact and partial string matching: users scanning large record sets for fields where a subfield does NOT equal or contain a value currently must filter in Python, incurring FFI overhead.\n\n## Design\n\nMirror the approach from bd-w9cm. 
SubfieldValueQuery already has a `partial: bool` flag — add `negate: bool` alongside it.\n\n### Rust core (`src/field_query.rs`)\n\n- Add `pub negate: bool` to `SubfieldValueQuery`\n- Update `new()` and `partial()` constructors to set `negate: false`\n- Add `pub fn negated(tag, subfield_code, value) -> Self` and `pub fn partial_negated(tag, subfield_code, value) -> Self` constructors\n- Update `matches()`:\n\n field.get_subfield(self.subfield_code).is_some_and(|value| {\n let matched = if self.partial {\n value.contains(self.value.as_str())\n } else {\n value == self.value.as_str()\n };\n matched != self.negate\n })\n\n### PyO3 bindings (`src-python/src/query.rs`)\n\n- Add `negate` as an optional keyword argument to `PySubfieldValueQuery::new()` (default `False`)\n- Add `#[getter] fn negate(&self) -> bool`\n- Update `__repr__` to include `negate=true` when set\n\n### Python stubs (`mrrc/_mrrc.pyi`)\n\n- Update `SubfieldValueQuery.__init__` signature: add `negate: bool = False`\n- Add `negate` property\n\n### Tests\n\n- Rust unit tests: negated exact match, negated partial match, negated with missing subfield (returns false)\n- Python tests in `tests/python/test_query_dsl.py`: negated value query via kwarg, verify property, integration with `fields_matching_value()`\n\n### Documentation\n\n- Update `docs/guides/query-dsl.md` (SubfieldValueQuery section)\n- Update `docs/tutorials/python/querying-fields.md` if SubfieldValueQuery is covered\n- Update `mrrc/_mrrc.pyi` stubs\n\n### Edge cases\n\nSame as bd-w9cm:\n- Missing subfield + negate=true → false (subfield must exist)\n- Repeating subfields: only the first value is checked (existing `get_subfield` behavior)\n\n### Sequencing\n\nImplement after bd-w9cm lands so both query types get consistent API and we can share test patterns.","created_at":"2026-03-18T23:58:00Z"},{"id":5,"issue_id":"bd-vc06","author":"dchud","text":"Elevated to GitHub issue #64: 
https://github.com/dchud/mrrc/issues/64","created_at":"2026-03-18T23:59:36Z"}]} {"id":"bd-w9cm","title":"Add negation flag to SubfieldPatternQuery","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-03-18T23:53:40.703765Z","created_by":"dchud","updated_at":"2026-04-06T00:13:06.687346Z","closed_at":"2026-04-06T00:13:06.687035Z","close_reason":"Completed — merged in PR #73","source_repo":".","compaction_level":0,"original_size":0,"comments":[{"id":1,"issue_id":"bd-w9cm","author":"dchud","text":"## Motivation\n\nA user scanning ~250k records needed to find ones where a subfield did NOT match a regex. Currently this requires Python-side filtering (`not re.match(...)`) which means:\n- Extra string copies across the Rust/Python FFI boundary for every record\n- GIL overhead on each field value that crosses into Python\n- Inability to short-circuit inside Rust iteration\n\nAdding a `negate` flag pushes the inversion into Rust so non-matching fields never cross the FFI boundary.\n\n## Design\n\nFollow the existing `partial` flag pattern on `SubfieldValueQuery` — add a `negate: bool` field defaulting to `false`.\n\n### Rust core (`src/field_query.rs`)\n\n- Add `pub negate: bool` to `SubfieldPatternQuery`\n- Update `new()` to set `negate: false`\n- Add `pub fn negated(tag, subfield_code, pattern) -> Result` constructor (sets `negate: true`)\n- Update `matches()`: XOR the regex result with `self.negate` so that when negated, a match means \"subfield exists but does NOT match the pattern\"\n\n### PyO3 bindings (`src-python/src/query.rs`)\n\n- Add `negate` as an optional keyword argument to `PySubfieldPatternQuery::new()` (default `False`)\n- Add `#[getter] fn negate(&self) -> bool`\n- Update `__repr__` to include `negate=true` when set\n\n### Python stubs (`mrrc/_mrrc.pyi`)\n\n- Update `SubfieldPatternQuery.__init__` signature: add `negate: bool = False`\n- Add `negate` property\n\n### Tests\n\n- Rust unit tests in `src/field_query.rs`: negated match, 
negated non-match, negated with missing subfield (should return false — subfield must exist)\n- Python tests in `tests/python/test_query_dsl.py`: negated pattern query via kwarg, verify `negate` property, integration with `fields_matching_pattern()`\n\n### Documentation\n\n- Update `docs/guides/query-dsl.md` with negation examples\n- Update `docs/tutorials/python/querying-fields.md` if SubfieldPatternQuery is covered there\n- Update API reference docs if they document SubfieldPatternQuery parameters\n\n## Edge case: missing subfield\n\nWhen `negate=true` and the subfield doesn't exist on a field, `matches()` should return `false`. Negation means \"the subfield exists but its value doesn't match\" — not \"the subfield is absent.\" This matches user intent (filtering by value, not by presence).\n\n## Scope\n\nThis bead covers the `negate` flag only. A broader query composition framework (NotQuery, AndQuery, OrQuery) is a separate concern.","created_at":"2026-03-18T23:53:58Z"},{"id":2,"issue_id":"bd-w9cm","author":"dchud","text":"## Revision: additional detail and edge cases\n\n### Repeating subfields\n\n`get_subfield()` returns only the **first** subfield with the given code. MARC fields can have repeating subfields (e.g., multiple `$a` in a 650). With `negate=true`, the query checks whether the first value does NOT match — it does not scan all values with that code. This is consistent with the existing non-negated behavior but worth documenting explicitly since negation makes the semantics more consequential. 
If users need \"no subfield with this code matches,\" they can combine with Python-level filtering or we can address multi-value matching in a future enhancement.\n\n### matches() implementation detail\n\nThe cleanest implementation uses `!=` rather than a literal XOR:\n\n field.get_subfield(self.subfield_code)\n .is_some_and(|value| self.pattern.is_match(value) != self.negate)\n\nThis handles all cases correctly:\n- subfield exists, pattern matches, negate=false → true (correct)\n- subfield exists, pattern doesn't match, negate=false → false (correct)\n- subfield exists, pattern matches, negate=true → false (correct: match excluded)\n- subfield exists, pattern doesn't match, negate=true → true (correct: non-match included)\n- subfield missing, any negate value → false (is_some_and short-circuits)\n\nNo extra branching needed for the missing-subfield edge case.\n\n### Follow-up\n\nThe same `negate` flag would be useful on `SubfieldValueQuery` (same user scenario, exact/partial matching). 
That should be a separate bead to keep scope tight.\n\n### Documentation locations confirmed\n\n- `docs/guides/query-dsl.md` — SubfieldPatternQuery section at line 108, examples at line 228\n- `docs/tutorials/python/querying-fields.md` — two usage examples at lines 80, 85\n- `mrrc/_mrrc.pyi` — stub at line 295\n- No separate API reference docs beyond the stubs; mkdocs-material may auto-generate from docstrings","created_at":"2026-03-18T23:56:36Z"},{"id":4,"issue_id":"bd-w9cm","author":"dchud","text":"Elevated to GitHub issue #63: https://github.com/dchud/mrrc/issues/63","created_at":"2026-03-18T23:59:36Z"}]} {"id":"bd-wifu","title":"Pymarc API compatibility: verified gap analysis and implementation plan","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-03-23T00:03:50.574638Z","created_by":"dchud","updated_at":"2026-04-05T21:33:52.664164Z","closed_at":"2026-04-05T21:33:52.663939Z","close_reason":"Superseded by bd-sgwi / GitHub #71 — clean ordered plan replaces accumulated analysis","source_repo":".","compaction_level":0,"original_size":0,"comments":[{"id":15,"issue_id":"bd-wifu","author":"dchud","text":"Verified pymarc API gap analysis. Every gap confirmed via live introspection of pymarc and mrrc. Replaces bd-qjjs and bd-14jh. Elevated to GitHub issue #65: https://github.com/dchud/mrrc/issues/65\n\n---\n\n## Tier 1: Structural / semantic divergences\n\nThese affect the fundamental object model and break the most pymarc-compatible code.\n\n### A. Separate ControlField class\n\npymarc has NO ControlField — both control and data fields are `pymarc.Field`, distinguished by `is_control_field()`. mrrc exposes a separate `ControlField` class. Breaks `isinstance(f, Field)` checks; `get_fields()` returns mixed types in mrrc vs uniform Field in pymarc.\n\n**Fix:** Unify into a single Field class in the wrapper, with `is_control_field()` dispatching behavior internally.\n\n### B. 
Control field content: mrrc `.value` attribute vs pymarc `.data` attribute\n\npymarc control fields store content in `.data`. mrrc's ControlField uses `.value`. Code doing `field.data` on a control field fails. Also creates a naming collision with `Field.value()` method (Gap G).\n\n**Fix:** Use `.data` when unifying Field (Gap A). Provides pymarc compat and clears the way for `value()` method.\n\n### C. Record convenience accessors are methods; pymarc uses @property\n\nAll 17 verified as properties in pymarc, methods in mrrc: `title`, `author`, `isbn`, `issn`, `issn_title`, `issnl`, `subjects`, `notes`, `location`, `series`, `sudoc`, `publisher`, `pubyear`. Plus 3 missing entirely from mrrc: `physicaldescription`, `uniformtitle`, `addedentries` (see Gap Q).\n\n**Fix:** Convert to `@property` in `mrrc/__init__.py`. Rust `_inner` methods stay as-is. Breaking change for `record.title()` callers — consider deprecation shim.\n\n### D. Record.__getitem__ returns None for missing tags; pymarc raises KeyError\n\nVerified: pymarc `record['999']` raises `KeyError`. mrrc returns `None`. Silently changes control flow for code using try/except KeyError. Note: `Record.get(tag, default)` already exists in both and behaves the same (returns default).\n\n**Fix:** Raise `KeyError` for missing tags in `__getitem__`.\n\n### E. Field.__str__ returns useless default repr\n\nVerified: pymarc `str(field)` → `'=245 10$aThe Great Gatsby$cF. Scott Fitzgerald'`. mrrc `str(field)` → `''`. The Python wrapper has no `__str__`; `__getattr__` doesn't intercept dunder methods. The Rust `_inner.__str__` returns `'Field(245)'` which is also not pymarc-compatible.\n\n**Fix:** Add `__str__` and `__repr__` to the wrapper Field class matching pymarc's MARC display format.\n\n### F. Record.as_marc() / as_marc21() missing\n\nVerified: pymarc `record.as_marc()` returns `bytes` (ISO 2709). mrrc has no equivalent — `MARCWriter.write_record()` writes to a file, returns `None`. 
Critical for round-trip workflows.\n\n**Fix:** Implement in Rust (pure serialization, no GIL needed), expose via PyO3, add wrapper method. `as_marc21()` should be an alias.\n\n### G. JSON serialization format mismatch\n\nVerified structural differences between pymarc `as_json()` and mrrc `to_json()`:\n\n**pymarc** (follows MARC-in-JSON / code4lib proposal):\n```json\n{\"leader\": \"...\", \"fields\": [{\"245\": {\"ind1\": \"1\", \"ind2\": \"0\", \"subfields\": [{\"a\": \"Title\"}, {\"c\": \"Author\"}]}}]}\n```\n\n**mrrc**:\n```json\n[{\"leader\": \"...\"}, {\"245\": {\"ind1\": \"1\", \"ind2\": \"0\", \"subfields\": {\"a\": \"Title\", \"c\": \"Author\"}}}]\n```\n\nTwo differences: (1) Top-level: pymarc uses `{leader, fields}` object, mrrc uses flat array. (2) Subfields: pymarc uses array of single-key objects (preserves order, allows duplicate codes), mrrc uses single object (loses duplicate subfield codes).\n\n**Fix:** `as_json()` and `as_dict()` must produce pymarc-compatible format. Keep `to_json()` as mrrc's native format. This likely needs Rust-side work or a Python-side reformatter.\n\n---\n\n## Tier 2: Missing methods and signature mismatches\n\n### H. Field.value() missing\n\nVerified: pymarc `field.value()` returns space-joined subfield values (or `.data` for control fields). mrrc Field has no `value` attribute or method. Depends on Gap B (renaming `.value` → `.data` on control fields).\n\n**Fix:** Add `def value(self) -> str` to the unified Field class.\n\n### I. Field.format_field() missing\n\nVerified: pymarc `field.format_field()` returns human-readable text without indicators or subfield codes. For the test data, `value()` and `format_field()` return the same string, but they can differ for fields with special formatting.\n\n**Fix:** Add `def format_field(self) -> str` to Field.\n\n### J. Record.as_json(**kwargs) and Record.as_dict() missing\n\nVerified: pymarc has both. 
`as_dict()` returns a plain dict; `as_json(**kwargs)` wraps `json.dumps(as_dict(), **kwargs)`. mrrc has neither. Note: `as_json()` must produce the pymarc-compatible JSON schema (Gap G), not mrrc's native format.\n\n**Fix:** Implement `as_dict()` producing pymarc-compatible structure, then `as_json(**kwargs)` on top. Keep mrrc's `to_json()` as its own format.\n\n### K. parse_xml_to_array() missing\n\nVerified: pymarc `parse_xml_to_array(xml_file, strict=False, normalize_form=None)` accepts file paths, open file handles, and StringIO. mrrc has `xml_to_records(str)` only.\n\n**Fix:** Add `parse_xml_to_array()` to `mrrc/__init__.py`. GIL policy: file paths and string content → entirely Rust, zero GIL. Python file objects → GIL only for `.read()`, minimize window. Document preferred path-based usage with explicit examples and comments explaining why, consistent with MARCReader docs:\n\n```python\n# Preferred: file path — read + parse entirely in Rust, zero GIL\nrecords = mrrc.parse_xml_to_array(\"collection.xml\")\n\n# Also good: string content — parse entirely in Rust, zero GIL\nrecords = mrrc.parse_xml_to_array(xml_string)\n\n# Works but slower: Python file object requires GIL for .read()\nwith open(\"collection.xml\") as f:\n records = mrrc.parse_xml_to_array(f)\n```\n\n### L. add_field(*fields) signature mismatch\n\nVerified: pymarc `add_field(self, *fields)`. mrrc `add_field(self, field: Field) -> None`.\n\n**Fix:** Change signature to accept `*fields` with a loop.\n\n### M. remove_field(*fields) + remove_fields(*tags)\n\nVerified: pymarc `remove_field(self, *fields) -> None` accepts multiple. mrrc `remove_field(self, field: Union[Field, str]) -> List[Field]` accepts one. pymarc also has `remove_fields(self, *tags) -> None` for bulk removal by tag string — mrrc has no equivalent. Also note: pymarc returns None; mrrc returns the removed fields.\n\n**Fix:** Change `remove_field` to accept `*fields`. Add `remove_fields(*tags)`. 
Consider whether to keep mrrc's return value (non-breaking addition).\n\n### N. add_ordered_field() / add_grouped_field() missing\n\nVerified: both exist in pymarc, neither in mrrc.\n\n**Fix:** Implement in wrapper. `add_ordered_field` inserts maintaining tag sort order. `add_grouped_field` inserts after the last field with the same tag.\n\n### O. Field.add_subfield() missing pos parameter\n\nVerified: pymarc `add_subfield(self, code, value, pos=None)`. mrrc `add_subfield(self, code, value)`.\n\n**Fix:** Add optional `pos` parameter. May need Rust-side support for positional insert.\n\n### P. Field.as_marc() / as_marc21() missing\n\nVerified: pymarc Field has both, mrrc Field has neither.\n\n**Fix:** Implement in Rust, expose via PyO3. No GIL needed. `as_marc21` as alias.\n\n### Q. Field.linkage_occurrence_num() missing\n\nVerified: pymarc `field.linkage_occurrence_num() -> Optional[str]` extracts the occurrence number from subfield $6 linkage.\n\n**Fix:** Implement in wrapper or Rust.\n\n---\n\n## Tier 3: Naming and return type mismatches\n\n### R. Property name mismatches\n\nVerified:\n- pymarc `physicaldescription` → mrrc has `physical_description` (underscore) but NOT `physicaldescription`\n- pymarc `uniformtitle` → mrrc has `uniform_title` (underscore) but NOT `uniformtitle`\n- pymarc `addedentries` → mrrc: missing entirely (returns 700/710/711/730 fields)\n\n**Fix:** Add pymarc-compatible names as `@property` (part of Gap C). Keep mrrc underscore names as aliases.\n\n### S. pubyear return type\n\nVerified: mrrc returns `Optional[int]` (e.g. `2020`). pymarc returns `Optional[str]` (e.g. `\"2020\"`).\n\n**Fix:** Return `str` from the property wrapper.\n\n### T. Field.convert_legacy_subfields() missing\n\nVerified: pymarc has `Field.convert_legacy_subfields(subfields: list[str]) -> list[Subfield]` as a classmethod. Converts old `['a', 'value', 'b', 'value']` format to Subfield namedtuples.\n\n**Fix:** Add classmethod to Field wrapper. 
Low priority — only needed for migrating old pymarc code.\n\n---\n\n## Tier 4: Lower priority\n\nThese are real gaps but unlikely to block most pymarc-compatible code.\n\n### U. Record.as_marc() / as_marc21() (already covered as Gap F)\n\n### V. decode_marc()\n\nVerified missing. Semi-internal — populates Record from raw bytes. Uncommon in user code but used by some advanced pymarc scripts.\n\n**Fix:** Could wrap mrrc's MARCReader to parse a single record from bytes.\n\n### W. Reader/writer variants\n\nVerified all missing: JSONReader, JSONWriter, MARCMakerReader, XMLWriter, TextWriter, Reader (base), Writer (base).\n\n**Fix:** Implement as needed. JSONReader/JSONWriter most likely to be used.\n\n### X. Exception hierarchy\n\nVerified: pymarc has 10 specific exception classes (`BadSubfieldCodeWarning`, `BaseAddressInvalid`, `BaseAddressNotFound`, `EndOfRecordNotFound`, `FatalReaderError`, `FieldNotFound`, `PymarcException`, `RecordDirectoryInvalid`, `RecordLeaderInvalid`, `RecordLengthInvalid`). mrrc exports zero exception classes.\n\n**Fix:** Define exception hierarchy in Python wrapper. Map Rust errors to specific exception types.\n\n### Y. MARC8 Python API\n\nVerified: mrrc has MARC8 support in Rust core but exports nothing at the Python level. pymarc exports: `MARC8ToUnicode` class, `marc8_to_unicode()` function, `map_marc8_field()`, `map_marc8_record()`.\n\n**Fix:** Expose Rust MARC8 functionality via PyO3, add pymarc-compatible wrapper names.\n\n### Z. Convenience functions\n\nVerified all missing: `map_records()`, `map_xml()`, `parse_json_to_array()`, `record_to_xml_node()`, `normalize_subfield_code()`.\n\n**Fix:** Implement as wrappers. `parse_json_to_array()` most likely to be used.\n\n### AA. Constants\n\nVerified all missing: `LEADER_LEN`, `DIRECTORY_ENTRY_LEN`, `END_OF_FIELD`, `END_OF_RECORD`, `SUBFIELD_INDICATOR`, `MARC_XML_NS`, `MARC_XML_SCHEMA`.\n\n**Fix:** Export from `mrrc/__init__.py`. These are just integer/string constants.\n\n### BB. 
Record instance attributes\n\npymarc Record has `force_utf8`, `pos`, `to_unicode` as instance attributes (member descriptors). These are used during decode and by some user code. mrrc has none of these.\n\n**Fix:** Low priority. `pos` (file position) could be useful; `force_utf8`/`to_unicode` are decode-time flags.\n\n---\n\n## GIL policy\n\nAvoid holding the GIL whenever possible.\n\n- **as_json(), as_dict(), as_marc(), as_marc21():** In-memory, single record — no GIL concern.\n- **parse_xml_to_array():** File paths and string content → entirely Rust, zero GIL. Python file objects → GIL only for `.read()`, minimize window.\n- **Field.value(), format_field(), as_marc():** Pure computation, no GIL concern.","created_at":"2026-03-23T00:05:04Z"}]} -{"id":"bd-y331","title":"Add pymarc-compatible to_unicode and permissive flags to MARCReader","status":"open","priority":3,"issue_type":"feature","created_at":"2026-04-10T01:47:10.886181Z","created_by":"dchud","updated_at":"2026-04-10T01:48:11.593780Z","source_repo":".","compaction_level":0,"original_size":0,"comments":[{"id":19,"issue_id":"bd-y331","author":"dchud","text":"Elevated to GitHub issue #78: https://github.com/dchud/mrrc/issues/78","created_at":"2026-04-10T01:48:11Z"}]} +{"id":"bd-y331","title":"Add pymarc-compatible to_unicode and permissive flags to MARCReader","status":"in_progress","priority":3,"issue_type":"feature","created_at":"2026-04-10T01:47:10.886181Z","created_by":"dchud","updated_at":"2026-04-12T21:37:14.182755Z","source_repo":".","compaction_level":0,"original_size":0,"comments":[{"id":19,"issue_id":"bd-y331","author":"dchud","text":"Elevated to GitHub issue #78: https://github.com/dchud/mrrc/issues/78","created_at":"2026-04-10T01:48:11Z"}]} {"id":"mrrc-086","title":"Add parallel benchmarking suite 
(rayon/threading/multiprocessing)","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-31T17:02:57.787595Z","updated_at":"2026-01-06T16:11:40.252865Z","closed_at":"2026-01-06T16:11:40.252865Z","close_reason":"Satisfied by Phase F - Comprehensive benchmarking suite with Criterion.rs, speedup curves (2.04x @ 2-thread, 3.20x @ 4-thread), fixtures (1k/10k/100k records), and pymrrc vs Rust efficiency comparison (92%) completed.","source_repo":".","compaction_level":0,"original_size":0} {"id":"mrrc-08k","title":"Phase 3: Linked field navigation and specialized queries","description":"Phase 3 of Advanced Field Query DSL (see design/FIELD_QUERY_DSL.md).\n\nImplement specialized query patterns for linked fields, authority control, and format-specific queries.\n\nTasks:\n1. Linked field navigation (880 field linkage)\n - Implement subfield 6 parsing for linkage occurrence numbers\n - Methods:\n * field.get_linked_field(record) -> Option (880 partner)\n * record.get_all_linked_fields(field) -> Vec\n * record.get_field_pairs(tag) -> Vec<(Field, Option)> (original + 880)\n - Support for bidirectional lookups\n\n2. Authority control helpers (for authority records)\n - Methods for finding related authority headings\n - Navigate 4XX (see from) and 5XX (see also) tracings\n - Helper: find_related_headings(heading: &Field) -> Vec\n - Extract authority relationship information from 7XX fields\n\n3. Format-specific query traits\n - BibliographicQueries trait (for Record)\n - AuthorityQueries trait (for AuthorityRecord)\n - HoldingsQueries trait (for HoldingsRecord)\n - Each provides domain-specific helper methods\n\n4. Comprehensive testing\n - Test 880 field linkage with real MARC-8 encoded data\n - Test authority record navigation patterns\n - Test complex record type-specific queries\n \nDependencies: Requires Phase 1 (mrrc-9n8) and Phase 2 (mrrc-131) completed","notes":"Phase 3 Status: Linked field navigation COMPLETED (28 tests). 
Authority control helpers TODO. Format-specific traits TODO.","status":"closed","priority":4,"issue_type":"task","created_at":"2025-12-26T17:13:18.416250Z","updated_at":"2025-12-28T16:25:38.021558Z","closed_at":"2025-12-28T16:25:38.021558Z","close_reason":"All three sub-features completed: linked field navigation, authority control helpers, format-specific query traits. 282 library tests passing.","source_repo":".","compaction_level":0,"original_size":0} {"id":"mrrc-0jm","title":"Update check.sh to include mrrc-python Clippy and test checks","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-06T16:50:13.684695Z","created_by":"dchud","updated_at":"2026-01-06T16:59:17.545907Z","closed_at":"2026-01-06T16:59:17.545907Z","close_reason":"Completed","source_repo":".","compaction_level":0,"original_size":0} diff --git a/docs/getting-started/quickstart-python.md b/docs/getting-started/quickstart-python.md index a0b7c5d..17f2f06 100644 --- a/docs/getting-started/quickstart-python.md +++ b/docs/getting-started/quickstart-python.md @@ -20,6 +20,8 @@ for record in MARCReader("records.mrc"): This approach uses pure Rust I/O and releases Python's GIL, enabling multi-threaded speedups. +For files with malformed records, use `permissive=True` to skip errors (yields `None`), or `recovery_mode="lenient"` to salvage partial data. See the [error handling guide](../guides/migration-from-pymarc.md#error-handling) for details. + ## Access Fields ```python diff --git a/docs/guides/migration-from-pymarc.md b/docs/guides/migration-from-pymarc.md index f06b113..2c55ee2 100644 --- a/docs/guides/migration-from-pymarc.md +++ b/docs/guides/migration-from-pymarc.md @@ -92,6 +92,8 @@ with open('output.mrc', 'wb') as f: | Operation | pymarc | mrrc | Same? 
| |-----------|--------|------|-------| | Create reader | `MARCReader(file_obj)` | `MARCReader('path.mrc')` (recommended) or `MARCReader(file_obj)` | Enhanced | +| Permissive mode | `MARCReader(f, permissive=True)` | `MARCReader(f, permissive=True)` | **Same** | +| Unicode flag | `MARCReader(f, to_unicode=True)` | `MARCReader(f, to_unicode=True)` | **Same** | | Read record | `reader.next()` or `next(reader)` | `next(reader)` | **Same** | | Write record | `writer.write(record)` | `writer.write(record)` | **Same** | | Iterate | `for record in reader:` | `for record in reader:` | **Same** | @@ -338,6 +340,56 @@ from mrrc import MrrcException, MarcError - [ ] Update writers to use context managers: `with mrrc.MARCWriter(f) as w:` (better resource management) - [ ] Use `record.as_marc()`, `record.as_json()`, `record.as_dict()` for serialization +## Error Handling + +### Permissive Mode (pymarc-compatible) + +pymarc's `permissive=True` flag yields `None` for records that fail to parse, +letting callers skip bad records and keep processing. mrrc supports the same +flag with identical behavior: + +```python +# Works the same in both pymarc and mrrc +for record in mrrc.MARCReader('records.mrc', permissive=True): + if record is None: + continue # skip malformed record + print(record.title) +``` + +### to_unicode Flag + +pymarc's `to_unicode=True` (the default) converts MARC-8 encoded records to +UTF-8. mrrc always converts MARC-8 to UTF-8 automatically — the conversion +happens in the Rust parsing layer and cannot be disabled. The `to_unicode` +kwarg is accepted for compatibility so existing scripts work unchanged. +Passing `to_unicode=False` emits a warning but has no effect. + +### Recovery Mode (mrrc-specific) + +mrrc also offers a `recovery_mode` kwarg that goes beyond pymarc's +permissive mode. 
Instead of skipping bad records entirely, recovery mode +attempts to salvage valid fields from damaged records: + +```python +# Attempt to recover partial data from malformed records +reader = mrrc.MARCReader('records.mrc', recovery_mode='lenient') +for record in reader: + print(f"Got {len(record.get_fields())} fields") + +# Even more lenient — accept partial data +reader = mrrc.MARCReader('records.mrc', recovery_mode='permissive') +``` + +Recovery modes: +- `"strict"` (default) — raise on any malformation +- `"lenient"` — attempt to recover, salvage valid fields +- `"permissive"` — very lenient, accept partial data + +Note: `permissive=True` and `recovery_mode` other than `"strict"` cannot +be combined — they represent different error-handling strategies. Use +`permissive=True` for pymarc-compatible "skip bad records" behavior, or +`recovery_mode` for mrrc's "salvage what you can" approach. + ## Known Differences from pymarc 1. **Record constructor**: `mrrc.Record()` works (defaults to `Leader()`), or pass explicit `mrrc.Record(mrrc.Leader())` diff --git a/docs/tutorials/python/reading-records.md b/docs/tutorials/python/reading-records.md index 01f3728..3c2fe79 100644 --- a/docs/tutorials/python/reading-records.md +++ b/docs/tutorials/python/reading-records.md @@ -128,6 +128,33 @@ except FileNotFoundError: print("File not found") ``` +### Permissive Mode + +When processing files that may contain malformed records, use +`permissive=True` to skip bad records instead of raising errors. 
+This matches pymarc's `permissive` flag behavior:
+
+```python
+for record in MARCReader("records.mrc", permissive=True):
+    if record is None:
+        continue  # skip malformed record
+    print(record.title)
+```
+
+### Recovery Mode
+
+For more control over error handling, use `recovery_mode` to attempt
+salvaging valid fields from damaged records:
+
+```python
+# Attempt to recover partial data
+for record in MARCReader("records.mrc", recovery_mode="lenient"):
+    print(f"Got {len(record.get_fields())} fields")
+```
+
+See the [migration guide](../../guides/migration-from-pymarc.md#error-handling)
+for details on the differences between `permissive` and `recovery_mode`.
+
 ## Complete Example
 
 This example analyzes a MARC file to summarize the collection by language and material type:
diff --git a/mrrc/__init__.py b/mrrc/__init__.py
index c2e11df..3826074 100644
--- a/mrrc/__init__.py
+++ b/mrrc/__init__.py
@@ -1464,19 +1464,55 @@ def __hash__(self) -> int:
 
 
 class MARCReader:
-    """MARC Reader wrapper."""
-
-    def __init__(self, file_obj):
+    """MARC Reader wrapper.
+
+    Args:
+        file_obj: File path (str), pathlib.Path, bytes/bytearray, or file-like object.
+        to_unicode: Accepted for pymarc compatibility. mrrc always converts
+            MARC-8 to UTF-8; passing ``False`` emits a warning.
+        permissive: When ``True``, yields ``None`` for records that fail to
+            parse instead of raising, matching pymarc's ``permissive`` behavior.
+        recovery_mode: mrrc-native error handling: ``"strict"`` (default, raise
+            on errors), ``"lenient"`` (attempt to salvage valid fields), or
+            ``"permissive"`` (very lenient, accept partial data). Cannot be
+            combined with ``permissive=True``. 
+ """ + + def __init__(self, file_obj, to_unicode: bool = True, permissive: bool = False, + recovery_mode: str = "strict"): """Create a new MARC reader.""" - self._inner = _MARCReader(file_obj) - + if not to_unicode: + import warnings + warnings.warn( + "mrrc always converts MARC-8 to UTF-8; to_unicode=False has no effect", + stacklevel=2, + ) + if permissive and recovery_mode != "strict": + raise ValueError( + "Cannot combine permissive=True with recovery_mode other than " + "'strict' — they represent different error-handling strategies" + ) + self._permissive = permissive + self._inner = _MARCReader(file_obj, recovery_mode=recovery_mode) + def __iter__(self): """Iterate over records.""" return self - - def __next__(self) -> Record: - """Get next record.""" - record = next(self._inner) + + def __next__(self) -> Optional[Record]: + """Get next record. + + When ``permissive=True``, returns ``None`` for records that fail + to parse instead of raising, matching pymarc behavior. + """ + try: + record = next(self._inner) + except StopIteration: + raise + except Exception: + if self._permissive: + return None + raise wrapper = Record(None) wrapper._inner = record # Create a Leader wrapper from the Rust record's leader @@ -1486,7 +1522,7 @@ def __next__(self) -> Record: wrapper._leader = leader wrapper._leader_modified = False return wrapper - + @property def backend_type(self) -> str: """The backend type: ``"rust_file"``, ``"cursor"``, or ``"python_file"``.""" diff --git a/mrrc/_mrrc.pyi b/mrrc/_mrrc.pyi index 778e13d..5918fa7 100644 --- a/mrrc/_mrrc.pyi +++ b/mrrc/_mrrc.pyi @@ -366,7 +366,7 @@ class MARCReader: futures = [executor.submit(process_file, f) for f in file_list] results = [f.result() for f in futures] """ - def __init__(self, file: Any) -> None: ... + def __init__(self, file: Any, *, recovery_mode: str = "strict") -> None: ... def __repr__(self) -> str: ... def __iter__(self) -> Iterator[Record]: ... 
def __next__(self) -> Record: diff --git a/src-python/src/readers.rs b/src-python/src/readers.rs index e5646e3..d2563e4 100644 --- a/src-python/src/readers.rs +++ b/src-python/src/readers.rs @@ -9,7 +9,7 @@ use crate::batched_reader::BatchedMarcReader; use crate::batched_unified_reader::BatchedUnifiedReader; use crate::parse_error::ParseError; use crate::wrappers::PyRecord; -use mrrc::MarcReader; +use mrrc::{MarcReader, RecoveryMode}; use pyo3::prelude::*; use smallvec::SmallVec; @@ -59,6 +59,8 @@ enum ReaderType { pub struct PyMARCReader { /// Reader backend (Python-based or unified multi-backend) reader: Option, + /// Recovery mode for handling malformed records + recovery_mode: RecoveryMode, } #[pymethods] @@ -73,12 +75,27 @@ impl PyMARCReader { /// /// # Arguments /// * `source` - File path (str), pathlib.Path, bytes/bytearray, or file-like object + /// * `recovery_mode` - Error handling mode: 'strict' (default), 'lenient', 'permissive' #[new] - pub fn new(source: &Bound<'_, PyAny>) -> PyResult { + #[pyo3(signature = (source, recovery_mode = "strict"))] + pub fn new(source: &Bound<'_, PyAny>, recovery_mode: &str) -> PyResult { + let rec_mode = match recovery_mode { + "lenient" => RecoveryMode::Lenient, + "permissive" => RecoveryMode::Permissive, + "strict" => RecoveryMode::Strict, + _ => { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Invalid recovery_mode '{}': must be 'strict', 'lenient', or 'permissive'", + recovery_mode + ))); + }, + }; + // Try unified reader first (handles file paths and bytes) match BatchedUnifiedReader::new(source) { Ok(unified_reader) => Ok(PyMARCReader { reader: Some(ReaderType::Unified(unified_reader)), + recovery_mode: rec_mode, }), Err(_) => { // Fall back to legacy Python file wrapper @@ -87,6 +104,7 @@ impl PyMARCReader { let batched_reader = BatchedMarcReader::new(file_obj); Ok(PyMARCReader { reader: Some(ReaderType::Python(batched_reader)), + recovery_mode: rec_mode, }) }, } @@ -122,11 +140,12 @@ impl 
PyMARCReader { // Step 2: Parse bytes (GIL released) // CRITICAL FIX: Use Python::detach() which properly releases the GIL. + let rec_mode = self.recovery_mode; let record = py .detach(|| { // Create a cursor from owned bytes let cursor = std::io::Cursor::new(bytes_owned.to_vec()); - let mut parser = MarcReader::new(cursor); + let mut parser = MarcReader::new(cursor).with_recovery_mode(rec_mode); // Parse the single record parser.read_record().map_err(|e| { @@ -229,13 +248,14 @@ impl PyMARCReader { // We defer conversion to PyErr until AFTER detach() returns (GIL re-acquired). // This is required because PyErr construction needs the GIL. // NOTE: Use detach() which properly releases GIL + let rec_mode = slf.recovery_mode; let parse_result: Result, crate::parse_error::ParseError> = py.detach(|| { // This closure runs WITHOUT the GIL held // All data here is owned (no Python references) // Return Rust errors only; defer PyErr conversion to Phase 3 let cursor = std::io::Cursor::new(record_bytes_owned.to_vec()); - let mut parser = MarcReader::new(cursor); + let mut parser = MarcReader::new(cursor).with_recovery_mode(rec_mode); // Parse the single record from bytes // Return ParseError (Rust type), not PyErr diff --git a/tests/python/test_marcreader_flags.py b/tests/python/test_marcreader_flags.py new file mode 100644 index 0000000..f1fc61e --- /dev/null +++ b/tests/python/test_marcreader_flags.py @@ -0,0 +1,212 @@ +""" +Tests for MARCReader to_unicode, permissive, and recovery_mode flags (GitHub issue #78). + +Verifies pymarc-compatible constructor kwargs and mrrc-native recovery_mode. 
+""" + +import io +import warnings +import pytest +import mrrc + + +# ============================================================================ +# Helpers +# ============================================================================ + +def _build_valid_record_bytes() -> bytes: + """Build a minimal valid MARC record and return as bytes.""" + record = mrrc.Record() + record.add_field(mrrc.Field("001", data="test001")) + record.add_field(mrrc.Field("008", data="230327s2023 wau ef 000 1 eng d")) + f245 = mrrc.Field("245", "1", "0") + f245.add_subfield("a", "Test title") + record.add_field(f245) + output = io.BytesIO() + writer = mrrc.MARCWriter(output) + writer.write_record(record) + writer.close() + return output.getvalue() + + +def _build_malformed_record_bytes() -> bytes: + """Build a record with valid leader length but corrupt directory/data. + + The leader claims 50 bytes total with a base address of 25 (1 directory + entry of 12 bytes + field terminator). The directory entry has an invalid + tag and the data area is garbage. This is enough for the batch reader to + read the correct number of bytes but causes a parse error. 
+ """ + # Leader: 00050nam 2200025 4500 (50 bytes total, base address 25) + leader = b"00050nam 2200025 4500" + # Directory: one 12-byte entry with garbage, then field terminator + directory = b"XXX000100000" + b"\x1e" + # Data area: fill to reach 50 bytes total, end with record terminator + data_needed = 50 - len(leader) - len(directory) - 1 # -1 for record terminator + data = b"\xff" * data_needed + b"\x1d" + return leader + directory + data + + +def _build_two_record_stream(inject_bad_second: bool = False) -> bytes: + """Build a stream with two records; optionally corrupt the second.""" + good = _build_valid_record_bytes() + if inject_bad_second: + bad = _build_malformed_record_bytes() + return good + bad + second = good # just duplicate the valid record + return good + second + + +# ============================================================================ +# to_unicode tests +# ============================================================================ + +class TestToUnicode: + """Test to_unicode kwarg behavior.""" + + def test_to_unicode_true_no_warning(self): + """to_unicode=True (default) should not emit a warning.""" + data = _build_valid_record_bytes() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + mrrc.MARCReader(io.BytesIO(data), to_unicode=True) + assert len(w) == 0 + + def test_to_unicode_default_no_warning(self): + """Default (no to_unicode arg) should not emit a warning.""" + data = _build_valid_record_bytes() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + mrrc.MARCReader(io.BytesIO(data)) + assert len(w) == 0 + + def test_to_unicode_false_warns(self): + """to_unicode=False should emit a warning.""" + data = _build_valid_record_bytes() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + mrrc.MARCReader(io.BytesIO(data), to_unicode=False) + assert len(w) == 1 + assert "to_unicode=False has no effect" in str(w[0].message) + + def 
test_to_unicode_false_still_reads(self): + """to_unicode=False should still read records normally.""" + data = _build_valid_record_bytes() + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + reader = mrrc.MARCReader(io.BytesIO(data), to_unicode=False) + records = list(reader) + assert len(records) == 1 + assert records[0].get_fields("001")[0].data == "test001" + + +# ============================================================================ +# permissive tests +# ============================================================================ + +class TestPermissive: + """Test permissive kwarg behavior (pymarc compatibility).""" + + def test_permissive_false_raises_on_bad_record(self): + """Default (permissive=False) should raise on malformed records.""" + data = _build_two_record_stream(inject_bad_second=True) + reader = mrrc.MARCReader(io.BytesIO(data), permissive=False) + # First record should be fine + record = next(reader) + assert record is not None + # Second record is malformed — should raise + with pytest.raises(Exception): + next(reader) + + def test_permissive_true_yields_none_for_bad_record(self): + """permissive=True should yield None for malformed records.""" + data = _build_two_record_stream(inject_bad_second=True) + reader = mrrc.MARCReader(io.BytesIO(data), permissive=True) + records = list(reader) + # Should get the good record and None for the bad one + assert any(r is not None for r in records), "Should have at least one valid record" + assert any(r is None for r in records), "Should have None for malformed record" + + def test_permissive_true_valid_records_normal(self): + """permissive=True should return valid records normally.""" + data = _build_two_record_stream(inject_bad_second=False) + reader = mrrc.MARCReader(io.BytesIO(data), permissive=True) + records = list(reader) + assert len(records) == 2 + assert all(r is not None for r in records) + + def test_permissive_pymarc_pattern(self): + """The standard pymarc 
permissive pattern should work.""" + data = _build_two_record_stream(inject_bad_second=True) + reader = mrrc.MARCReader(io.BytesIO(data), permissive=True) + valid_count = 0 + error_count = 0 + for record in reader: + if record is None: + error_count += 1 + continue + valid_count += 1 + assert valid_count >= 1 + assert error_count >= 1 + + +# ============================================================================ +# recovery_mode tests +# ============================================================================ + +class TestRecoveryMode: + """Test recovery_mode kwarg behavior.""" + + def test_recovery_mode_default_strict(self): + """Default recovery_mode should be strict.""" + data = _build_valid_record_bytes() + reader = mrrc.MARCReader(io.BytesIO(data)) + records = list(reader) + assert len(records) == 1 + + def test_recovery_mode_lenient(self): + """recovery_mode='lenient' should be accepted.""" + data = _build_valid_record_bytes() + reader = mrrc.MARCReader(io.BytesIO(data), recovery_mode="lenient") + records = list(reader) + assert len(records) == 1 + + def test_recovery_mode_permissive(self): + """recovery_mode='permissive' should be accepted.""" + data = _build_valid_record_bytes() + reader = mrrc.MARCReader(io.BytesIO(data), recovery_mode="permissive") + records = list(reader) + assert len(records) == 1 + + def test_recovery_mode_invalid_raises(self): + """Invalid recovery_mode should raise ValueError.""" + data = _build_valid_record_bytes() + with pytest.raises(ValueError, match="Invalid recovery_mode"): + mrrc.MARCReader(io.BytesIO(data), recovery_mode="invalid") + + +# ============================================================================ +# Conflict validation tests +# ============================================================================ + +class TestConflictValidation: + """Test that conflicting options are rejected.""" + + def test_permissive_with_lenient_raises(self): + """permissive=True + recovery_mode='lenient' should raise 
ValueError.""" + data = _build_valid_record_bytes() + with pytest.raises(ValueError, match="Cannot combine"): + mrrc.MARCReader(io.BytesIO(data), permissive=True, recovery_mode="lenient") + + def test_permissive_with_permissive_recovery_raises(self): + """permissive=True + recovery_mode='permissive' should raise ValueError.""" + data = _build_valid_record_bytes() + with pytest.raises(ValueError, match="Cannot combine"): + mrrc.MARCReader(io.BytesIO(data), permissive=True, recovery_mode="permissive") + + def test_permissive_with_strict_ok(self): + """permissive=True + recovery_mode='strict' (default) should be fine.""" + data = _build_valid_record_bytes() + reader = mrrc.MARCReader(io.BytesIO(data), permissive=True, recovery_mode="strict") + records = list(reader) + assert len(records) == 1