diff --git a/CHANGELOG.md b/CHANGELOG.md index da4a8eb..e6f995a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. The format ### Added +- `dpub convert --rights "<TEXT>"` stamps a free-text `<dc:rights>` element into the EPUB's OPF metadata. When the flag is omitted, the source DAISY's `dc:rights` (if present in the NCC) carries through; the flag overrides. Useful for explicitly asserting a copyright frame (Marrakesh Treaty / EU accessibility exception, etc.) when the source doesn't carry one. Closes #21. - `dpub convert --auto-cover` does a best-effort cover-image lookup against the [Open Library](https://openlibrary.org/dev/docs/api/covers) public API using the book's title, author, language, and (when ISBN-shaped) `dc:identifier`. Misses, low-confidence matches, and network failures are silent — `--auto-cover` is best-effort and DAISY books frequently aren't in Open Library at all. The match is filtered by language overlap and author-last-name overlap to reject ambiguous Dutch translations of common English titles. Mutually exclusive with `--cover` at the CLI level. New `dpub-meta` crate carries the HTTP/JSON dependency surface separate from `dpub-convert`. Privacy-relevant (sends metadata to a third party); opt-in. Closes #11. - `dpub batch -o [--jobs N] [--audio opus] [--bitrate K]` walks a directory for DAISY 2.02 books (any directory containing an `ncc.html` is a book) and converts each to EPUB 3 in parallel via rayon. Per-book errors are recorded in the JSON summary on stdout, never raised — one bad book in a catalogue never halts the queue. Exit code is non-zero when any book failed. The output filename for each book is `.epub` inside the output directory. Pipeline 2 has no batch mode at all; this is the headline of dpub's "production library tool" framing. - `dpub validate --json` and `dpub a11y --json` emit the structured `Report` as pretty JSON on stdout instead of the human-readable summary. 
Pairs with the existing non-zero exit code on errors so CI/pipeline consumers can either parse the report or grep on exit status. The schema is the serde-derived form of `dpub_validate::Report`; field names are stable as part of the 1.0 contract. diff --git a/crates/dpub-cli/src/main.rs b/crates/dpub-cli/src/main.rs index 1cb068a..732da8f 100644 --- a/crates/dpub-cli/src/main.rs +++ b/crates/dpub-cli/src/main.rs @@ -61,6 +61,11 @@ enum Command { /// silent — the book ships without a cover. #[arg(long)] auto_cover: bool, + /// Free-text rights statement to stamp into the EPUB's + /// `<dc:rights>` field. Overrides any rights string in the + /// source DAISY metadata. + #[arg(long, value_name = "TEXT")] + rights: Option, }, /// Validate an existing EPUB 3 publication with EPUBCheck. Validate { @@ -147,6 +152,7 @@ fn main() -> Result<()> { no_text_cleanup, cover, auto_cover, + rights, } => cmd_convert( &ncc, &output, @@ -159,6 +165,7 @@ fn main() -> Result<()> { no_text_cleanup, cover, auto_cover, + rights, ), Command::Validate { epub, json } => cmd_validate(&epub, json), Command::A11y { epub, json } => cmd_a11y(&epub, json), @@ -185,6 +192,7 @@ fn cmd_convert( no_text_cleanup: bool, cover: Option, auto_cover: bool, + rights: Option, ) -> Result<()> { let ncc = resolve_ncc_path(ncc)?; let book = Book::from_ncc(&ncc).with_context(|| format!("loading {}", ncc.display()))?; @@ -246,6 +254,7 @@ fn cmd_convert( raw_transcript_segments: no_text_cleanup, cover, auto_cover, + rights, }; let start = std::time::Instant::now(); dpub_convert::convert_to_file(&book, output, &opts) @@ -535,6 +544,7 @@ fn cmd_batch( raw_transcript_segments: false, cover: None, auto_cover: false, + rights: None, }; let start = std::time::Instant::now(); let entries: Vec = books diff --git a/crates/dpub-convert/src/lib.rs b/crates/dpub-convert/src/lib.rs index 4718994..21d7852 100644 --- a/crates/dpub-convert/src/lib.rs +++ b/crates/dpub-convert/src/lib.rs @@ -85,6 +85,7 @@ fn build_package_metadata(book: &Book) 
-> PackageMetadata { .as_deref() .map(|raw| format!("urn:dpub:daisy:{raw}")), description: None, + rights: m.other.get("dc:rights").cloned(), duration_seconds: Some(book.total_audio_seconds()), narrator: m.narrator.clone(), access_modes: if m.multimedia_type.as_deref() == Some("audioFullText") { @@ -557,6 +558,12 @@ pub struct ConvertOptions { /// miss is silent (no cover embedded); a network failure is /// silent. Mutually exclusive with `cover` at the CLI layer. pub auto_cover: bool, + /// Free-text rights statement to stamp into the EPUB's + /// `<dc:rights>` field. When `None`, the source DAISY's + /// `dc:rights` (if any) carries through; the CLI override + /// is the simplest way to assert rights when the source + /// doesn't carry one. + pub rights: Option, } /// Convert and write a DAISY 2.02 publication to an EPUB 3 file in one call. @@ -576,6 +583,10 @@ pub fn convert_to_file(book: &Book, output: &Path, opts: &ConvertOptions) -> Res publication.cover = auto_lookup_cover(book); } + if let Some(rights) = &opts.rights { + publication.metadata.rights = Some(rights.clone()); + } + // Transcribe BEFORE audio recompression — we want to feed Whisper the // original (typically MP3) bytes, not a lossy Opus pass that throws away // information whisper.cpp's frontend re-discards anyway. 
diff --git a/crates/dpub-convert/tests/real_conversion.rs b/crates/dpub-convert/tests/real_conversion.rs index 5b7a8f3..069d723 100644 --- a/crates/dpub-convert/tests/real_conversion.rs +++ b/crates/dpub-convert/tests/real_conversion.rs @@ -121,6 +121,7 @@ fn opus_recompression_shrinks_real_book() { raw_transcript_segments: false, cover: None, auto_cover: false, + rights: None, }, ) .expect("write opus"); diff --git a/crates/epub3-writer/src/model.rs b/crates/epub3-writer/src/model.rs index ea8b47c..8f04c9e 100644 --- a/crates/epub3-writer/src/model.rs +++ b/crates/epub3-writer/src/model.rs @@ -56,6 +56,12 @@ pub struct PackageMetadata { pub date: Option, pub source: Option, pub description: Option, + /// Free-text rights statement. Emitted as `<dc:rights>` in the OPF + /// metadata block. Producers commonly use this to assert copyright + /// or to reference the accessibility-exception framework under + /// which the book is being distributed (Marrakesh Treaty, + /// Belgian / EU exceptions, etc.). + pub rights: Option, /// Total media duration of the publication, in seconds. pub duration_seconds: Option, diff --git a/crates/epub3-writer/src/writers.rs b/crates/epub3-writer/src/writers.rs index 5c8a671..eaa76c9 100644 --- a/crates/epub3-writer/src/writers.rs +++ b/crates/epub3-writer/src/writers.rs @@ -91,6 +91,9 @@ fn write_opf_metadata(s: &mut String, pub_: &Publication) { escape_text(desc) ); } + if let Some(rights) = &m.rights { + let _ = write!(s, " {}\n", escape_text(rights)); + } if let Some(n) = &m.narrator { let _ = write!(