diff --git a/o8v-fs/src/lib.rs b/o8v-fs/src/lib.rs index 7282ceb..b7d466e 100644 --- a/o8v-fs/src/lib.rs +++ b/o8v-fs/src/lib.rs @@ -47,6 +47,51 @@ pub use file::{truncate_error, GuardedFile}; pub use fs_trait::FileSystem; pub use root::ContainmentRoot; pub use safe_fs::SafeFs; + +/// Validate that a byte slice has consistent line endings. +/// +/// Returns `Err` containing a human-readable cause string for: +/// - Lone `\r` (classic Mac, no `\n` at all) +/// - Any lone `\r` in a `\n`-terminated file (mid-line `\r`) +/// - Mixed `\r\n` and lone `\n` +/// +/// This is the single authoritative implementation shared by `o8v write` +/// (str-level callers) and `o8v-fs` (byte-level callers). Callers wrap +/// the cause in the appropriate error type — `FsError::InvalidContent` +/// for byte-level callers, or a plain `String` for str-level callers. +pub fn validate_line_endings_bytes(content: &[u8]) -> Result<(), String> { + let has_crlf = content.windows(2).any(|w| w == b"\r\n"); + let has_lf = content.contains(&b'\n'); + // A lone \r is any \r not immediately followed by \n. + let has_lone_cr = content + .iter() + .enumerate() + .any(|(i, &b)| b == b'\r' && content.get(i + 1) != Some(&b'\n')); + + if has_lone_cr && !has_lf { + return Err( + "file uses classic Mac line endings (\\r only) \u{2014} 8v does not support this format. Convert to \\n or \\r\\n first.".to_string() + ); + } + if has_lone_cr && has_lf { + return Err( + "file contains carriage return (\\r) characters outside of \\r\\n sequences \u{2014} normalize line endings first".to_string() + ); + } + if has_crlf && has_lf { + // Check for standalone \n (not part of \r\n). + let has_standalone_lf = content + .iter() + .enumerate() + .any(|(i, &b)| b == b'\n' && (i == 0 || content[i - 1] != b'\r')); + if has_standalone_lf { + return Err( + "file has mixed line endings (LF and CRLF) \u{2014} 8v requires consistent line endings. Normalize the file first.".to_string() + ); + } + } + Ok(()) +} pub use scan::{DirEntry, DirScan}; // ─── Standalone safe write operations ─────────────────────────────────────── @@ -187,12 +232,17 @@ pub fn safe_append( write_guard::guarded_append(path, root.as_path(), content) } -/// Append to a file with an exclusive advisory lock and automatic `\n` separator. +/// Append to a file with an exclusive advisory lock and automatic line-ending-aware separator. +/// +/// Under the lock, reads the full existing file content to: +/// 1. Detect the file's line ending (CRLF if any `\r\n` present, else LF). +/// 2. Validate existing content has no mixed endings (returns error if so). +/// 3. Insert a separator using the detected line ending if the file is non-empty +/// and doesn't already end with `\n`. /// -/// Under the lock, checks whether the file ends with `\n` and inserts one if -/// needed before appending `content`. This eliminates the TOCTOU race in -/// concurrent appends where multiple callers all observe a missing trailing -/// newline and each prepend one, producing spurious blank lines. +/// Normalises bare `\n` in `content` to `\r\n` when the file uses CRLF. +/// Appends a trailing line terminator matching the file's ending if absent. +/// The full read happens inside the flock so it remains race-safe. pub fn safe_append_with_separator( path: &std::path::Path, root: &ContainmentRoot, diff --git a/o8v-fs/src/write_guard.rs b/o8v-fs/src/write_guard.rs index 5c906cc..d2afbbf 100644 --- a/o8v-fs/src/write_guard.rs +++ b/o8v-fs/src/write_guard.rs @@ -65,12 +65,22 @@ pub(crate) fn guarded_append(path: &Path, root: &Path, content: &[u8]) -> Result }) } /// Append to a file with an exclusive advisory lock, automatically inserting -/// a `\n` separator if the file does not end with one. +/// a separator if the file does not end with one. /// -/// This serializes peek-last-byte + conditional-separator + append under the -/// lock, eliminating the race where two concurrent callers both observe a -/// missing trailing newline and both prepend `\n`, producing a spurious blank -/// line. +/// Under the lock the function: +/// 1. Reads the full existing file content. +/// 2. Validates it with `validate_line_endings_bytes` — rejects CR-only, +/// lone-`\r`-in-LF, and mixed CRLF+LF files. +/// 3. Detects the file's line ending (CRLF if any `\r\n` present, else LF). +/// 4. Inserts a separator using the detected ending when the file is non-empty +/// and does not already end with `\n`. +/// 5. Normalises bare `\n` in `content` to `\r\n` when the file uses CRLF. +/// 6. Appends `content`, then ensures a trailing line terminator. +/// +/// This serializes the full read + validate + conditional-separator + append +/// under the lock, eliminating the race where two concurrent callers both +/// observe a missing trailing newline and both prepend a separator, producing +/// a spurious blank line. pub(crate) fn guarded_append_with_separator( path: &Path, root: &Path, @@ -102,41 +112,86 @@ pub(crate) fn guarded_append_with_separator( cause: e, })?; - // Under the lock: check whether the file ends with a newline. - let len = file - .metadata() - .map_err(|e| FsError::Io { - path: path.to_path_buf(), - cause: e, - })? - .len(); + // Under the lock: read the entire existing file content. + // This is necessary to: + // 1. Detect CRLF correctly even when the file has no trailing newline + // (the old 2-byte tail read fails in that case). + // 2. Validate that the existing content has no mixed line endings before + // appending (R3 fix). + let mut existing = Vec::new(); + file.seek(SeekFrom::Start(0)).map_err(|e| FsError::Io { + path: path.to_path_buf(), + cause: e, + })?; + file.read_to_end(&mut existing).map_err(|e| FsError::Io { + path: path.to_path_buf(), + cause: e, + })?; + // Seek back to end so subsequent write_all appends at EOF. + // (O_APPEND on Linux/macOS guarantees this, but we seek explicitly for + // clarity and cross-platform safety.) + file.seek(SeekFrom::End(0)).map_err(|e| FsError::Io { + path: path.to_path_buf(), + cause: e, + })?; - if len > 0 { - let mut last = [0u8; 1]; - file.seek(SeekFrom::Start(len - 1)) - .map_err(|e| FsError::Io { + // Validate existing content: reject CR-only, lone-\r-in-LF, and mixed + // CRLF+LF files. Unlock before returning so the file descriptor is + // released cleanly on error. + if !existing.is_empty() { + if let Err(cause) = crate::validate_line_endings_bytes(&existing) { + let _ = file.unlock(); + return Err(FsError::InvalidContent { path: path.to_path_buf(), - cause: e, - })?; - file.read_exact(&mut last).map_err(|e| FsError::Io { + cause, + }); + } + } + + // Detect line ending: CRLF if any \r\n is present anywhere in the file, + // LF otherwise (includes empty-file case). + let is_crlf = existing.windows(2).any(|w| w == b"\r\n"); + let line_ending: &[u8] = if is_crlf { b"\r\n" } else { b"\n" }; + + // Insert separator if file is non-empty and doesn't end with a newline. + if !existing.is_empty() && existing.last() != Some(&b'\n') { + file.write_all(line_ending).map_err(|e| FsError::Io { path: path.to_path_buf(), cause: e, })?; - if last[0] != b'\n' { - file.write_all(b"\n").map_err(|e| FsError::Io { - path: path.to_path_buf(), - cause: e, - })?; - } } - file.write_all(content).map_err(|e| FsError::Io { + // Normalise content: if the file is CRLF and content uses bare \n, convert. + let normalised: Vec; + let content_to_write: &[u8] = if is_crlf && content.contains(&b'\n') { + // Replace bare \n (not already preceded by \r) with \r\n. + let mut out = Vec::with_capacity(content.len() + content.len() / 4); + let mut i = 0; + while i < content.len() { + if content[i] == b'\n' { + if i == 0 || content[i - 1] != b'\r' { + out.push(b'\r'); + } + out.push(b'\n'); + } else { + out.push(content[i]); + } + i += 1; + } + normalised = out; + &normalised + } else { + content + }; + + file.write_all(content_to_write).map_err(|e| FsError::Io { path: path.to_path_buf(), cause: e, })?; - if !content.is_empty() && *content.last().unwrap() != b'\n' { - file.write_all(b"\n").map_err(|e| FsError::Io { + // Ensure trailing line terminator. + if !content_to_write.is_empty() && *content_to_write.last().unwrap() != b'\n' { + file.write_all(line_ending).map_err(|e| FsError::Io { path: path.to_path_buf(), cause: e, })?; diff --git a/o8v-fs/tests/stress_filesystem.rs b/o8v-fs/tests/stress_filesystem.rs index 55ef0a5..ffd4442 100644 --- a/o8v-fs/tests/stress_filesystem.rs +++ b/o8v-fs/tests/stress_filesystem.rs @@ -331,3 +331,59 @@ fn append_concurrent_50_no_spurious_blank_lines() { assert!(non_empty.contains(&expected.as_str()), "missing {expected}"); } } + +/// PR#4-R10: Concurrent 50-thread append on a CRLF file without trailing newline. +/// Each thread must detect CRLF and append with \r\n as the separator. +/// Today each thread misdetects the file as LF (same root cause as PR#4-R1), +/// so the result contains bare \n separators mixed into CRLF content. +#[test] +fn append_concurrent_50_crlf_seed_no_trailing_newline_preserves_crlf() { + use std::sync::Arc; + + let dir = tempdir().unwrap(); + let file_path = dir.path().join("crlf_concurrent.txt"); + // CRLF seed with no trailing newline -- triggers the is_crlf detection bug. + std::fs::write(&file_path, b"seed\r\nstart").unwrap(); + + let root = o8v_fs::ContainmentRoot::new(dir.path()).unwrap(); + let root = Arc::new(root); + let file_path = Arc::new(file_path); + + let handles: Vec<_> = (0..50) + .map(|i| { + let root = Arc::clone(&root); + let path = Arc::clone(&file_path); + std::thread::spawn(move || { + let line = format!("line{i}"); + o8v_fs::safe_append_with_separator(&path, &root, line.as_bytes()) + .expect("append failed"); + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } + + let bytes = std::fs::read(file_path.as_ref()).unwrap(); + + // Every \n must be preceded by \r (pure CRLF). + for (i, &b) in bytes.iter().enumerate() { + if b == b'\n' { + assert!( + i > 0 && bytes[i - 1] == b'\r', + "bare \\n at byte {} -- CRLF not preserved under concurrent append; file: {:?}", + i, + bytes + ); + } + if b == b'\r' { + assert!( + i + 1 < bytes.len() && bytes[i + 1] == b'\n', + "lone \\r at byte {} -- malformed CRLF sequence; file: {:?}", + i, + bytes + ); + } + } +} diff --git a/o8v/src/commands/write.rs b/o8v/src/commands/write.rs index 70c27ab..a488bbe 100644 --- a/o8v/src/commands/write.rs +++ b/o8v/src/commands/write.rs @@ -330,51 +330,13 @@ fn detect_line_ending(content: &str) -> &'static str { } } -/// Returns true if `content` contains any lone `\r` (i.e., `\r` not immediately followed by `\n`). -fn has_lone_cr(content: &str) -> bool { - content - .chars() - .zip(content.chars().skip(1).chain(std::iter::once('\0'))) - .any(|(c, next)| c == '\r' && next != '\n') -} - /// Validate that the file's line endings are supported for line-based operations. /// -/// Returns Err for: -/// - Lone `\r` (classic Mac, no `\n` at all) -/// - Any lone `\r` in a `\n`-terminated file (mid-line `\r`) -/// - Mixed `\r\n` and lone `\n` +/// Delegates to `o8v_fs::validate_line_endings_bytes` so both call sites share +/// the same logic and produce the same error messages. fn validate_line_endings(content: &str) -> Result<(), String> { - let has_crlf = content.contains("\r\n"); - let lone_cr = has_lone_cr(content); - let has_lf = content.contains('\n'); - - if lone_cr && !has_lf { - return Err( - "error: file uses classic Mac line endings (\\r only) — 8v does not support this format. Convert to \\n or \\r\\n first." - .to_string(), - ); - } - if lone_cr && has_lf { - return Err( - "error: file contains carriage return (\\r) characters outside of \\r\\n sequences — normalize line endings first" - .to_string(), - ); - } - if has_crlf && has_lf { - // Check for standalone \n (not part of \r\n) - // We know has_lf is true; check if any \n is not preceded by \r - let has_standalone_lf = content - .char_indices() - .any(|(i, c)| c == '\n' && (i == 0 || content.as_bytes()[i - 1] != b'\r')); - if has_standalone_lf { - return Err( - "error: file has mixed line endings (LF and CRLF) — 8v requires consistent line endings. Normalize the file first." - .to_string(), - ); - } - } - Ok(()) + o8v_fs::validate_line_endings_bytes(content.as_bytes()) + .map_err(|cause| format!("error: {cause}")) } /// Validate content provided by the user for line-based operations. @@ -685,7 +647,22 @@ pub(crate) fn write_to_report( })?; let existing_content = file.content(); validate_line_endings(existing_content)?; - let match_count = existing_content.matches(find.as_str()).count(); + let line_ending = detect_line_ending(existing_content); + + // Normalise find/replace to the file's line ending so that a user + // who passes pure-LF patterns against a CRLF file gets correct + // matches and CRLF-preserving output. + let find_normalised; + let replace_normalised; + let (effective_find, effective_replace) = if line_ending == "\r\n" { + find_normalised = find.replace('\n', "\r\n"); + replace_normalised = replace.replace('\n', "\r\n"); + (find_normalised.as_str(), replace_normalised.as_str()) + } else { + (find.as_str(), replace.as_str()) + }; + + let match_count = existing_content.matches(effective_find).count(); if match_count == 0 { return Err(render_not_found_hint(find, existing_content, &path_str)); @@ -701,9 +678,9 @@ pub(crate) fn write_to_report( } let new_content = if *all { - existing_content.replace(find.as_str(), replace.as_str()) + existing_content.replace(effective_find, effective_replace) } else { - existing_content.replacen(find.as_str(), replace.as_str(), 1) + existing_content.replacen(effective_find, effective_replace, 1) }; let count = if *all { match_count } else { 1 }; diff --git a/o8v/tests/e2e_write.rs b/o8v/tests/e2e_write.rs index 680edb2..e7c3fdd 100644 --- a/o8v/tests/e2e_write.rs +++ b/o8v/tests/e2e_write.rs @@ -1248,3 +1248,505 @@ fn append_ensures_trailing_newline() { "--append must produce expected content with trailing newline" ); } + +// --- Issue #3: CRLF line-ending preservation gaps -------------------------- +// +// Tests 1-3 verify the fixes for issue #3. +// Tests 4-5 are positive baselines (Insert and Delete already worked correctly). + +/// Issue #3 Test 1 -- Append regression (f330d45): +/// Seeding a pure CRLF file then appending must produce CRLF endings, +/// not a trailing bare \n. Today --append hardcodes \n regardless of +/// the file line ending, so the file ends with `appended\n` breaking CRLF purity. +#[test] +fn append_to_crlf_file_preserves_crlf_endings() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf.txt"); + std::fs::write(&file, b"line1\r\nline2\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf.txt", "--append", "appended"]) + .output() + .expect("run 8v write --append"); + + assert!( + out.status.success(), + "--append on CRLF file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + // The appended line must end with \r\n, not bare \n. + assert_eq!( + result, b"line1\r\nline2\r\nappended\r\n", + "append to CRLF file must preserve CRLF; got bytes: {:?}", + result + ); +} + +/// Issue #3 Test 2 -- Append corrupts CRLF file for the next 8v op: +/// After appending to a CRLF file the file has mixed endings. +/// A subsequent 8v write on the file must succeed -- today it fails +/// because validate_line_endings rejects the now-mixed file. +#[test] +fn append_to_crlf_file_subsequent_write_succeeds() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf2.txt"); + std::fs::write(&file, b"line1\r\nline2\r\n").unwrap(); + + // First op: append. + let out1 = bin_in(tmp.path()) + .args(["write", "crlf2.txt", "--append", "appended"]) + .output() + .expect("run 8v write --append"); + assert!( + out1.status.success(), + "first append must exit 0\nstderr: {}", + String::from_utf8_lossy(&out1.stderr) + ); + + // Second op: replace line 1 -- must NOT be rejected due to mixed endings + // introduced by the first append. + let out2 = bin_in(tmp.path()) + .args(["write", "crlf2.txt:1", "replaced"]) + .output() + .expect("run second 8v write"); + + assert!( + out2.status.success(), + "second write after append must exit 0; file likely has mixed endings now\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out2.stdout), + String::from_utf8_lossy(&out2.stderr) + ); +} + +/// Issue #3 Test 3 -- FindReplace with \n in replacement on a CRLF file: +/// The replacement string "a\nb" must be written as "a\r\nb" (CRLF-normalised) +/// on a CRLF file. Today the raw \n is injected, creating mixed endings. +#[test] +fn find_replace_newline_in_replacement_on_crlf_file_preserves_crlf() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf3.txt"); + std::fs::write(&file, b"foo\r\nbar\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf3.txt", "--find", "foo", "--replace", "a\nb"]) + .output() + .expect("run 8v write --find --replace"); + + assert!( + out.status.success(), + "find-replace on CRLF file must exit 0\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + // After replacing "foo" with "a\nb", the file must remain pure CRLF. + assert_eq!( + result, b"a\r\nb\r\nbar\r\n", + "find-replace must produce pure CRLF; got bytes: {:?}", + result + ); +} + +/// Issue #3 Test 4 -- Insert + CRLF positive baseline: +/// `8v write file:2 --insert "X"` on a CRLF file should already preserve +/// CRLF endings (Insert uses detect_line_ending). This documents that +/// Insert already works -- the gap is test coverage, not behaviour. +#[test] +fn insert_into_crlf_file_preserves_crlf_endings() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf4.txt"); + std::fs::write(&file, b"line1\r\nline2\r\nline3\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf4.txt:2", "--insert", "X"]) + .output() + .expect("run 8v write --insert"); + + assert!( + out.status.success(), + "insert into CRLF file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"line1\r\nX\r\nline2\r\nline3\r\n", + "insert into CRLF file must preserve CRLF endings byte-exactly; got: {:?}", + result + ); +} + +/// Issue #3 Test 5 -- Delete + CRLF positive baseline: +/// `8v write file:2-2 --delete` on a CRLF file should already preserve +/// CRLF endings (Delete uses detect_line_ending). Documents Delete already +/// works -- gap is test coverage, not behaviour. +#[test] +fn delete_from_crlf_file_preserves_crlf_endings() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf5.txt"); + std::fs::write(&file, b"line1\r\nline2\r\nline3\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf5.txt:2-2", "--delete"]) + .output() + .expect("run 8v write --delete"); + + assert!( + out.status.success(), + "delete from CRLF file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"line1\r\nline3\r\n", + "delete from CRLF file must preserve CRLF endings byte-exactly; got: {:?}", + result + ); +} + +// ─── PR #4 review reproducers ───────────────────────────────────────────────── +// +// Each test below pins a concrete finding from the three PR #4 review rounds. +// Tests marked #[ignore] exercise currently-broken behaviour; they will be +// un-ignored when the underlying bug is fixed. + +#[allow(dead_code)] +fn assert_pure_crlf(bytes: &[u8]) { + for (i, &b) in bytes.iter().enumerate() { + if b == b'\n' { + assert!( + i > 0 && bytes[i - 1] == b'\r', + "bare \\n at byte {} in {:?}", + i, + bytes + ); + } + if b == b'\r' { + assert!( + i + 1 < bytes.len() && bytes[i + 1] == b'\n', + "lone \\r at byte {} in {:?}", + i, + bytes + ); + } + } +} + +/// PR#4-R1: CRLF file without trailing newline -- append must produce pure CRLF. +/// Today `is_crlf` reads only the last 2 bytes; when the file ends without +/// `\r\n` those 2 bytes are the last chars of the final word, so detection +/// falls back to LF and the appended line gets a bare `\n` instead of `\r\n`. +#[test] +fn append_to_crlf_file_without_trailing_newline_preserves_crlf() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_no_trail.txt"); + std::fs::write(&file, b"line1\r\nline2\r\nno_trailing").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf_no_trail.txt", "--append", "appended"]) + .output() + .expect("run 8v write --append"); + + assert!( + out.status.success(), + "--append on CRLF file without trailing newline must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"line1\r\nline2\r\nno_trailing\r\nappended\r\n", + "append to CRLF-without-trailing-newline must produce pure CRLF; got bytes: {:?}", + result + ); +} + +/// PR#4-R2: After appending to a CRLF file without trailing newline the file +/// ends up with mixed endings (due to bug above). A subsequent `8v write` +/// on that file must still succeed -- today it is rejected by validate_line_endings. +#[test] +fn append_then_subsequent_write_succeeds_on_crlf_file_without_trailing_newline() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_no_trail2.txt"); + std::fs::write(&file, b"line1\r\nline2\r\nno_trailing").unwrap(); + + // First op: append. + let out1 = bin_in(tmp.path()) + .args(["write", "crlf_no_trail2.txt", "--append", "appended"]) + .output() + .expect("run 8v write --append"); + assert!( + out1.status.success(), + "first append must exit 0\nstderr: {}", + String::from_utf8_lossy(&out1.stderr) + ); + + // Second op: replace line 1 -- must not be rejected due to mixed endings + // left behind by the first (buggy) append. + let out2 = bin_in(tmp.path()) + .args(["write", "crlf_no_trail2.txt:1", "replaced"]) + .output() + .expect("run second 8v write"); + + assert!( + out2.status.success(), + "second write after append must exit 0; file likely has mixed endings\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out2.stdout), + String::from_utf8_lossy(&out2.stderr) + ); +} + +/// PR#4-R3: Appending to an already-mixed file must be REJECTED. +/// Today `--append` skips validate_line_endings on the existing file content, +/// so it silently appends to a mixed file. +#[test] +fn append_on_pre_existing_mixed_ending_file_is_rejected() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("mixed.txt"); + // Deliberately mixed: first line CRLF, second LF, third CRLF. + std::fs::write(&file, b"line1\r\nline2\nline3\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "mixed.txt", "--append", "x"]) + .output() + .expect("run 8v write --append"); + + assert!( + !out.status.success(), + "append on mixed-ending file must fail\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + combined.to_lowercase().contains("mixed"), + "error must mention mixed line endings; got: {combined}" + ); + // File must be untouched. + assert_eq!( + std::fs::read(&file).unwrap(), + b"line1\r\nline2\nline3\r\n", + "file must be unchanged on rejected append" + ); +} + +/// PR#4-R4: Pin current behaviour for append with empty content on a CRLF +/// file that lacks a trailing newline. Empty content is already rejected +/// globally before any line-ending detection; this ensures that stays true. +#[test] +fn append_empty_content_on_crlf_file_without_trailing_newline() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_no_trail3.txt"); + std::fs::write(&file, b"line1\r\nline2").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf_no_trail3.txt", "--append", ""]) + .output() + .expect("run 8v write --append"); + + // Empty content is rejected before any line-ending processing. + assert!( + !out.status.success(), + "empty append must be rejected\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + // File must be untouched. + assert_eq!( + std::fs::read(&file).unwrap(), + b"line1\r\nline2", + "file must be unchanged on rejected empty append" + ); +} + +/// PR#4-R5: Boundary case -- 2-byte CRLF-only file (b"\r\n"). +/// `is_crlf` checks whether the last 2 bytes are b"\r\n"; this is that +/// exact boundary -- the entire file is one CRLF sequence. +#[test] +fn append_to_two_byte_crlf_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("two_byte_crlf.txt"); + std::fs::write(&file, b"\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "two_byte_crlf.txt", "--append", "x"]) + .output() + .expect("run 8v write --append"); + + assert!( + out.status.success(), + "--append on 2-byte CRLF file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"\r\nx\r\n", + "append to 2-byte CRLF file must produce pure CRLF; got: {:?}", + result + ); +} + +/// PR#4-R6: Boundary case -- single lone-CR byte (b"\r"). +/// This is an invalid file; pin that the tool exits cleanly (no crash/abort). +#[test] +fn append_to_one_byte_lone_cr_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("lone_cr.txt"); + std::fs::write(&file, b"\r").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "lone_cr.txt", "--append", "x"]) + .output() + .expect("run 8v write --append"); + + // A lone-CR file is already invalid. The tool may reject it or fall back + // to LF. Either outcome is acceptable. What must NOT happen is a panic or + // unclean exit (signal). Pin that the process terminates normally. + assert!( + out.status.code().is_some(), + "process must exit cleanly (no signal/abort); status: {:?}", + out.status + ); +} + +/// PR#4-R7: Empty file -- 0 bytes. No content means no line-ending detection +/// possible; must default to LF and produce b"x\n". +#[test] +fn append_to_empty_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("empty_append.txt"); + std::fs::write(&file, b"").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "empty_append.txt", "--append", "x"]) + .output() + .expect("run 8v write --append"); + + assert!( + out.status.success(), + "--append on empty file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"x\n", + "append to empty file must default to LF; got: {:?}", + result + ); +} + +/// PR#4-R8: find-replace with a newline in the find pattern on a CRLF file +/// when there is NO match. The operation must fail; the error must reference +/// the user's original pattern, NOT the internally-normalised CRLF form. +#[test] +fn find_replace_with_newline_in_find_zero_matches_on_crlf_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_find.txt"); + std::fs::write(&file, b"foo\r\nbar\r\n").unwrap(); + + // Pattern "baz\nqux" (LF) cannot match anything in this file. + let out = bin_in(tmp.path()) + .args([ + "write", + "crlf_find.txt", + "--find", + "baz\nqux", + "--replace", + "x", + ]) + .output() + .expect("run 8v write"); + + assert!( + !out.status.success(), + "no-match find must fail\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + // The error must NOT expose the internally-normalised "baz\r\nqux" form. + assert!( + !combined.contains("baz\r\nqux"), + "error must not expose internal CRLF-normalised pattern; got: {combined}" + ); + // File must be untouched. + assert_eq!(std::fs::read(&file).unwrap(), b"foo\r\nbar\r\n"); +} + +/// PR#4-R9: find-replace with a newline in the find pattern on a CRLF file +/// when the pattern DOES match after CRLF-normalisation ("foo\nbar" -> "foo\r\nbar"). +/// If normalisation is implemented the result must be pure CRLF; if not yet +/// implemented the operation must fail with "no matches found" (not crash). +#[test] +fn find_replace_with_newline_in_find_matches_on_crlf_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_find2.txt"); + std::fs::write(&file, b"foo\r\nbar\r\n").unwrap(); + + // "foo\nbar" should match "foo\r\nbar" after normalisation. + let out = bin_in(tmp.path()) + .args([ + "write", + "crlf_find2.txt", + "--find", + "foo\nbar", + "--replace", + "x", + ]) + .output() + .expect("run 8v write"); + + let result = std::fs::read(&file).unwrap(); + + if out.status.success() { + // Normalisation implemented: result must be pure CRLF. + assert_eq!( + result, b"x\r\n", + "find-replace across CRLF boundary must produce pure CRLF; got: {:?}", + result + ); + } else { + // Normalisation not yet implemented: must fail with no-match error. + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + combined.contains("no matches found"), + "failure must state 'no matches found'; got: {combined}" + ); + assert_eq!( + result, b"foo\r\nbar\r\n", + "file must be unchanged on no-match" + ); + } +}