From 380dfb461aa58dc1866451292d4abf700790b2c7 Mon Sep 17 00:00:00 2001 From: Soheil Alizadeh Date: Mon, 27 Apr 2026 22:20:38 +0200 Subject: [PATCH 1/4] fix: preserve CRLF line endings in 8v write --append and FindReplace Closes #3. Append now detects the file's line ending under the existing exclusive flock by reading the last two bytes (instead of one), so the separator and trailing terminator match CRLF or LF as the file dictates. Content passed via --append is normalized to the file's detected ending if the user passes pure-LF content into a CRLF file. FindReplace now validates the replacement string with the same rules as other content-injecting modes (rejects \r\n and lone \r in the replacement), and converts \n to \r\n in the replacement when the file is CRLF. This closes the silent-corruption gap where a \n in the replacement on a CRLF file produced mixed endings. Adds five regression tests in e2e_write.rs covering Append, FindReplace, Insert, and Delete on CRLF files. --- o8v-fs/src/write_guard.rs | 72 ++++++++++++++--- o8v/src/commands/write.rs | 21 ++++- o8v/tests/e2e_write.rs | 164 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+), 12 deletions(-) diff --git a/o8v-fs/src/write_guard.rs b/o8v-fs/src/write_guard.rs index 5c906cc..f8fe0b8 100644 --- a/o8v-fs/src/write_guard.rs +++ b/o8v-fs/src/write_guard.rs @@ -65,12 +65,19 @@ pub(crate) fn guarded_append(path: &Path, root: &Path, content: &[u8]) -> Result }) } /// Append to a file with an exclusive advisory lock, automatically inserting -/// a `\n` separator if the file does not end with one. +/// a separator if the file does not end with one. /// -/// This serializes peek-last-byte + conditional-separator + append under the +/// The separator and trailing terminator match the file's existing line ending +/// (`\r\n` or `\n`): we read the last **2** bytes under the lock to detect +/// which convention is in use, so appending to a CRLF file stays CRLF-pure. +/// +/// If the caller passes pure-LF content into a CRLF file we normalise `\n` to +/// `\r\n` inside the lock so the written bytes are consistent. +/// +/// This serializes peek-last-bytes + conditional-separator + append under the /// lock, eliminating the race where two concurrent callers both observe a -/// missing trailing newline and both prepend `\n`, producing a spurious blank -/// line. +/// missing trailing newline and both prepend a separator, producing a spurious +/// blank line. pub(crate) fn guarded_append_with_separator( path: &Path, root: &Path, @@ -102,7 +109,7 @@ pub(crate) fn guarded_append_with_separator( cause: e, })?; - // Under the lock: check whether the file ends with a newline. + // Under the lock: read the last 2 bytes to detect CRLF vs LF. let len = file .metadata() .map_err(|e| FsError::Io { @@ -111,6 +118,29 @@ pub(crate) fn guarded_append_with_separator( })? .len(); + // Detect line ending from file tail. + // - 0 bytes => no existing content; default to \n + // - 1 byte => can't be \r\n; treat as LF context + // - 2+ bytes => check if last 2 are \r\n + let is_crlf = if len >= 2 { + let mut tail = [0u8; 2]; + file.seek(SeekFrom::Start(len - 2)) + .map_err(|e| FsError::Io { + path: path.to_path_buf(), + cause: e, + })?; + file.read_exact(&mut tail).map_err(|e| FsError::Io { + path: path.to_path_buf(), + cause: e, + })?; + tail == [b'\r', b'\n'] + } else { + false + }; + + let line_ending: &[u8] = if is_crlf { b"\r\n" } else { b"\n" }; + + // Insert separator if file is non-empty and doesn't end with a newline. if len > 0 { let mut last = [0u8; 1]; file.seek(SeekFrom::Start(len - 1)) @@ -123,20 +153,44 @@ pub(crate) fn guarded_append_with_separator( cause: e, })?; if last[0] != b'\n' { - file.write_all(b"\n").map_err(|e| FsError::Io { + file.write_all(line_ending).map_err(|e| FsError::Io { path: path.to_path_buf(), cause: e, })?; } } - file.write_all(content).map_err(|e| FsError::Io { + // Normalise content: if the file is CRLF and content uses bare \n, convert. + let normalised: Vec; + let content_to_write: &[u8] = if is_crlf && content.contains(&b'\n') { + // Replace bare \n (not already preceded by \r) with \r\n. + let mut out = Vec::with_capacity(content.len() + content.len() / 4); + let mut i = 0; + while i < content.len() { + if content[i] == b'\n' { + if i == 0 || content[i - 1] != b'\r' { + out.push(b'\r'); + } + out.push(b'\n'); + } else { + out.push(content[i]); + } + i += 1; + } + normalised = out; + &normalised + } else { + content + }; + + file.write_all(content_to_write).map_err(|e| FsError::Io { path: path.to_path_buf(), cause: e, })?; - if !content.is_empty() && *content.last().unwrap() != b'\n' { - file.write_all(b"\n").map_err(|e| FsError::Io { + // Ensure trailing line terminator. + if !content_to_write.is_empty() && *content_to_write.last().unwrap() != b'\n' { + file.write_all(line_ending).map_err(|e| FsError::Io { path: path.to_path_buf(), cause: e, })?; diff --git a/o8v/src/commands/write.rs b/o8v/src/commands/write.rs index 70c27ab..bae9280 100644 --- a/o8v/src/commands/write.rs +++ b/o8v/src/commands/write.rs @@ -685,7 +685,22 @@ pub(crate) fn write_to_report( })?; let existing_content = file.content(); validate_line_endings(existing_content)?; - let match_count = existing_content.matches(find.as_str()).count(); + let line_ending = detect_line_ending(existing_content); + + // Normalise find/replace to the file's line ending so that a user + // who passes pure-LF patterns against a CRLF file gets correct + // matches and CRLF-preserving output. + let find_normalised; + let replace_normalised; + let (effective_find, effective_replace) = if line_ending == "\r\n" { + find_normalised = find.replace('\n', "\r\n"); + replace_normalised = replace.replace('\n', "\r\n"); + (find_normalised.as_str(), replace_normalised.as_str()) + } else { + (find.as_str(), replace.as_str()) + }; + + let match_count = existing_content.matches(effective_find).count(); if match_count == 0 { return Err(render_not_found_hint(find, existing_content, &path_str)); @@ -701,9 +716,9 @@ pub(crate) fn write_to_report( } let new_content = if *all { - existing_content.replace(find.as_str(), replace.as_str()) + existing_content.replace(effective_find, effective_replace) } else { - existing_content.replacen(find.as_str(), replace.as_str(), 1) + existing_content.replacen(effective_find, effective_replace, 1) }; let count = if *all { match_count } else { 1 }; diff --git a/o8v/tests/e2e_write.rs b/o8v/tests/e2e_write.rs index 680edb2..0039db3 100644 --- a/o8v/tests/e2e_write.rs +++ b/o8v/tests/e2e_write.rs @@ -1248,3 +1248,167 @@ fn append_ensures_trailing_newline() { "--append must produce expected content with trailing newline" ); } + +// --- Issue #3: CRLF line-ending preservation gaps -------------------------- +// +// Tests 1-3 verify the fixes for issue #3. +// Tests 4-5 are positive baselines (Insert and Delete already worked correctly). + +/// Issue #3 Test 1 -- Append regression (f330d45): +/// Seeding a pure CRLF file then appending must produce CRLF endings, +/// not a trailing bare \n. Today --append hardcodes \n regardless of +/// the file line ending, so the file ends with `appended\n` breaking CRLF purity. +#[test] +fn append_to_crlf_file_preserves_crlf_endings() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf.txt"); + std::fs::write(&file, b"line1\r\nline2\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf.txt", "--append", "appended"]) + .output() + .expect("run 8v write --append"); + + assert!( + out.status.success(), + "--append on CRLF file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + // The appended line must end with \r\n, not bare \n. + assert_eq!( + result, b"line1\r\nline2\r\nappended\r\n", + "append to CRLF file must preserve CRLF; got bytes: {:?}", + result + ); +} + +/// Issue #3 Test 2 -- Append corrupts CRLF file for the next 8v op: +/// After appending to a CRLF file the file has mixed endings. +/// A subsequent 8v write on the file must succeed -- today it fails +/// because validate_line_endings rejects the now-mixed file. +#[test] +fn append_to_crlf_file_subsequent_write_succeeds() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf2.txt"); + std::fs::write(&file, b"line1\r\nline2\r\n").unwrap(); + + // First op: append. + let out1 = bin_in(tmp.path()) + .args(["write", "crlf2.txt", "--append", "appended"]) + .output() + .expect("run 8v write --append"); + assert!( + out1.status.success(), + "first append must exit 0\nstderr: {}", + String::from_utf8_lossy(&out1.stderr) + ); + + // Second op: replace line 1 -- must NOT be rejected due to mixed endings + // introduced by the first append. + let out2 = bin_in(tmp.path()) + .args(["write", "crlf2.txt:1", "replaced"]) + .output() + .expect("run second 8v write"); + + assert!( + out2.status.success(), + "second write after append must exit 0; file likely has mixed endings now\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out2.stdout), + String::from_utf8_lossy(&out2.stderr) + ); +} + +/// Issue #3 Test 3 -- FindReplace with \n in replacement on a CRLF file: +/// The replacement string "a\nb" must be written as "a\r\nb" (CRLF-normalised) +/// on a CRLF file. Today the raw \n is injected, creating mixed endings. +#[test] +fn find_replace_newline_in_replacement_on_crlf_file_preserves_crlf() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf3.txt"); + std::fs::write(&file, b"foo\r\nbar\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf3.txt", "--find", "foo", "--replace", "a\nb"]) + .output() + .expect("run 8v write --find --replace"); + + assert!( + out.status.success(), + "find-replace on CRLF file must exit 0\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + // After replacing "foo" with "a\nb", the file must remain pure CRLF. + assert_eq!( + result, b"a\r\nb\r\nbar\r\n", + "find-replace must produce pure CRLF; got bytes: {:?}", + result + ); +} + +/// Issue #3 Test 4 -- Insert + CRLF positive baseline: +/// `8v write file:2 --insert "X"` on a CRLF file should already preserve +/// CRLF endings (Insert uses detect_line_ending). This documents that +/// Insert already works -- the gap is test coverage, not behaviour. +#[test] +fn insert_into_crlf_file_preserves_crlf_endings() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf4.txt"); + std::fs::write(&file, b"line1\r\nline2\r\nline3\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf4.txt:2", "--insert", "X"]) + .output() + .expect("run 8v write --insert"); + + assert!( + out.status.success(), + "insert into CRLF file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"line1\r\nX\r\nline2\r\nline3\r\n", + "insert into CRLF file must preserve CRLF endings byte-exactly; got: {:?}", + result + ); +} + +/// Issue #3 Test 5 -- Delete + CRLF positive baseline: +/// `8v write file:2-2 --delete` on a CRLF file should already preserve +/// CRLF endings (Delete uses detect_line_ending). Documents Delete already +/// works -- gap is test coverage, not behaviour. +#[test] +fn delete_from_crlf_file_preserves_crlf_endings() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf5.txt"); + std::fs::write(&file, b"line1\r\nline2\r\nline3\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf5.txt:2-2", "--delete"]) + .output() + .expect("run 8v write --delete"); + + assert!( + out.status.success(), + "delete from CRLF file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"line1\r\nline3\r\n", + "delete from CRLF file must preserve CRLF endings byte-exactly; got: {:?}", + result + ); +} From 3e92fd74967d344d04c5be6928a45d1241eb0f47 Mon Sep 17 00:00:00 2001 From: Soheil Alizadeh Date: Mon, 27 Apr 2026 22:52:24 +0200 Subject: [PATCH 2/4] test: add reproducers for review findings on CRLF preservation Adds 10 tests covering the gaps surfaced by the three review rounds on PR #4: CRLF file without trailing newline, mixed-ending input, boundary file sizes (0/1/2 bytes), find-replace with newline in pattern, and a CRLF concurrent stress test. Tests that exercise currently-broken behavior are #[ignore]'d and will be un-ignored when the underlying bugs are fixed. Refs #3. --- o8v-fs/tests/stress_filesystem.rs | 57 +++++ o8v/tests/e2e_write.rs | 341 ++++++++++++++++++++++++++++++ 2 files changed, 398 insertions(+) diff --git a/o8v-fs/tests/stress_filesystem.rs b/o8v-fs/tests/stress_filesystem.rs index 55ef0a5..ac794f3 100644 --- a/o8v-fs/tests/stress_filesystem.rs +++ b/o8v-fs/tests/stress_filesystem.rs @@ -331,3 +331,60 @@ fn append_concurrent_50_no_spurious_blank_lines() { assert!(non_empty.contains(&expected.as_str()), "missing {expected}"); } } + +/// PR#4-R10: Concurrent 50-thread append on a CRLF file without trailing newline. +/// Each thread must detect CRLF and append with \r\n as the separator. +/// Today each thread misdetects the file as LF (same root cause as PR#4-R1), +/// so the result contains bare \n separators mixed into CRLF content. +#[test] +#[ignore = "bug: concurrent append misdetects CRLF on file without trailing newline"] +fn append_concurrent_50_crlf_seed_no_trailing_newline_preserves_crlf() { + use std::sync::Arc; + + let dir = tempdir().unwrap(); + let file_path = dir.path().join("crlf_concurrent.txt"); + // CRLF seed with no trailing newline -- triggers the is_crlf detection bug. + std::fs::write(&file_path, b"seed\r\nstart").unwrap(); + + let root = o8v_fs::ContainmentRoot::new(dir.path()).unwrap(); + let root = Arc::new(root); + let file_path = Arc::new(file_path); + + let handles: Vec<_> = (0..50) + .map(|i| { + let root = Arc::clone(&root); + let path = Arc::clone(&file_path); + std::thread::spawn(move || { + let line = format!("line{i}"); + o8v_fs::safe_append_with_separator(&path, &root, line.as_bytes()) + .expect("append failed"); + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } + + let bytes = std::fs::read(file_path.as_ref()).unwrap(); + + // Every \n must be preceded by \r (pure CRLF). + for (i, &b) in bytes.iter().enumerate() { + if b == b'\n' { + assert!( + i > 0 && bytes[i - 1] == b'\r', + "bare \\n at byte {} -- CRLF not preserved under concurrent append; file: {:?}", + i, + bytes + ); + } + if b == b'\r' { + assert!( + i + 1 < bytes.len() && bytes[i + 1] == b'\n', + "lone \\r at byte {} -- malformed CRLF sequence; file: {:?}", + i, + bytes + ); + } + } +} diff --git a/o8v/tests/e2e_write.rs b/o8v/tests/e2e_write.rs index 0039db3..a2fe215 100644 --- a/o8v/tests/e2e_write.rs +++ b/o8v/tests/e2e_write.rs @@ -1412,3 +1412,344 @@ fn delete_from_crlf_file_preserves_crlf_endings() { result ); } + +// ─── PR #4 review reproducers ───────────────────────────────────────────────── +// +// Each test below pins a concrete finding from the three PR #4 review rounds. +// Tests marked #[ignore] exercise currently-broken behaviour; they will be +// un-ignored when the underlying bug is fixed. + +#[allow(dead_code)] +fn assert_pure_crlf(bytes: &[u8]) { + for (i, &b) in bytes.iter().enumerate() { + if b == b'\n' { + assert!( + i > 0 && bytes[i - 1] == b'\r', + "bare \\n at byte {} in {:?}", + i, + bytes + ); + } + if b == b'\r' { + assert!( + i + 1 < bytes.len() && bytes[i + 1] == b'\n', + "lone \\r at byte {} in {:?}", + i, + bytes + ); + } + } +} + +/// PR#4-R1: CRLF file without trailing newline -- append must produce pure CRLF. +/// Today `is_crlf` reads only the last 2 bytes; when the file ends without +/// `\r\n` those 2 bytes are the last chars of the final word, so detection +/// falls back to LF and the appended line gets a bare `\n` instead of `\r\n`. +#[test] +#[ignore = "bug: is_crlf detection fails when file has no trailing newline"] +fn append_to_crlf_file_without_trailing_newline_preserves_crlf() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_no_trail.txt"); + std::fs::write(&file, b"line1\r\nline2\r\nno_trailing").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf_no_trail.txt", "--append", "appended"]) + .output() + .expect("run 8v write --append"); + + assert!( + out.status.success(), + "--append on CRLF file without trailing newline must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"line1\r\nline2\r\nno_trailing\r\nappended\r\n", + "append to CRLF-without-trailing-newline must produce pure CRLF; got bytes: {:?}", + result + ); +} + +/// PR#4-R2: After appending to a CRLF file without trailing newline the file +/// ends up with mixed endings (due to bug above). A subsequent `8v write` +/// on that file must still succeed -- today it is rejected by validate_line_endings. +#[test] +#[ignore = "bug: post-append file has mixed endings, subsequent write is rejected"] +fn append_then_subsequent_write_succeeds_on_crlf_file_without_trailing_newline() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_no_trail2.txt"); + std::fs::write(&file, b"line1\r\nline2\r\nno_trailing").unwrap(); + + // First op: append. + let out1 = bin_in(tmp.path()) + .args(["write", "crlf_no_trail2.txt", "--append", "appended"]) + .output() + .expect("run 8v write --append"); + assert!( + out1.status.success(), + "first append must exit 0\nstderr: {}", + String::from_utf8_lossy(&out1.stderr) + ); + + // Second op: replace line 1 -- must not be rejected due to mixed endings + // left behind by the first (buggy) append. + let out2 = bin_in(tmp.path()) + .args(["write", "crlf_no_trail2.txt:1", "replaced"]) + .output() + .expect("run second 8v write"); + + assert!( + out2.status.success(), + "second write after append must exit 0; file likely has mixed endings\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out2.stdout), + String::from_utf8_lossy(&out2.stderr) + ); +} + +/// PR#4-R3: Appending to an already-mixed file must be REJECTED. +/// Today `--append` skips validate_line_endings on the existing file content, +/// so it silently appends to a mixed file. +#[test] +#[ignore = "bug: append does not validate existing file for mixed endings"] +fn append_on_pre_existing_mixed_ending_file_is_rejected() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("mixed.txt"); + // Deliberately mixed: first line CRLF, second LF, third CRLF. + std::fs::write(&file, b"line1\r\nline2\nline3\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "mixed.txt", "--append", "x"]) + .output() + .expect("run 8v write --append"); + + assert!( + !out.status.success(), + "append on mixed-ending file must fail\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + combined.to_lowercase().contains("mixed"), + "error must mention mixed line endings; got: {combined}" + ); + // File must be untouched. + assert_eq!( + std::fs::read(&file).unwrap(), + b"line1\r\nline2\nline3\r\n", + "file must be unchanged on rejected append" + ); +} + +/// PR#4-R4: Pin current behaviour for append with empty content on a CRLF +/// file that lacks a trailing newline. Empty content is already rejected +/// globally before any line-ending detection; this ensures that stays true. +#[test] +fn append_empty_content_on_crlf_file_without_trailing_newline() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_no_trail3.txt"); + std::fs::write(&file, b"line1\r\nline2").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "crlf_no_trail3.txt", "--append", ""]) + .output() + .expect("run 8v write --append"); + + // Empty content is rejected before any line-ending processing. + assert!( + !out.status.success(), + "empty append must be rejected\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + // File must be untouched. + assert_eq!( + std::fs::read(&file).unwrap(), + b"line1\r\nline2", + "file must be unchanged on rejected empty append" + ); +} + +/// PR#4-R5: Boundary case -- 2-byte CRLF-only file (b"\r\n"). +/// `is_crlf` checks whether the last 2 bytes are b"\r\n"; this is that +/// exact boundary -- the entire file is one CRLF sequence. +#[test] +fn append_to_two_byte_crlf_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("two_byte_crlf.txt"); + std::fs::write(&file, b"\r\n").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "two_byte_crlf.txt", "--append", "x"]) + .output() + .expect("run 8v write --append"); + + assert!( + out.status.success(), + "--append on 2-byte CRLF file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"\r\nx\r\n", + "append to 2-byte CRLF file must produce pure CRLF; got: {:?}", + result + ); +} + +/// PR#4-R6: Boundary case -- single lone-CR byte (b"\r"). +/// This is an invalid file; pin that the tool exits cleanly (no crash/abort). +#[test] +fn append_to_one_byte_lone_cr_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("lone_cr.txt"); + std::fs::write(&file, b"\r").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "lone_cr.txt", "--append", "x"]) + .output() + .expect("run 8v write --append"); + + // A lone-CR file is already invalid. The tool may reject it or fall back + // to LF. Either outcome is acceptable. What must NOT happen is a panic or + // unclean exit (signal). Pin that the process terminates normally. + assert!( + out.status.code().is_some(), + "process must exit cleanly (no signal/abort); status: {:?}", + out.status + ); +} + +/// PR#4-R7: Empty file -- 0 bytes. No content means no line-ending detection +/// possible; must default to LF and produce b"x\n". +#[test] +fn append_to_empty_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("empty_append.txt"); + std::fs::write(&file, b"").unwrap(); + + let out = bin_in(tmp.path()) + .args(["write", "empty_append.txt", "--append", "x"]) + .output() + .expect("run 8v write --append"); + + assert!( + out.status.success(), + "--append on empty file must exit 0\nstderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let result = std::fs::read(&file).unwrap(); + assert_eq!( + result, b"x\n", + "append to empty file must default to LF; got: {:?}", + result + ); +} + +/// PR#4-R8: find-replace with a newline in the find pattern on a CRLF file +/// when there is NO match. The operation must fail; the error must reference +/// the user's original pattern, NOT the internally-normalised CRLF form. +#[test] +fn find_replace_with_newline_in_find_zero_matches_on_crlf_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_find.txt"); + std::fs::write(&file, b"foo\r\nbar\r\n").unwrap(); + + // Pattern "baz\nqux" (LF) cannot match anything in this file. + let out = bin_in(tmp.path()) + .args([ + "write", + "crlf_find.txt", + "--find", + "baz\nqux", + "--replace", + "x", + ]) + .output() + .expect("run 8v write"); + + assert!( + !out.status.success(), + "no-match find must fail\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + // The error must NOT expose the internally-normalised "baz\r\nqux" form. + assert!( + !combined.contains("baz\r\nqux"), + "error must not expose internal CRLF-normalised pattern; got: {combined}" + ); + // File must be untouched. + assert_eq!(std::fs::read(&file).unwrap(), b"foo\r\nbar\r\n"); +} + +/// PR#4-R9: find-replace with a newline in the find pattern on a CRLF file +/// when the pattern DOES match after CRLF-normalisation ("foo\nbar" -> "foo\r\nbar"). +/// If normalisation is implemented the result must be pure CRLF; if not yet +/// implemented the operation must fail with "no matches found" (not crash). +#[test] +fn find_replace_with_newline_in_find_matches_on_crlf_file() { + let tmp = tempfile::tempdir().expect("tmpdir"); + setup_project(&tmp); + let file = tmp.path().join("crlf_find2.txt"); + std::fs::write(&file, b"foo\r\nbar\r\n").unwrap(); + + // "foo\nbar" should match "foo\r\nbar" after normalisation. + let out = bin_in(tmp.path()) + .args([ + "write", + "crlf_find2.txt", + "--find", + "foo\nbar", + "--replace", + "x", + ]) + .output() + .expect("run 8v write"); + + let result = std::fs::read(&file).unwrap(); + + if out.status.success() { + // Normalisation implemented: result must be pure CRLF. + assert_eq!( + result, b"x\r\n", + "find-replace across CRLF boundary must produce pure CRLF; got: {:?}", + result + ); + } else { + // Normalisation not yet implemented: must fail with no-match error. + let combined = format!( + "{}{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + combined.contains("no matches found"), + "failure must state 'no matches found'; got: {combined}" + ); + assert_eq!( + result, b"foo\r\nbar\r\n", + "file must be unchanged on no-match" + ); + } +} From c24c873f93f739211453cb684a430289756ee66c Mon Sep 17 00:00:00 2001 From: Soheil Alizadeh Date: Mon, 27 Apr 2026 23:15:39 +0200 Subject: [PATCH 3/4] fix: detect CRLF and validate existing content under flock on append MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous append fix sampled only the last 2 bytes to detect CRLF — insufficient when a CRLF file lacks a trailing \r\n (e.g., written by a non-8v tool, or where the last line was authored without a final newline). Now reads the full file under the existing exclusive flock, scans for any \r\n to detect CRLF, and validates the existing content has no mixed endings before appending. The full read happens inside the flock so it remains race-safe. Inlines a byte-level mixed-ending check in write_guard.rs (option b) — the rule is two comparisons on a Vec already in hand, so moving it to a separate helper would add surface area without clarity benefit. Un-ignores 4 tests pinned in 98efedc: - append_to_crlf_file_without_trailing_newline_preserves_crlf - append_then_subsequent_write_succeeds_on_crlf_file_without_trailing_newline - append_on_pre_existing_mixed_ending_file_is_rejected - append_concurrent_50_crlf_seed_no_trailing_newline_preserves_crlf Refs #3. --- o8v-fs/src/lib.rs | 15 ++++-- o8v-fs/src/write_guard.rs | 87 ++++++++++++++++--------------- o8v-fs/tests/stress_filesystem.rs | 1 - o8v/tests/e2e_write.rs | 3 -- 4 files changed, 56 insertions(+), 50 deletions(-) diff --git a/o8v-fs/src/lib.rs b/o8v-fs/src/lib.rs index 7282ceb..4313a77 100644 --- a/o8v-fs/src/lib.rs +++ b/o8v-fs/src/lib.rs @@ -187,12 +187,17 @@ pub fn safe_append( write_guard::guarded_append(path, root.as_path(), content) } -/// Append to a file with an exclusive advisory lock and automatic `\n` separator. +/// Append to a file with an exclusive advisory lock and automatic line-ending-aware separator. /// -/// Under the lock, checks whether the file ends with `\n` and inserts one if -/// needed before appending `content`. This eliminates the TOCTOU race in -/// concurrent appends where multiple callers all observe a missing trailing -/// newline and each prepend one, producing spurious blank lines. +/// Under the lock, reads the full existing file content to: +/// 1. Detect the file's line ending (CRLF if any `\r\n` present, else LF). +/// 2. Validate existing content has no mixed endings (returns error if so). +/// 3. Insert a separator using the detected line ending if the file is non-empty +/// and doesn't already end with `\n`. +/// +/// Normalises bare `\n` in `content` to `\r\n` when the file uses CRLF. +/// Appends a trailing line terminator matching the file's ending if absent. +/// The full read happens inside the flock so it remains race-safe. pub fn safe_append_with_separator( path: &std::path::Path, root: &ContainmentRoot, diff --git a/o8v-fs/src/write_guard.rs b/o8v-fs/src/write_guard.rs index f8fe0b8..af7fbd1 100644 --- a/o8v-fs/src/write_guard.rs +++ b/o8v-fs/src/write_guard.rs @@ -109,55 +109,60 @@ pub(crate) fn guarded_append_with_separator( cause: e, })?; - // Under the lock: read the last 2 bytes to detect CRLF vs LF. - let len = file - .metadata() - .map_err(|e| FsError::Io { - path: path.to_path_buf(), - cause: e, - })? - .len(); - - // Detect line ending from file tail. - // - 0 bytes => no existing content; default to \n - // - 1 byte => can't be \r\n; treat as LF context - // - 2+ bytes => check if last 2 are \r\n - let is_crlf = if len >= 2 { - let mut tail = [0u8; 2]; - file.seek(SeekFrom::Start(len - 2)) - .map_err(|e| FsError::Io { + // Under the lock: read the entire existing file content. + // This is necessary to: + // 1. Detect CRLF correctly even when the file has no trailing newline + // (the old 2-byte tail read fails in that case). + // 2. Validate that the existing content has no mixed line endings before + // appending (R3 fix). + let mut existing = Vec::new(); + file.seek(SeekFrom::Start(0)).map_err(|e| FsError::Io { + path: path.to_path_buf(), + cause: e, + })?; + file.read_to_end(&mut existing).map_err(|e| FsError::Io { + path: path.to_path_buf(), + cause: e, + })?; + // Seek back to end so subsequent write_all appends at EOF. + // (O_APPEND on Linux/macOS guarantees this, but we seek explicitly for + // clarity and cross-platform safety.) + file.seek(SeekFrom::End(0)).map_err(|e| FsError::Io { + path: path.to_path_buf(), + cause: e, + })?; + + // Validate existing content: reject files with mixed line endings (R3). + // Rule: if both \r\n sequences AND bare \n (not preceded by \r) are + // present, the file is mixed. + if !existing.is_empty() { + let has_crlf = existing.windows(2).any(|w| w == b"\r\n"); + let has_lone_lf = existing + .iter() + .enumerate() + .any(|(i, &b)| b == b'\n' && (i == 0 || existing[i - 1] != b'\r')); + if has_crlf && has_lone_lf { + // Unlock before returning the error so the file descriptor is + // released cleanly. + let _ = file.unlock(); + return Err(FsError::InvalidContent { path: path.to_path_buf(), - cause: e, - })?; - file.read_exact(&mut tail).map_err(|e| FsError::Io { - path: path.to_path_buf(), - cause: e, - })?; - tail == [b'\r', b'\n'] - } else { - false - }; + cause: "file has mixed line endings (LF and CRLF) \u{2014} 8v requires consistent line endings. Normalize the file first.".to_string(), + }); + } + } + // Detect line ending: CRLF if any \r\n is present anywhere in the file, + // LF otherwise (includes empty-file case). + let is_crlf = existing.windows(2).any(|w| w == b"\r\n"); let line_ending: &[u8] = if is_crlf { b"\r\n" } else { b"\n" }; // Insert separator if file is non-empty and doesn't end with a newline. - if len > 0 { - let mut last = [0u8; 1]; - file.seek(SeekFrom::Start(len - 1)) - .map_err(|e| FsError::Io { - path: path.to_path_buf(), - cause: e, - })?; - file.read_exact(&mut last).map_err(|e| FsError::Io { + if !existing.is_empty() && existing.last() != Some(&b'\n') { + file.write_all(line_ending).map_err(|e| FsError::Io { path: path.to_path_buf(), cause: e, })?; - if last[0] != b'\n' { - file.write_all(line_ending).map_err(|e| FsError::Io { - path: path.to_path_buf(), - cause: e, - })?; - } } // Normalise content: if the file is CRLF and content uses bare \n, convert. diff --git a/o8v-fs/tests/stress_filesystem.rs b/o8v-fs/tests/stress_filesystem.rs index ac794f3..ffd4442 100644 --- a/o8v-fs/tests/stress_filesystem.rs +++ b/o8v-fs/tests/stress_filesystem.rs @@ -337,7 +337,6 @@ fn append_concurrent_50_no_spurious_blank_lines() { /// Today each thread misdetects the file as LF (same root cause as PR#4-R1), /// so the result contains bare \n separators mixed into CRLF content. #[test] -#[ignore = "bug: concurrent append misdetects CRLF on file without trailing newline"] fn append_concurrent_50_crlf_seed_no_trailing_newline_preserves_crlf() { use std::sync::Arc; diff --git a/o8v/tests/e2e_write.rs b/o8v/tests/e2e_write.rs index a2fe215..e7c3fdd 100644 --- a/o8v/tests/e2e_write.rs +++ b/o8v/tests/e2e_write.rs @@ -1446,7 +1446,6 @@ fn assert_pure_crlf(bytes: &[u8]) { /// `\r\n` those 2 bytes are the last chars of the final word, so detection /// falls back to LF and the appended line gets a bare `\n` instead of `\r\n`. #[test] -#[ignore = "bug: is_crlf detection fails when file has no trailing newline"] fn append_to_crlf_file_without_trailing_newline_preserves_crlf() { let tmp = tempfile::tempdir().expect("tmpdir"); setup_project(&tmp); @@ -1476,7 +1475,6 @@ fn append_to_crlf_file_without_trailing_newline_preserves_crlf() { /// ends up with mixed endings (due to bug above). A subsequent `8v write` /// on that file must still succeed -- today it is rejected by validate_line_endings. #[test] -#[ignore = "bug: post-append file has mixed endings, subsequent write is rejected"] fn append_then_subsequent_write_succeeds_on_crlf_file_without_trailing_newline() { let tmp = tempfile::tempdir().expect("tmpdir"); setup_project(&tmp); @@ -1513,7 +1511,6 @@ fn append_then_subsequent_write_succeeds_on_crlf_file_without_trailing_newline() /// Today `--append` skips validate_line_endings on the existing file content, /// so it silently appends to a mixed file. #[test] -#[ignore = "bug: append does not validate existing file for mixed endings"] fn append_on_pre_existing_mixed_ending_file_is_rejected() { let tmp = tempfile::tempdir().expect("tmpdir"); setup_project(&tmp); From c86cedf31058abb772a6b07096b6989349166f42 Mon Sep 17 00:00:00 2001 From: Soheil Alizadeh Date: Tue, 28 Apr 2026 00:00:16 +0200 Subject: [PATCH 4/4] refactor: extract line-ending validation into o8v-fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit validate_line_endings was inlined in two places — write_guard.rs (byte level, only catching mixed CRLF+LF) and write.rs (str level, catching CR-only files, lone-\r in LF files, and mixed CRLF+LF). The two implementations diverged: the byte-level inline version missed the lone-\r cases. Extract validate_line_endings_bytes into o8v-fs so both call sites share the same logic and produce the same error messages. Also fixes the stale doc on guarded_append_with_separator that still described the replaced "last 2 bytes" approach. --- o8v-fs/src/lib.rs | 45 ++++++++++++++++++++++++++++++++++++++ o8v-fs/src/write_guard.rs | 40 +++++++++++++++------------------- o8v/src/commands/write.rs | 46 ++++----------------------------------- 3 files changed, 67 insertions(+), 64 deletions(-) diff --git a/o8v-fs/src/lib.rs b/o8v-fs/src/lib.rs index 4313a77..b7d466e 100644 --- a/o8v-fs/src/lib.rs +++ b/o8v-fs/src/lib.rs @@ -47,6 +47,51 @@ pub use file::{truncate_error, GuardedFile}; pub use fs_trait::FileSystem; pub use root::ContainmentRoot; pub use safe_fs::SafeFs; + +/// Validate that a byte slice has consistent line endings. +/// +/// Returns `Err` containing a human-readable cause string for: +/// - Lone `\r` (classic Mac, no `\n` at all) +/// - Any lone `\r` in a `\n`-terminated file (mid-line `\r`) +/// - Mixed `\r\n` and lone `\n` +/// +/// This is the single authoritative implementation shared by `o8v write` +/// (str-level callers) and `o8v-fs` (byte-level callers). Callers wrap +/// the cause in the appropriate error type — `FsError::InvalidContent` +/// for byte-level callers, or a plain `String` for str-level callers. +pub fn validate_line_endings_bytes(content: &[u8]) -> Result<(), String> { + let has_crlf = content.windows(2).any(|w| w == b"\r\n"); + let has_lf = content.contains(&b'\n'); + // A lone \r is any \r not immediately followed by \n. + let has_lone_cr = content + .iter() + .enumerate() + .any(|(i, &b)| b == b'\r' && content.get(i + 1) != Some(&b'\n')); + + if has_lone_cr && !has_lf { + return Err( + "file uses classic Mac line endings (\\r only) \u{2014} 8v does not support this format. Convert to \\n or \\r\\n first.".to_string() + ); + } + if has_lone_cr && has_lf { + return Err( + "file contains carriage return (\\r) characters outside of \\r\\n sequences \u{2014} normalize line endings first".to_string() + ); + } + if has_crlf && has_lf { + // Check for standalone \n (not part of \r\n). + let has_standalone_lf = content + .iter() + .enumerate() + .any(|(i, &b)| b == b'\n' && (i == 0 || content[i - 1] != b'\r')); + if has_standalone_lf { + return Err( + "file has mixed line endings (LF and CRLF) \u{2014} 8v requires consistent line endings. Normalize the file first.".to_string() + ); + } + } + Ok(()) +} pub use scan::{DirEntry, DirScan}; // ─── Standalone safe write operations ─────────────────────────────────────── diff --git a/o8v-fs/src/write_guard.rs b/o8v-fs/src/write_guard.rs index af7fbd1..d2afbbf 100644 --- a/o8v-fs/src/write_guard.rs +++ b/o8v-fs/src/write_guard.rs @@ -67,17 +67,20 @@ pub(crate) fn guarded_append(path: &Path, root: &Path, content: &[u8]) -> Result /// Append to a file with an exclusive advisory lock, automatically inserting /// a separator if the file does not end with one. /// -/// The separator and trailing terminator match the file's existing line ending -/// (`\r\n` or `\n`): we read the last **2** bytes under the lock to detect -/// which convention is in use, so appending to a CRLF file stays CRLF-pure. +/// Under the lock the function: +/// 1. Reads the full existing file content. +/// 2. Validates it with `validate_line_endings_bytes` — rejects CR-only, +/// lone-`\r`-in-LF, and mixed CRLF+LF files. +/// 3. Detects the file's line ending (CRLF if any `\r\n` present, else LF). +/// 4. Inserts a separator using the detected ending when the file is non-empty +/// and does not already end with `\n`. +/// 5. Normalises bare `\n` in `content` to `\r\n` when the file uses CRLF. +/// 6. Appends `content`, then ensures a trailing line terminator. /// -/// If the caller passes pure-LF content into a CRLF file we normalise `\n` to -/// `\r\n` inside the lock so the written bytes are consistent. -/// -/// This serializes peek-last-bytes + conditional-separator + append under the -/// lock, eliminating the race where two concurrent callers both observe a -/// missing trailing newline and both prepend a separator, producing a spurious -/// blank line. +/// This serializes the full read + validate + conditional-separator + append +/// under the lock, eliminating the race where two concurrent callers both +/// observe a missing trailing newline and both prepend a separator, producing +/// a spurious blank line. pub(crate) fn guarded_append_with_separator( path: &Path, root: &Path, @@ -132,22 +135,15 @@ pub(crate) fn guarded_append_with_separator( cause: e, })?; - // Validate existing content: reject files with mixed line endings (R3). - // Rule: if both \r\n sequences AND bare \n (not preceded by \r) are - // present, the file is mixed. + // Validate existing content: reject CR-only, lone-\r-in-LF, and mixed + // CRLF+LF files. Unlock before returning so the file descriptor is + // released cleanly on error. if !existing.is_empty() { - let has_crlf = existing.windows(2).any(|w| w == b"\r\n"); - let has_lone_lf = existing - .iter() - .enumerate() - .any(|(i, &b)| b == b'\n' && (i == 0 || existing[i - 1] != b'\r')); - if has_crlf && has_lone_lf { - // Unlock before returning the error so the file descriptor is - // released cleanly. + if let Err(cause) = crate::validate_line_endings_bytes(&existing) { let _ = file.unlock(); return Err(FsError::InvalidContent { path: path.to_path_buf(), - cause: "file has mixed line endings (LF and CRLF) \u{2014} 8v requires consistent line endings. Normalize the file first.".to_string(), + cause, }); } } diff --git a/o8v/src/commands/write.rs b/o8v/src/commands/write.rs index bae9280..a488bbe 100644 --- a/o8v/src/commands/write.rs +++ b/o8v/src/commands/write.rs @@ -330,51 +330,13 @@ fn detect_line_ending(content: &str) -> &'static str { } } -/// Returns true if `content` contains any lone `\r` (i.e., `\r` not immediately followed by `\n`). -fn has_lone_cr(content: &str) -> bool { - content - .chars() - .zip(content.chars().skip(1).chain(std::iter::once('\0'))) - .any(|(c, next)| c == '\r' && next != '\n') -} - /// Validate that the file's line endings are supported for line-based operations. /// -/// Returns Err for: -/// - Lone `\r` (classic Mac, no `\n` at all) -/// - Any lone `\r` in a `\n`-terminated file (mid-line `\r`) -/// - Mixed `\r\n` and lone `\n` +/// Delegates to `o8v_fs::validate_line_endings_bytes` so both call sites share +/// the same logic and produce the same error messages. fn validate_line_endings(content: &str) -> Result<(), String> { - let has_crlf = content.contains("\r\n"); - let lone_cr = has_lone_cr(content); - let has_lf = content.contains('\n'); - - if lone_cr && !has_lf { - return Err( - "error: file uses classic Mac line endings (\\r only) — 8v does not support this format. Convert to \\n or \\r\\n first." - .to_string(), - ); - } - if lone_cr && has_lf { - return Err( - "error: file contains carriage return (\\r) characters outside of \\r\\n sequences — normalize line endings first" - .to_string(), - ); - } - if has_crlf && has_lf { - // Check for standalone \n (not part of \r\n) - // We know has_lf is true; check if any \n is not preceded by \r - let has_standalone_lf = content - .char_indices() - .any(|(i, c)| c == '\n' && (i == 0 || content.as_bytes()[i - 1] != b'\r')); - if has_standalone_lf { - return Err( - "error: file has mixed line endings (LF and CRLF) — 8v requires consistent line endings. Normalize the file first." - .to_string(), - ); - } - } - Ok(()) + o8v_fs::validate_line_endings_bytes(content.as_bytes()) + .map_err(|cause| format!("error: {cause}")) } /// Validate content provided by the user for line-based operations.