Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 55 additions & 5 deletions o8v-fs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,51 @@ pub use file::{truncate_error, GuardedFile};
pub use fs_trait::FileSystem;
pub use root::ContainmentRoot;
pub use safe_fs::SafeFs;

/// Validate that a byte slice has consistent line endings.
///
/// Returns `Err` containing a human-readable cause string for:
/// - Lone `\r` (classic Mac, no `\n` at all)
/// - Any lone `\r` in a `\n`-terminated file (mid-line `\r`)
/// - Mixed `\r\n` and lone `\n`
///
/// This is the single authoritative implementation shared by `o8v write`
/// (str-level callers) and `o8v-fs` (byte-level callers). Callers wrap
/// the cause in the appropriate error type — `FsError::InvalidContent`
/// for byte-level callers, or a plain `String` for str-level callers.
pub fn validate_line_endings_bytes(content: &[u8]) -> Result<(), String> {
let has_crlf = content.windows(2).any(|w| w == b"\r\n");
let has_lf = content.contains(&b'\n');
// A lone \r is any \r not immediately followed by \n.
let has_lone_cr = content
.iter()
.enumerate()
.any(|(i, &b)| b == b'\r' && content.get(i + 1) != Some(&b'\n'));

if has_lone_cr && !has_lf {
return Err(
"file uses classic Mac line endings (\\r only) \u{2014} 8v does not support this format. Convert to \\n or \\r\\n first.".to_string()
);
}
if has_lone_cr && has_lf {
return Err(
"file contains carriage return (\\r) characters outside of \\r\\n sequences \u{2014} normalize line endings first".to_string()
);
}
if has_crlf && has_lf {
// Check for standalone \n (not part of \r\n).
let has_standalone_lf = content
.iter()
.enumerate()
.any(|(i, &b)| b == b'\n' && (i == 0 || content[i - 1] != b'\r'));
if has_standalone_lf {
return Err(
"file has mixed line endings (LF and CRLF) \u{2014} 8v requires consistent line endings. Normalize the file first.".to_string()
);
}
}
Ok(())
}
pub use scan::{DirEntry, DirScan};

// ─── Standalone safe write operations ───────────────────────────────────────
Expand Down Expand Up @@ -187,12 +232,17 @@ pub fn safe_append(
write_guard::guarded_append(path, root.as_path(), content)
}

/// Append to a file with an exclusive advisory lock and automatic `\n` separator.
/// Append to a file with an exclusive advisory lock and automatic line-ending-aware separator.
///
/// Under the lock, reads the full existing file content to:
/// 1. Detect the file's line ending (CRLF if any `\r\n` present, else LF).
/// 2. Validate existing content has no mixed endings (returns error if so).
/// 3. Insert a separator using the detected line ending if the file is non-empty
/// and doesn't already end with `\n`.
///
/// Under the lock, checks whether the file ends with `\n` and inserts one if
/// needed before appending `content`. This eliminates the TOCTOU race in
/// concurrent appends where multiple callers all observe a missing trailing
/// newline and each prepend one, producing spurious blank lines.
/// Normalises bare `\n` in `content` to `\r\n` when the file uses CRLF.
/// Appends a trailing line terminator matching the file's ending if absent.
/// The full read happens inside the flock so it remains race-safe.
pub fn safe_append_with_separator(
path: &std::path::Path,
root: &ContainmentRoot,
Expand Down
113 changes: 84 additions & 29 deletions o8v-fs/src/write_guard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,22 @@ pub(crate) fn guarded_append(path: &Path, root: &Path, content: &[u8]) -> Result
})
}
/// Append to a file with an exclusive advisory lock, automatically inserting
/// a `\n` separator if the file does not end with one.
/// a separator if the file does not end with one.
///
/// This serializes peek-last-byte + conditional-separator + append under the
/// lock, eliminating the race where two concurrent callers both observe a
/// missing trailing newline and both prepend `\n`, producing a spurious blank
/// line.
/// Under the lock the function:
/// 1. Reads the full existing file content.
/// 2. Validates it with `validate_line_endings_bytes` — rejects CR-only,
/// lone-`\r`-in-LF, and mixed CRLF+LF files.
/// 3. Detects the file's line ending (CRLF if any `\r\n` present, else LF).
/// 4. Inserts a separator using the detected ending when the file is non-empty
/// and does not already end with `\n`.
/// 5. Normalises bare `\n` in `content` to `\r\n` when the file uses CRLF.
/// 6. Appends `content`, then ensures a trailing line terminator.
///
/// This serializes the full read + validate + conditional-separator + append
/// under the lock, eliminating the race where two concurrent callers both
/// observe a missing trailing newline and both prepend a separator, producing
/// a spurious blank line.
pub(crate) fn guarded_append_with_separator(
path: &Path,
root: &Path,
Expand Down Expand Up @@ -102,41 +112,86 @@ pub(crate) fn guarded_append_with_separator(
cause: e,
})?;

// Under the lock: check whether the file ends with a newline.
let len = file
.metadata()
.map_err(|e| FsError::Io {
path: path.to_path_buf(),
cause: e,
})?
.len();
// Under the lock: read the entire existing file content.
// This is necessary to:
// 1. Detect CRLF correctly even when the file has no trailing newline
// (the old 2-byte tail read fails in that case).
// 2. Validate that the existing content has no mixed line endings before
// appending (R3 fix).
let mut existing = Vec::new();
file.seek(SeekFrom::Start(0)).map_err(|e| FsError::Io {
path: path.to_path_buf(),
cause: e,
})?;
file.read_to_end(&mut existing).map_err(|e| FsError::Io {
path: path.to_path_buf(),
cause: e,
})?;
// Seek back to end so subsequent write_all appends at EOF.
// (O_APPEND on Linux/macOS guarantees this, but we seek explicitly for
// clarity and cross-platform safety.)
file.seek(SeekFrom::End(0)).map_err(|e| FsError::Io {
path: path.to_path_buf(),
cause: e,
})?;

if len > 0 {
let mut last = [0u8; 1];
file.seek(SeekFrom::Start(len - 1))
.map_err(|e| FsError::Io {
// Validate existing content: reject CR-only, lone-\r-in-LF, and mixed
// CRLF+LF files. Unlock before returning so the file descriptor is
// released cleanly on error.
if !existing.is_empty() {
if let Err(cause) = crate::validate_line_endings_bytes(&existing) {
let _ = file.unlock();
return Err(FsError::InvalidContent {
path: path.to_path_buf(),
cause: e,
})?;
file.read_exact(&mut last).map_err(|e| FsError::Io {
cause,
});
}
}

// Detect line ending: CRLF if any \r\n is present anywhere in the file,
// LF otherwise (includes empty-file case).
let is_crlf = existing.windows(2).any(|w| w == b"\r\n");
let line_ending: &[u8] = if is_crlf { b"\r\n" } else { b"\n" };

// Insert separator if file is non-empty and doesn't end with a newline.
if !existing.is_empty() && existing.last() != Some(&b'\n') {
file.write_all(line_ending).map_err(|e| FsError::Io {
path: path.to_path_buf(),
cause: e,
})?;
if last[0] != b'\n' {
file.write_all(b"\n").map_err(|e| FsError::Io {
path: path.to_path_buf(),
cause: e,
})?;
}
}

file.write_all(content).map_err(|e| FsError::Io {
// Normalise content: if the file is CRLF and content uses bare \n, convert.
let normalised: Vec<u8>;
let content_to_write: &[u8] = if is_crlf && content.contains(&b'\n') {
// Replace bare \n (not already preceded by \r) with \r\n.
let mut out = Vec::with_capacity(content.len() + content.len() / 4);
let mut i = 0;
while i < content.len() {
if content[i] == b'\n' {
if i == 0 || content[i - 1] != b'\r' {
out.push(b'\r');
}
out.push(b'\n');
} else {
out.push(content[i]);
}
i += 1;
}
normalised = out;
&normalised
} else {
content
};

file.write_all(content_to_write).map_err(|e| FsError::Io {
path: path.to_path_buf(),
cause: e,
})?;

if !content.is_empty() && *content.last().unwrap() != b'\n' {
file.write_all(b"\n").map_err(|e| FsError::Io {
// Ensure trailing line terminator.
if !content_to_write.is_empty() && *content_to_write.last().unwrap() != b'\n' {
file.write_all(line_ending).map_err(|e| FsError::Io {
path: path.to_path_buf(),
cause: e,
})?;
Expand Down
56 changes: 56 additions & 0 deletions o8v-fs/tests/stress_filesystem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,59 @@ fn append_concurrent_50_no_spurious_blank_lines() {
assert!(non_empty.contains(&expected.as_str()), "missing {expected}");
}
}

/// PR#4-R10: Concurrent 50-thread append on a CRLF file without trailing newline.
/// Each thread must detect CRLF and append with \r\n as the separator.
/// Today each thread misdetects the file as LF (same root cause as PR#4-R1),
/// so the result contains bare \n separators mixed into CRLF content.
#[test]
fn append_concurrent_50_crlf_seed_no_trailing_newline_preserves_crlf() {
use std::sync::Arc;

let dir = tempdir().unwrap();
let file_path = dir.path().join("crlf_concurrent.txt");
// CRLF seed with no trailing newline -- triggers the is_crlf detection bug.
std::fs::write(&file_path, b"seed\r\nstart").unwrap();

let root = o8v_fs::ContainmentRoot::new(dir.path()).unwrap();
let root = Arc::new(root);
let file_path = Arc::new(file_path);

let handles: Vec<_> = (0..50)
.map(|i| {
let root = Arc::clone(&root);
let path = Arc::clone(&file_path);
std::thread::spawn(move || {
let line = format!("line{i}");
o8v_fs::safe_append_with_separator(&path, &root, line.as_bytes())
.expect("append failed");
})
})
.collect();

for h in handles {
h.join().expect("thread panicked");
}

let bytes = std::fs::read(file_path.as_ref()).unwrap();

// Every \n must be preceded by \r (pure CRLF).
for (i, &b) in bytes.iter().enumerate() {
if b == b'\n' {
assert!(
i > 0 && bytes[i - 1] == b'\r',
"bare \\n at byte {} -- CRLF not preserved under concurrent append; file: {:?}",
i,
bytes
);
}
if b == b'\r' {
assert!(
i + 1 < bytes.len() && bytes[i + 1] == b'\n',
"lone \\r at byte {} -- malformed CRLF sequence; file: {:?}",
i,
bytes
);
}
}
}
67 changes: 22 additions & 45 deletions o8v/src/commands/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,51 +330,13 @@ fn detect_line_ending(content: &str) -> &'static str {
}
}

/// Returns true if `content` contains any lone `\r` (i.e., `\r` not immediately followed by `\n`).
fn has_lone_cr(content: &str) -> bool {
content
.chars()
.zip(content.chars().skip(1).chain(std::iter::once('\0')))
.any(|(c, next)| c == '\r' && next != '\n')
}

/// Validate that the file's line endings are supported for line-based operations.
///
/// Returns Err for:
/// - Lone `\r` (classic Mac, no `\n` at all)
/// - Any lone `\r` in a `\n`-terminated file (mid-line `\r`)
/// - Mixed `\r\n` and lone `\n`
/// Delegates to `o8v_fs::validate_line_endings_bytes` so both call sites share
/// the same logic and produce the same error messages.
fn validate_line_endings(content: &str) -> Result<(), String> {
let has_crlf = content.contains("\r\n");
let lone_cr = has_lone_cr(content);
let has_lf = content.contains('\n');

if lone_cr && !has_lf {
return Err(
"error: file uses classic Mac line endings (\\r only) — 8v does not support this format. Convert to \\n or \\r\\n first."
.to_string(),
);
}
if lone_cr && has_lf {
return Err(
"error: file contains carriage return (\\r) characters outside of \\r\\n sequences — normalize line endings first"
.to_string(),
);
}
if has_crlf && has_lf {
// Check for standalone \n (not part of \r\n)
// We know has_lf is true; check if any \n is not preceded by \r
let has_standalone_lf = content
.char_indices()
.any(|(i, c)| c == '\n' && (i == 0 || content.as_bytes()[i - 1] != b'\r'));
if has_standalone_lf {
return Err(
"error: file has mixed line endings (LF and CRLF) — 8v requires consistent line endings. Normalize the file first."
.to_string(),
);
}
}
Ok(())
o8v_fs::validate_line_endings_bytes(content.as_bytes())
.map_err(|cause| format!("error: {cause}"))
}

/// Validate content provided by the user for line-based operations.
Expand Down Expand Up @@ -685,7 +647,22 @@ pub(crate) fn write_to_report(
})?;
let existing_content = file.content();
validate_line_endings(existing_content)?;
let match_count = existing_content.matches(find.as_str()).count();
let line_ending = detect_line_ending(existing_content);

// Normalise find/replace to the file's line ending so that a user
// who passes pure-LF patterns against a CRLF file gets correct
// matches and CRLF-preserving output.
let find_normalised;
let replace_normalised;
let (effective_find, effective_replace) = if line_ending == "\r\n" {
find_normalised = find.replace('\n', "\r\n");
replace_normalised = replace.replace('\n', "\r\n");
(find_normalised.as_str(), replace_normalised.as_str())
} else {
(find.as_str(), replace.as_str())
};

let match_count = existing_content.matches(effective_find).count();

if match_count == 0 {
return Err(render_not_found_hint(find, existing_content, &path_str));
Expand All @@ -701,9 +678,9 @@ pub(crate) fn write_to_report(
}

let new_content = if *all {
existing_content.replace(find.as_str(), replace.as_str())
existing_content.replace(effective_find, effective_replace)
} else {
existing_content.replacen(find.as_str(), replace.as_str(), 1)
existing_content.replacen(effective_find, effective_replace, 1)
};

let count = if *all { match_count } else { 1 };
Expand Down
Loading
Loading