From 32faabe6a7950c323f1f66c72298358e8b615017 Mon Sep 17 00:00:00 2001 From: Kevin Burke Date: Thu, 9 Apr 2026 20:15:33 -0700 Subject: [PATCH] tar: auto-detect gzip on read and add gzip write mode Teach tar to auto-detect gzip-compressed archives for list and extract operations while keeping archive creation explicit via -z/--gzip. The implementation now routes archive I/O through a shared compression helper so the read path can sniff gzip input and the write path can wrap output in gzip only when requested. Add integration tests covering gzip create, list, extract, explicit -z on extract/list, round-tripping a gzip archive, and invalid gzip input failure behavior. --- Cargo.lock | 4 + Cargo.toml | 2 + src/uu/tar/Cargo.toml | 1 + src/uu/tar/src/compression.rs | 104 ++++++++++++++ src/uu/tar/src/errors.rs | 8 ++ src/uu/tar/src/operations/create.rs | 26 ++-- src/uu/tar/src/operations/extract.rs | 16 ++- src/uu/tar/src/operations/list.rs | 14 +- src/uu/tar/src/tar.rs | 24 +++- tests/by-util/test_tar.rs | 203 +++++++++++++++++++++++++++ 10 files changed, 375 insertions(+), 27 deletions(-) create mode 100644 src/uu/tar/src/compression.rs diff --git a/Cargo.lock b/Cargo.lock index 1633f43..dd055c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -493,6 +493,7 @@ version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ + "crc32fast", "miniz_oxide", "zlib-rs", ] @@ -1303,6 +1304,8 @@ dependencies = [ "clap_complete", "clap_mangen", "ctor 1.0.6", + "flate2", + "libc", "phf", "phf_codegen", "pretty_assertions", @@ -1495,6 +1498,7 @@ version = "0.0.1" dependencies = [ "chrono", "clap", + "flate2", "regex", "tar", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index a36b0e2..6c62416 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,8 @@ tar = { optional = true, version = "0.0.1", package = "uu_tar", path = "src/uu/t [dev-dependencies] chrono = { workspace 
= true } +flate2 = "1" +libc = { workspace = true } pretty_assertions = "1" rand = { workspace = true } regex = { workspace = true } diff --git a/src/uu/tar/Cargo.toml b/src/uu/tar/Cargo.toml index 3b341c7..80bd902 100644 --- a/src/uu/tar/Cargo.toml +++ b/src/uu/tar/Cargo.toml @@ -19,6 +19,7 @@ regex = { workspace = true } tar = { workspace = true } chrono = { workspace = true } thiserror = { workspace = true } +flate2 = "1" [lib] path = "src/tar.rs" diff --git a/src/uu/tar/src/compression.rs b/src/uu/tar/src/compression.rs new file mode 100644 index 0000000..637902c --- /dev/null +++ b/src/uu/tar/src/compression.rs @@ -0,0 +1,104 @@ +// This file is part of the uutils tar package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use crate::errors::TarError; +use crate::CompressionMode; +use flate2::read::GzDecoder; +use flate2::write::GzEncoder; +use std::fs::File; +use std::io::{Read, Seek, Write}; +use std::path::Path; + +const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b]; + +pub(crate) fn open_archive_reader( + archive_path: &Path, + mode: CompressionMode, +) -> Result<Box<dyn Read>, TarError> { + let mut file = + File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?; + let mode = match mode { + CompressionMode::Auto => detect_compression(&mut file)?, + other => other, + }; + + let reader: Box<dyn Read> = match mode { + CompressionMode::Auto | CompressionMode::None => Box::new(file), + CompressionMode::Gzip => Box::new(GzDecoder::new(file)), + }; + + Ok(reader) +} + +pub(crate) struct ArchiveWriter { + inner: ArchiveWriterInner, +} + +enum ArchiveWriterInner { + Plain(File), + Gzip(GzEncoder<File>), +} + +impl ArchiveWriter { + pub(crate) fn create(archive_path: &Path, mode: CompressionMode) -> Result<Self, TarError> { + let file = File::create(archive_path).map_err(|e| TarError::CannotCreateArchive { + path: archive_path.to_path_buf(), + source: e, + })?; + + let inner = match mode { +
CompressionMode::Auto => { + return Err(TarError::TarOperationError( + "internal error: automatic compression is not valid for archive creation" + .to_string(), + )); + } + CompressionMode::None => ArchiveWriterInner::Plain(file), + CompressionMode::Gzip => { + ArchiveWriterInner::Gzip(GzEncoder::new(file, flate2::Compression::default())) + } + }; + + Ok(Self { inner }) + } + + pub(crate) fn finish(self) -> Result<(), TarError> { + match self.inner { + ArchiveWriterInner::Plain(mut file) => file.flush().map_err(TarError::from), + ArchiveWriterInner::Gzip(encoder) => encoder + .finish() + .map(|_| ()) + .map_err(TarError::CannotFinalizeArchive), + } + } +} + +impl Write for ArchiveWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { + match &mut self.inner { + ArchiveWriterInner::Plain(file) => file.write(buf), + ArchiveWriterInner::Gzip(encoder) => encoder.write(buf), + } + } + + fn flush(&mut self) -> std::io::Result<()> { + match &mut self.inner { + ArchiveWriterInner::Plain(file) => file.flush(), + ArchiveWriterInner::Gzip(encoder) => encoder.flush(), + } + } +} + +fn detect_compression(file: &mut File) -> Result<CompressionMode, TarError> { + let mut magic = [0u8; 2]; + let n = file.read(&mut magic).map_err(TarError::Io)?; + file.seek(std::io::SeekFrom::Start(0)) + .map_err(TarError::Io)?; + + if n >= GZIP_MAGIC.len() && magic[..GZIP_MAGIC.len()] == GZIP_MAGIC { + return Ok(CompressionMode::Gzip); + } + Ok(CompressionMode::None) +} diff --git a/src/uu/tar/src/errors.rs b/src/uu/tar/src/errors.rs index 5630a95..aef9e38 100644 --- a/src/uu/tar/src/errors.rs +++ b/src/uu/tar/src/errors.rs @@ -27,6 +27,10 @@ pub enum TarError { #[error("tar: Cannot read entry path: {0}")] CannotReadEntryPath(io::Error), + /// Invalid archive format or unsupported compression stream + #[error("tar: {0}")] + InvalidArchive(String), + /// File or directory not found #[error("tar: {path}: Cannot open: No such file or directory")] FileNotFound { path: PathBuf }, @@ -51,6 +55,10 @@ pub enum TarError {
#[error("tar: Cannot extract '{path}': {source}")] CannotExtract { path: PathBuf, source: io::Error }, + /// General tar operation error + #[error("tar: {0}")] + TarOperationError(String), + /// Cannot finalize the archive #[error("tar: Cannot finalize archive: {0}")] CannotFinalizeArchive(io::Error), diff --git a/src/uu/tar/src/operations/create.rs b/src/uu/tar/src/operations/create.rs index a33d801..fc86f31 100644 --- a/src/uu/tar/src/operations/create.rs +++ b/src/uu/tar/src/operations/create.rs @@ -3,9 +3,11 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use crate::compression::ArchiveWriter; use crate::errors::TarError; +use crate::CompressionMode; use std::collections::VecDeque; -use std::fs::{self, File}; +use std::fs; use std::io::{self, BufWriter, Write}; use std::path::Component::{self, ParentDir, Prefix, RootDir}; use std::path::{self, Path, PathBuf}; @@ -26,15 +28,14 @@ use uucore::error::UResult; /// - The archive file cannot be created /// - Any input file cannot be read /// - Files cannot be added due to I/O or permission errors -pub fn create_archive(archive_path: &Path, files: &[&Path], verbose: bool) -> UResult<()> { - // Create the output file - let file = File::create(archive_path).map_err(|e| TarError::CannotCreateArchive { - path: archive_path.to_path_buf(), - source: e, - })?; - - // Create Builder instance - let mut builder = Builder::new(file); +pub fn create_archive( + archive_path: &Path, + files: &[&Path], + verbose: bool, + compression: CompressionMode, +) -> UResult<()> { + let writer = ArchiveWriter::create(archive_path, compression)?; + let mut builder = Builder::new(writer); let mut out = BufWriter::new(io::stdout().lock()); // Add each file or directory to the archive @@ -106,7 +107,10 @@ pub fn create_archive(archive_path: &Path, files: &[&Path], verbose: bool) -> UR // Finish writing the archive out.flush().map_err(TarError::Io)?; - 
builder.finish().map_err(TarError::CannotFinalizeArchive)?; + let writer = builder + .into_inner() + .map_err(|e| TarError::TarOperationError(format!("Failed to finalize archive: {e}")))?; + writer.finish()?; Ok(()) } diff --git a/src/uu/tar/src/operations/extract.rs b/src/uu/tar/src/operations/extract.rs index 135b66f..e11c8c6 100644 --- a/src/uu/tar/src/operations/extract.rs +++ b/src/uu/tar/src/operations/extract.rs @@ -3,8 +3,9 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use crate::compression::open_archive_reader; use crate::errors::TarError; -use std::fs::File; +use crate::CompressionMode; use std::io::{self, BufWriter, Write}; use std::path::Path; use tar::Archive; @@ -23,12 +24,13 @@ use uucore::error::UResult; /// - The archive file cannot be opened /// - The archive format is invalid /// - Files cannot be extracted due to I/O or permission errors -pub fn extract_archive(archive_path: &Path, verbose: bool) -> UResult<()> { - // Open the archive file - let file = File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?; - - // Create Archive instance - let mut archive = Archive::new(file); +pub fn extract_archive( + archive_path: &Path, + verbose: bool, + compression: CompressionMode, +) -> UResult<()> { + let reader = open_archive_reader(archive_path, compression)?; + let mut archive = Archive::new(reader); let mut out = BufWriter::new(io::stdout().lock()); // Extract to current directory diff --git a/src/uu/tar/src/operations/list.rs b/src/uu/tar/src/operations/list.rs index c424ca8..1699730 100644 --- a/src/uu/tar/src/operations/list.rs +++ b/src/uu/tar/src/operations/list.rs @@ -3,9 +3,10 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
+use crate::compression::open_archive_reader; use crate::errors::TarError; +use crate::CompressionMode; use chrono::{TimeZone, Utc}; -use std::fs::File; use std::io::{self, BufWriter, Write}; use std::path::Path; use tar::Archive; @@ -13,10 +14,13 @@ use uucore::error::UResult; use uucore::fs::display_permissions_unix; /// List the contents of a tar archive, printing one entry per line. -pub fn list_archive(archive_path: &Path, verbose: bool) -> UResult<()> { - let file: File = - File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?; - let mut archive = Archive::new(file); +pub fn list_archive( + archive_path: &Path, + verbose: bool, + compression: CompressionMode, +) -> UResult<()> { + let reader = open_archive_reader(archive_path, compression)?; + let mut archive = Archive::new(reader); let mut out = BufWriter::new(io::stdout().lock()); for entry_result in archive.entries().map_err(TarError::CannotReadEntries)? { diff --git a/src/uu/tar/src/tar.rs b/src/uu/tar/src/tar.rs index 16e29d1..14a77c0 100644 --- a/src/uu/tar/src/tar.rs +++ b/src/uu/tar/src/tar.rs @@ -3,6 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +pub mod compression; pub mod errors; mod operations; @@ -14,6 +15,13 @@ use uucore::format_usage; const ABOUT: &str = "an archiving utility"; const USAGE: &str = "tar key [FILE...]\n tar {-c|-t|-x} [-v] -f ARCHIVE [FILE...]"; +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum CompressionMode { + Auto, + None, + Gzip, +} + /// Determines whether a string looks like a POSIX tar keystring. 
/// /// A valid keystring must not start with '-', must contain at least one @@ -131,6 +139,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { }; let verbose = matches.get_flag("verbose"); + let explicit_compression = if matches.get_flag("gzip") { + Some(CompressionMode::Gzip) + } else { + None + }; // Handle extract operation if matches.get_flag("extract") { @@ -138,7 +151,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { uucore::error::USimpleError::new(64, "option requires an argument -- 'f'") })?; - return operations::extract::extract_archive(archive_path, verbose); + let compression = explicit_compression.unwrap_or(CompressionMode::Auto); + return operations::extract::extract_archive(archive_path, verbose, compression); } // Handle create operation @@ -159,7 +173,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { )); } - return operations::create::create_archive(archive_path, &files, verbose); + let compression = explicit_compression.unwrap_or(CompressionMode::None); + return operations::create::create_archive(archive_path, &files, verbose, compression); } // Handle list operation @@ -168,7 +183,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { uucore::error::USimpleError::new(64, "option requires an argument -- 'f'") })?; - return operations::list::list_archive(archive_path, verbose); + let compression = explicit_compression.unwrap_or(CompressionMode::Auto); + return operations::list::list_archive(archive_path, verbose, compression); } // If no operation specified, show error @@ -200,7 +216,7 @@ pub fn uu_app() -> Command { arg!(-f --file <ARCHIVE> "Use archive file or device ARCHIVE") .value_parser(clap::value_parser!(PathBuf)), // Compression options - // arg!(-z --gzip "Filter through gzip"), + arg!(-z --gzip "Filter through gzip"), // arg!(-j --bzip2 "Filter through bzip2"), // arg!(-J --xz "Filter through xz"), // Common options diff --git a/tests/by-util/test_tar.rs b/tests/by-util/test_tar.rs index 51395a3..71f15cc
100644 --- a/tests/by-util/test_tar.rs +++ b/tests/by-util/test_tar.rs @@ -3,6 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use std::io::Write; use std::path::{self, PathBuf}; use uutests::{at_and_ucmd, new_ucmd}; @@ -749,3 +750,205 @@ fn test_list_conflicts_with_extract() { .code_is(2) .stderr_contains("cannot be used with"); } + +// Gzip-compressed archive tests + +#[test] +fn test_extract_gzip_archive() { + let (at, _ucmd) = at_and_ucmd!(); + + // Build a .tar.gz in memory: tar containing one file, then gzip-compress it + let mut tar_bytes = Vec::new(); + { + let mut builder = tar_rs_crate::Builder::new(&mut tar_bytes); + let content = b"hello from gzip"; + let mut header = tar_rs_crate::Header::new_gnu(); + header.set_path("gzfile.txt").unwrap(); + header.set_size(content.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + builder.append(&header, &content[..]).unwrap(); + builder.finish().unwrap(); + } + + // Gzip-compress the tar bytes + let mut gz_bytes = Vec::new(); + { + let mut encoder = + flate2::write::GzEncoder::new(&mut gz_bytes, flate2::Compression::default()); + encoder.write_all(&tar_bytes).unwrap(); + encoder.finish().unwrap(); + } + + at.write_bytes("archive.tar.gz", &gz_bytes); + + // Extract using our tar implementation + new_ucmd!() + .arg("-xf") + .arg(at.plus("archive.tar.gz")) + .current_dir(at.as_string()) + .succeeds(); + + assert!(at.file_exists("gzfile.txt")); + assert_eq!(at.read("gzfile.txt"), "hello from gzip"); +} + +#[test] +fn test_extract_gzip_archive_with_explicit_flag() { + let (at, _ucmd) = at_and_ucmd!(); + + let mut tar_bytes = Vec::new(); + { + let mut builder = tar_rs_crate::Builder::new(&mut tar_bytes); + let content = b"hello from explicit gzip"; + let mut header = tar_rs_crate::Header::new_gnu(); + header.set_path("explicit-gzfile.txt").unwrap(); + header.set_size(content.len() as u64); + header.set_mode(0o644); + 
header.set_cksum(); + builder.append(&header, &content[..]).unwrap(); + builder.finish().unwrap(); + } + + let mut gz_bytes = Vec::new(); + { + let mut encoder = + flate2::write::GzEncoder::new(&mut gz_bytes, flate2::Compression::default()); + encoder.write_all(&tar_bytes).unwrap(); + encoder.finish().unwrap(); + } + + at.write_bytes("archive.tar.gz", &gz_bytes); + + new_ucmd!() + .args(&["-zxf", &at.plus_as_string("archive.tar.gz")]) + .current_dir(at.as_string()) + .succeeds(); + + assert!(at.file_exists("explicit-gzfile.txt")); + assert_eq!(at.read("explicit-gzfile.txt"), "hello from explicit gzip"); +} + +#[test] +fn test_create_gzip_archive() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.write("file1.txt", "test content"); + + ucmd.args(&["-zcf", "archive.tar.gz", "file1.txt"]) + .succeeds() + .no_output(); + + let mut decoder = + flate2::read::GzDecoder::new(std::io::Cursor::new(at.read_bytes("archive.tar.gz"))); + let mut decoded = Vec::new(); + std::io::Read::read_to_end(&mut decoder, &mut decoded).unwrap(); + + let mut archive = tar_rs_crate::Archive::new(std::io::Cursor::new(decoded)); + let mut entries = archive.entries().unwrap(); + let mut entry = entries.next().unwrap().unwrap(); + let mut contents = String::new(); + std::io::Read::read_to_string(&mut entry, &mut contents).unwrap(); + + assert_eq!(entry.path().unwrap().to_str(), Some("file1.txt")); + assert_eq!(contents, "test content"); + assert!(entries.next().is_none()); +} + +#[test] +fn test_gzip_roundtrip() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.write("roundtrip.txt", "roundtrip gzip content"); + + ucmd.args(&["-zcf", "archive.tar.gz", "roundtrip.txt"]) + .succeeds() + .no_output(); + + at.remove("roundtrip.txt"); + + new_ucmd!() + .args(&["-xf", &at.plus_as_string("archive.tar.gz")]) + .current_dir(at.as_string()) + .succeeds() + .no_output(); + + assert!(at.file_exists("roundtrip.txt")); + assert_eq!(at.read("roundtrip.txt"), "roundtrip gzip content"); +} + +#[test] +fn 
test_list_gzip_archive() { + let (at, _ucmd) = at_and_ucmd!(); + + // Build a .tar.gz in memory + let mut tar_bytes = Vec::new(); + { + let mut builder = tar_rs_crate::Builder::new(&mut tar_bytes); + let content = b"list test content"; + let mut header = tar_rs_crate::Header::new_gnu(); + header.set_path("listed.txt").unwrap(); + header.set_size(content.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + builder.append(&header, &content[..]).unwrap(); + builder.finish().unwrap(); + } + + let mut gz_bytes = Vec::new(); + { + let mut encoder = + flate2::write::GzEncoder::new(&mut gz_bytes, flate2::Compression::default()); + encoder.write_all(&tar_bytes).unwrap(); + encoder.finish().unwrap(); + } + + at.write_bytes("archive.tar.gz", &gz_bytes); + + new_ucmd!() + .args(&["-tf", &at.plus_as_string("archive.tar.gz")]) + .succeeds() + .stdout_contains("listed.txt"); +} + +#[test] +fn test_list_gzip_archive_with_explicit_flag() { + let (at, _ucmd) = at_and_ucmd!(); + + let mut tar_bytes = Vec::new(); + { + let mut builder = tar_rs_crate::Builder::new(&mut tar_bytes); + let content = b"explicit gzip"; + let mut header = tar_rs_crate::Header::new_gnu(); + header.set_path("explicit.txt").unwrap(); + header.set_size(content.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + builder.append(&header, &content[..]).unwrap(); + builder.finish().unwrap(); + } + + let mut gz_bytes = Vec::new(); + { + let mut encoder = + flate2::write::GzEncoder::new(&mut gz_bytes, flate2::Compression::default()); + encoder.write_all(&tar_bytes).unwrap(); + encoder.finish().unwrap(); + } + + at.write_bytes("archive.tar.gz", &gz_bytes); + + new_ucmd!() + .args(&["-ztf", &at.plus_as_string("archive.tar.gz")]) + .succeeds() + .stdout_contains("explicit.txt"); +} + +#[test] +fn test_extract_invalid_gzip_archive_fails() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.write("invalid.tar.gz", "definitely not gzip"); + + ucmd.args(&["-xf", "invalid.tar.gz"]).fails().code_is(2); +}