diff --git a/Cargo.lock b/Cargo.lock
index 1633f43..dd055c8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -493,6 +493,7 @@ version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
 dependencies = [
+ "crc32fast",
  "miniz_oxide",
  "zlib-rs",
 ]
@@ -1303,6 +1304,8 @@ dependencies = [
  "clap_complete",
  "clap_mangen",
  "ctor 1.0.6",
+ "flate2",
+ "libc",
  "phf",
  "phf_codegen",
  "pretty_assertions",
@@ -1495,6 +1498,7 @@ version = "0.0.1"
 dependencies = [
  "chrono",
  "clap",
+ "flate2",
  "regex",
  "tar",
  "thiserror",
diff --git a/Cargo.toml b/Cargo.toml
index a36b0e2..6c62416 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -68,6 +68,8 @@ tar = { optional = true, version = "0.0.1", package = "uu_tar", path = "src/uu/t
 
 [dev-dependencies]
 chrono = { workspace = true }
+flate2 = "1"
+libc = { workspace = true }
 pretty_assertions = "1"
 rand = { workspace = true }
 regex = { workspace = true }
diff --git a/src/uu/tar/Cargo.toml b/src/uu/tar/Cargo.toml
index 3b341c7..80bd902 100644
--- a/src/uu/tar/Cargo.toml
+++ b/src/uu/tar/Cargo.toml
@@ -19,6 +19,7 @@ regex = { workspace = true }
 tar = { workspace = true }
 chrono = { workspace = true }
 thiserror = { workspace = true }
+flate2 = "1"
 
 [lib]
 path = "src/tar.rs"
diff --git a/src/uu/tar/src/compression.rs b/src/uu/tar/src/compression.rs
new file mode 100644
index 0000000..637902c
--- /dev/null
+++ b/src/uu/tar/src/compression.rs
@@ -0,0 +1,147 @@
+// This file is part of the uutils tar package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+use crate::errors::TarError;
+use crate::CompressionMode;
+use flate2::read::MultiGzDecoder;
+use flate2::write::GzEncoder;
+use std::fs::File;
+use std::io::{ErrorKind, Read, Seek, SeekFrom, Write};
+use std::path::Path;
+
+/// Leading two bytes of a gzip stream (ID1 and ID2 in RFC 1952).
+const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];
+
+/// Opens `archive_path` for reading through the decoder selected by `mode`.
+///
+/// With [`CompressionMode::Auto`] the format is detected from the leading bytes
+/// of the file; any other mode is applied as given.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be opened or, in automatic mode, if its
+/// leading bytes cannot be read or the file cannot be rewound.
+pub(crate) fn open_archive_reader(
+    archive_path: &Path,
+    mode: CompressionMode,
+) -> Result<Box<dyn Read>, TarError> {
+    let mut file =
+        File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?;
+    let mode = match mode {
+        CompressionMode::Auto => detect_compression(&mut file)?,
+        other => other,
+    };
+
+    let reader: Box<dyn Read> = match mode {
+        // `Auto` was resolved above; it is listed only to keep the match exhaustive.
+        CompressionMode::Auto | CompressionMode::None => Box::new(file),
+        // Decode every gzip member, like `gzip -d`, rather than only the first one.
+        CompressionMode::Gzip => Box::new(MultiGzDecoder::new(file)),
+    };
+
+    Ok(reader)
+}
+
+/// Archive output file, optionally gzip-compressed as it is written.
+pub(crate) struct ArchiveWriter {
+    inner: ArchiveWriterInner,
+}
+
+enum ArchiveWriterInner {
+    Plain(File),
+    Gzip(GzEncoder<File>),
+}
+
+impl ArchiveWriter {
+    /// Creates (truncating if present) `archive_path` and wraps it for `mode`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `mode` is [`CompressionMode::Auto`], which is only
+    /// meaningful when reading, or if the file cannot be created.
+    pub(crate) fn create(archive_path: &Path, mode: CompressionMode) -> Result<Self, TarError> {
+        // The file is created inside the arms that need it, so rejecting an invalid
+        // mode never leaves a created or truncated archive behind.
+        let create_file = || {
+            File::create(archive_path).map_err(|e| TarError::CannotCreateArchive {
+                path: archive_path.to_path_buf(),
+                source: e,
+            })
+        };
+
+        let inner = match mode {
+            CompressionMode::Auto => {
+                return Err(TarError::TarOperationError(
+                    "internal error: automatic compression is not valid for archive creation"
+                        .to_string(),
+                ));
+            }
+            CompressionMode::None => ArchiveWriterInner::Plain(create_file()?),
+            CompressionMode::Gzip => ArchiveWriterInner::Gzip(GzEncoder::new(
+                create_file()?,
+                flate2::Compression::default(),
+            )),
+        };
+
+        Ok(Self { inner })
+    }
+
+    /// Flushes the archive and, for gzip output, finishes the compressed stream.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if flushing the file or finishing the gzip stream fails.
+    pub(crate) fn finish(self) -> Result<(), TarError> {
+        match self.inner {
+            ArchiveWriterInner::Plain(mut file) => file.flush().map_err(TarError::from),
+            ArchiveWriterInner::Gzip(encoder) => encoder
+                .finish()
+                .map(|_| ())
+                .map_err(TarError::CannotFinalizeArchive),
+        }
+    }
+}
+
+impl Write for ArchiveWriter {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        match &mut self.inner {
+            ArchiveWriterInner::Plain(file) => file.write(buf),
+            ArchiveWriterInner::Gzip(encoder) => encoder.write(buf),
+        }
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        match &mut self.inner {
+            ArchiveWriterInner::Plain(file) => file.flush(),
+            ArchiveWriterInner::Gzip(encoder) => encoder.flush(),
+        }
+    }
+}
+
+/// Detects the compression format from the first bytes of `file`.
+///
+/// The file is rewound to its start afterwards so the caller reads the whole
+/// archive.
+fn detect_compression(file: &mut File) -> Result<CompressionMode, TarError> {
+    let mut magic = [0u8; GZIP_MAGIC.len()];
+    let mut filled = 0;
+    // A single `read` may return fewer bytes than requested or be interrupted, so
+    // keep reading until the buffer is full or the file ends.
+    while filled < magic.len() {
+        match file.read(&mut magic[filled..]) {
+            Ok(0) => break,
+            Ok(n) => filled += n,
+            Err(e) if e.kind() == ErrorKind::Interrupted => {}
+            Err(e) => return Err(TarError::Io(e)),
+        }
+    }
+    file.seek(SeekFrom::Start(0)).map_err(TarError::Io)?;
+
+    if magic[..filled] == GZIP_MAGIC {
+        Ok(CompressionMode::Gzip)
+    } else {
+        Ok(CompressionMode::None)
+    }
+}
diff --git a/src/uu/tar/src/errors.rs b/src/uu/tar/src/errors.rs
index 5630a95..aef9e38 100644
--- a/src/uu/tar/src/errors.rs
+++ b/src/uu/tar/src/errors.rs
@@ -27,6 +27,10 @@ pub enum TarError {
     #[error("tar: Cannot read entry path: {0}")]
     CannotReadEntryPath(io::Error),
 
+    /// Invalid archive format or unsupported compression stream
+    #[error("tar: {0}")]
+    InvalidArchive(String),
+
     /// File or directory not found
     #[error("tar: {path}: Cannot open: No such file or directory")]
     FileNotFound { path: PathBuf },
@@ -51,6 +55,10 @@ pub enum TarError {
     #[error("tar: Cannot extract '{path}': {source}")]
     CannotExtract { path: PathBuf, source: io::Error },
 
+    /// General tar operation error
+    #[error("tar: {0}")]
+    TarOperationError(String),
+
     /// Cannot finalize the archive
     #[error("tar: Cannot finalize archive: {0}")]
     CannotFinalizeArchive(io::Error),
diff --git a/src/uu/tar/src/operations/create.rs b/src/uu/tar/src/operations/create.rs
index a33d801..fc86f31 100644
--- a/src/uu/tar/src/operations/create.rs
+++ b/src/uu/tar/src/operations/create.rs
@@ -3,9 +3,11 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
+use crate::compression::ArchiveWriter;
 use crate::errors::TarError;
+use crate::CompressionMode;
 use std::collections::VecDeque;
-use std::fs::{self, File};
+use std::fs;
 use std::io::{self, BufWriter, Write};
 use std::path::Component::{self, ParentDir, Prefix, RootDir};
 use std::path::{self, Path, PathBuf};
@@ -26,15 +28,14 @@ use uucore::error::UResult;
 /// - The archive file cannot be created
 /// - Any input file cannot be read
 /// - Files cannot be added due to I/O or permission errors
-pub fn create_archive(archive_path: &Path, files: &[&Path], verbose: bool) -> UResult<()> {
-    // Create the output file
-    let file = File::create(archive_path).map_err(|e| TarError::CannotCreateArchive {
-        path: archive_path.to_path_buf(),
-        source: e,
-    })?;
-
-    // Create Builder instance
-    let mut builder = Builder::new(file);
+pub fn create_archive(
+    archive_path: &Path,
+    files: &[&Path],
+    verbose: bool,
+    compression: CompressionMode,
+) -> UResult<()> {
+    let writer = ArchiveWriter::create(archive_path, compression)?;
+    let mut builder = Builder::new(writer);
     let mut out = BufWriter::new(io::stdout().lock());
 
     // Add each file or directory to the archive
@@ -106,7 +107,8 @@ pub fn create_archive(archive_path: &Path, files: &[&Path], verbose: bool) -> UR
 
     // Finish writing the archive
     out.flush().map_err(TarError::Io)?;
-    builder.finish().map_err(TarError::CannotFinalizeArchive)?;
+    let writer = builder.into_inner().map_err(TarError::CannotFinalizeArchive)?;
+    writer.finish()?;
 
     Ok(())
 }
diff --git a/src/uu/tar/src/operations/extract.rs b/src/uu/tar/src/operations/extract.rs
index 135b66f..e11c8c6 100644
--- a/src/uu/tar/src/operations/extract.rs
+++ b/src/uu/tar/src/operations/extract.rs
@@ -3,8 +3,9 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
+use crate::compression::open_archive_reader;
 use crate::errors::TarError;
-use std::fs::File;
+use crate::CompressionMode;
 use std::io::{self, BufWriter, Write};
 use std::path::Path;
 use tar::Archive;
@@ -23,12 +24,13 @@ use uucore::error::UResult;
 /// - The archive file cannot be opened
 /// - The archive format is invalid
 /// - Files cannot be extracted due to I/O or permission errors
-pub fn extract_archive(archive_path: &Path, verbose: bool) -> UResult<()> {
-    // Open the archive file
-    let file = File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?;
-
-    // Create Archive instance
-    let mut archive = Archive::new(file);
+pub fn extract_archive(
+    archive_path: &Path,
+    verbose: bool,
+    compression: CompressionMode,
+) -> UResult<()> {
+    let reader = open_archive_reader(archive_path, compression)?;
+    let mut archive = Archive::new(reader);
     let mut out = BufWriter::new(io::stdout().lock());
 
     // Extract to current directory
diff --git a/src/uu/tar/src/operations/list.rs b/src/uu/tar/src/operations/list.rs
index c424ca8..1699730 100644
--- a/src/uu/tar/src/operations/list.rs
+++ b/src/uu/tar/src/operations/list.rs
@@ -3,9 +3,10 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
+use crate::compression::open_archive_reader;
 use crate::errors::TarError;
+use crate::CompressionMode;
 use chrono::{TimeZone, Utc};
-use std::fs::File;
 use std::io::{self, BufWriter, Write};
 use std::path::Path;
 use tar::Archive;
@@ -13,10 +14,13 @@ use uucore::error::UResult;
 use uucore::fs::display_permissions_unix;
 
 /// List the contents of a tar archive, printing one entry per line.
-pub fn list_archive(archive_path: &Path, verbose: bool) -> UResult<()> {
-    let file: File =
-        File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?;
-    let mut archive = Archive::new(file);
+pub fn list_archive(
+    archive_path: &Path,
+    verbose: bool,
+    compression: CompressionMode,
+) -> UResult<()> {
+    let reader = open_archive_reader(archive_path, compression)?;
+    let mut archive = Archive::new(reader);
     let mut out = BufWriter::new(io::stdout().lock());
 
     for entry_result in archive.entries().map_err(TarError::CannotReadEntries)? {
diff --git a/src/uu/tar/src/tar.rs b/src/uu/tar/src/tar.rs
index 16e29d1..14a77c0 100644
--- a/src/uu/tar/src/tar.rs
+++ b/src/uu/tar/src/tar.rs
@@ -3,6 +3,7 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
+mod compression;
 pub mod errors;
 mod operations;
 
@@ -14,6 +15,17 @@ use uucore::format_usage;
 const ABOUT: &str = "an archiving utility";
 const USAGE: &str = "tar key [FILE...]\n tar {-c|-t|-x} [-v] -f ARCHIVE [FILE...]";
 
+/// Compression applied to the archive stream.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum CompressionMode {
+    /// Detect the format from the archive's leading bytes; only valid when reading.
+    Auto,
+    /// Plain, uncompressed tar stream.
+    None,
+    /// gzip-compressed tar stream.
+    Gzip,
+}
+
 /// Determines whether a string looks like a POSIX tar keystring.
 ///
 /// A valid keystring must not start with '-', must contain at least one
@@ -131,6 +143,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     };
 
     let verbose = matches.get_flag("verbose");
+    let explicit_compression = if matches.get_flag("gzip") {
+        Some(CompressionMode::Gzip)
+    } else {
+        None
+    };
 
     // Handle extract operation
     if matches.get_flag("extract") {
@@ -138,7 +155,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
             uucore::error::USimpleError::new(64, "option requires an argument -- 'f'")
         })?;
 
-        return operations::extract::extract_archive(archive_path, verbose);
+        let compression = explicit_compression.unwrap_or(CompressionMode::Auto);
+        return operations::extract::extract_archive(archive_path, verbose, compression);
     }
 
     // Handle create operation
@@ -159,7 +177,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
             ));
         }
 
-        return operations::create::create_archive(archive_path, &files, verbose);
+        let compression = explicit_compression.unwrap_or(CompressionMode::None);
+        return operations::create::create_archive(archive_path, &files, verbose, compression);
     }
 
     // Handle list operation
@@ -168,7 +187,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
             uucore::error::USimpleError::new(64, "option requires an argument -- 'f'")
         })?;
 
-        return operations::list::list_archive(archive_path, verbose);
+        let compression = explicit_compression.unwrap_or(CompressionMode::Auto);
+        return operations::list::list_archive(archive_path, verbose, compression);
     }
 
     // If no operation specified, show error
@@ -200,7 +220,7 @@ pub fn uu_app() -> Command {
             arg!(-f --file <ARCHIVE> "Use archive file or device ARCHIVE")
                 .value_parser(clap::value_parser!(PathBuf)),
             // Compression options
-            // arg!(-z --gzip "Filter through gzip"),
+            arg!(-z --gzip "Filter through gzip"),
             // arg!(-j --bzip2 "Filter through bzip2"),
             // arg!(-J --xz "Filter through xz"),
             // Common options
diff --git a/tests/by-util/test_tar.rs b/tests/by-util/test_tar.rs
index 51395a3..71f15cc 100644
--- a/tests/by-util/test_tar.rs
+++ b/tests/by-util/test_tar.rs
@@ -3,6 +3,7 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
+use std::io::Write;
 use std::path::{self, PathBuf};
 
 use uutests::{at_and_ucmd, new_ucmd};
@@ -749,3 +750,166 @@ fn test_list_conflicts_with_extract() {
         .code_is(2)
         .stderr_contains("cannot be used with");
 }
+
+// Gzip-compressed archive tests
+
+/// Builds an uncompressed tar stream containing a single regular file.
+fn tar_with_file(path: &str, content: &[u8]) -> Vec<u8> {
+    let mut tar_bytes = Vec::new();
+    {
+        let mut builder = tar_rs_crate::Builder::new(&mut tar_bytes);
+        let mut header = tar_rs_crate::Header::new_gnu();
+        header.set_path(path).unwrap();
+        header.set_size(content.len() as u64);
+        header.set_mode(0o644);
+        header.set_cksum();
+        builder.append(&header, content).unwrap();
+        builder.finish().unwrap();
+    }
+    tar_bytes
+}
+
+/// Compresses `data` into a single gzip member.
+fn gzip(data: &[u8]) -> Vec<u8> {
+    let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
+    encoder.write_all(data).unwrap();
+    encoder.finish().unwrap()
+}
+
+#[test]
+fn test_extract_gzip_archive() {
+    let (at, _ucmd) = at_and_ucmd!();
+
+    let gz_bytes = gzip(&tar_with_file("gzfile.txt", b"hello from gzip"));
+    at.write_bytes("archive.tar.gz", &gz_bytes);
+
+    // Extract using our tar implementation
+    new_ucmd!()
+        .arg("-xf")
+        .arg(at.plus("archive.tar.gz"))
+        .current_dir(at.as_string())
+        .succeeds();
+
+    assert!(at.file_exists("gzfile.txt"));
+    assert_eq!(at.read("gzfile.txt"), "hello from gzip");
+}
+
+#[test]
+fn test_extract_gzip_archive_with_explicit_flag() {
+    let (at, _ucmd) = at_and_ucmd!();
+
+    let gz_bytes = gzip(&tar_with_file("explicit-gzfile.txt", b"hello from explicit gzip"));
+    at.write_bytes("archive.tar.gz", &gz_bytes);
+
+    new_ucmd!()
+        .args(&["-zxf", &at.plus_as_string("archive.tar.gz")])
+        .current_dir(at.as_string())
+        .succeeds();
+
+    assert!(at.file_exists("explicit-gzfile.txt"));
+    assert_eq!(at.read("explicit-gzfile.txt"), "hello from explicit gzip");
+}
+
+#[test]
+fn test_extract_multi_member_gzip_archive() {
+    let (at, _ucmd) = at_and_ucmd!();
+
+    // Split inside the first tar header so that decoding only the first gzip member
+    // cannot yield a readable archive.
+    let tar_bytes = tar_with_file("multi.txt", b"hello from two gzip members");
+    let (head, tail) = tar_bytes.split_at(256);
+    let mut gz_bytes = gzip(head);
+    gz_bytes.extend(gzip(tail));
+    at.write_bytes("archive.tar.gz", &gz_bytes);
+
+    new_ucmd!()
+        .arg("-xf")
+        .arg(at.plus("archive.tar.gz"))
+        .current_dir(at.as_string())
+        .succeeds();
+
+    assert_eq!(at.read("multi.txt"), "hello from two gzip members");
+}
+
+#[test]
+fn test_create_gzip_archive() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    at.write("file1.txt", "test content");
+
+    ucmd.args(&["-zcf", "archive.tar.gz", "file1.txt"])
+        .succeeds()
+        .no_output();
+
+    let mut decoder =
+        flate2::read::GzDecoder::new(std::io::Cursor::new(at.read_bytes("archive.tar.gz")));
+    let mut decoded = Vec::new();
+    std::io::Read::read_to_end(&mut decoder, &mut decoded).unwrap();
+
+    let mut archive = tar_rs_crate::Archive::new(std::io::Cursor::new(decoded));
+    let mut entries = archive.entries().unwrap();
+    let mut entry = entries.next().unwrap().unwrap();
+    let mut contents = String::new();
+    std::io::Read::read_to_string(&mut entry, &mut contents).unwrap();
+
+    assert_eq!(entry.path().unwrap().to_str(), Some("file1.txt"));
+    assert_eq!(contents, "test content");
+    assert!(entries.next().is_none());
+}
+
+#[test]
+fn test_gzip_roundtrip() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    at.write("roundtrip.txt", "roundtrip gzip content");
+
+    ucmd.args(&["-zcf", "archive.tar.gz", "roundtrip.txt"])
+        .succeeds()
+        .no_output();
+
+    at.remove("roundtrip.txt");
+
+    new_ucmd!()
+        .args(&["-xf", &at.plus_as_string("archive.tar.gz")])
+        .current_dir(at.as_string())
+        .succeeds()
+        .no_output();
+
+    assert!(at.file_exists("roundtrip.txt"));
+    assert_eq!(at.read("roundtrip.txt"), "roundtrip gzip content");
+}
+
+#[test]
+fn test_list_gzip_archive() {
+    let (at, _ucmd) = at_and_ucmd!();
+
+    let gz_bytes = gzip(&tar_with_file("listed.txt", b"list test content"));
+    at.write_bytes("archive.tar.gz", &gz_bytes);
+
+    new_ucmd!()
+        .args(&["-tf", &at.plus_as_string("archive.tar.gz")])
+        .succeeds()
+        .stdout_contains("listed.txt");
+}
+
+#[test]
+fn test_list_gzip_archive_with_explicit_flag() {
+    let (at, _ucmd) = at_and_ucmd!();
+
+    let gz_bytes = gzip(&tar_with_file("explicit.txt", b"explicit gzip"));
+    at.write_bytes("archive.tar.gz", &gz_bytes);
+
+    new_ucmd!()
+        .args(&["-ztf", &at.plus_as_string("archive.tar.gz")])
+        .succeeds()
+        .stdout_contains("explicit.txt");
+}
+
+#[test]
+fn test_extract_invalid_gzip_archive_fails() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    at.write("invalid.tar.gz", "definitely not gzip");
+
+    ucmd.args(&["-xf", "invalid.tar.gz"]).fails().code_is(2);
+}