diff --git a/Cargo.lock b/Cargo.lock index 211fa3d51..9800d77be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2688,6 +2688,7 @@ dependencies = [ "apple-sdk", "aws-config", "aws-sdk-s3", + "base64 0.22.1", "bytes", "clap", "duct", diff --git a/Cargo.toml b/Cargo.toml index 13557b8b0..be8b93342 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ anyhow = "1.0.100" apple-sdk = "0.6.0" aws-config = { version = "1", features = ["behavior-version-latest"] } aws-sdk-s3 = "1" +base64 = "0.22" bytes = "1.11.0" clap = "4.5.52" duct = "1.1.1" diff --git a/src/macho.rs b/src/macho.rs index 5b3155164..2173ee7b6 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -4,7 +4,7 @@ use { crate::validation::ValidationContext, - anyhow::{anyhow, Context, Result}, + anyhow::{Context, Result, anyhow}, apple_sdk::{AppleSdk, SdkSearch, SdkSearchLocation, SdkSorting, SdkVersion, SimpleSdk}, semver::Version, std::{ diff --git a/src/s3.rs b/src/s3.rs index 92ebac4b7..09d356782 100644 --- a/src/s3.rs +++ b/src/s3.rs @@ -4,12 +4,14 @@ use { crate::release::build_wanted_filenames, - anyhow::{Result, anyhow}, + anyhow::{Result, anyhow, ensure}, aws_sdk_s3::primitives::ByteStream, + base64::{Engine as _, engine::general_purpose::STANDARD as BASE64}, clap::ArgMatches, futures::{StreamExt, TryStreamExt}, + sha2::{Digest, Sha256}, std::{ - collections::BTreeSet, + collections::{BTreeMap, BTreeSet}, path::{Path, PathBuf}, }, }; @@ -21,12 +23,88 @@ const UPLOAD_CONCURRENCY: usize = 4; /// The AWS SDK uses exponential backoff with jitter between attempts. const S3_MAX_ATTEMPTS: u32 = 5; +/// A validated SHA-256 checksum. 
+#[derive(Clone, Debug, Eq, PartialEq)] +struct Sha256Sum([u8; 32]); + +impl Sha256Sum { + fn from_bytes(bytes: [u8; 32]) -> Self { + Self(bytes) + } + + fn to_base64(&self) -> String { + BASE64.encode(self.0) + } +} + +impl TryFrom<&str> for Sha256Sum { + type Error = anyhow::Error; + + fn try_from(hex_digest: &str) -> Result<Self> { + let bytes = hex::decode(hex_digest)?; + let bytes: [u8; 32] = bytes + .try_into() + .map_err(|_| anyhow!("expected 32-byte sha256 digest"))?; + Ok(Self(bytes)) + } +} + +/// Parse a `SHA256SUMS` file into a map of filename → digest. +fn parse_sha256sums(content: &str) -> Result<BTreeMap<String, Sha256Sum>> { + let mut digests = BTreeMap::new(); + + for (line_no, line) in content.lines().enumerate() { + let (digest, filename) = line + .split_once("  ") + .ok_or_else(|| anyhow!("malformed SHA256SUMS line {}", line_no + 1))?; + ensure!( + !filename.is_empty(), + "missing filename on SHA256SUMS line {}", + line_no + 1 + ); + let digest = Sha256Sum::try_from(digest)?; + ensure!( + digests.insert(filename.to_string(), digest).is_none(), + "duplicate filename in SHA256SUMS: {filename}" + ); + } + + Ok(digests) +} + +fn ensure_sha256sums_coverage( + wanted_filenames: &BTreeMap<String, String>, + present_filenames: &BTreeSet<String>, + sha256_digests: &BTreeMap<String, Sha256Sum>, +) -> Result<()> { + let missing = wanted_filenames + .iter() + .filter(|(source, _)| present_filenames.contains(*source)) + .map(|(_, dest)| dest) + .filter(|dest| !sha256_digests.contains_key(*dest)) + .collect::<Vec<_>>(); + + if missing.is_empty() { + Ok(()) + } else { + Err(anyhow!( + "SHA256SUMS missing {} entries; first missing artifact: {}", + missing.len(), + missing[0] + )) + } +} + +/// Upload a single file to S3 under `key`, setting an immutable cache-control header. +/// +/// When `sha256` is provided the SHA-256 content checksum is included in the +/// PUT request so that S3 verifies data integrity on receipt. 
async fn upload_s3_file( s3: &aws_sdk_s3::Client, bucket: &str, key: &str, path: &Path, + sha256: Option<&Sha256Sum>, dry_run: bool, ) -> Result<()> { println!( @@ -43,13 +121,18 @@ async fn upload_s3_file( // concurrently, so splitting each file into multipart chunks would add complexity // without meaningfully improving throughput. let body = ByteStream::from_path(path).await?; - s3.put_object() + let mut req = s3 + .put_object() .bucket(bucket) .key(key) .body(body) - .cache_control("public, max-age=31536000, immutable") - .send() - .await?; + .cache_control("public, max-age=31536000, immutable"); + + if let Some(digest) = sha256 { + req = req.checksum_sha256(digest.to_base64()); + } + + req.send().await?; Ok(()) } @@ -117,6 +200,26 @@ pub async fn command_upload_mirror_distributions(args: &ArgMatches) -> Result<() .build(); let s3 = aws_sdk_s3::Client::from_conf(s3_config); + let shasums_path = dist_dir.join("SHA256SUMS"); + + // Parse SHA256SUMS (written and verified by upload-release-distributions) so + // we can supply a content checksum on every PUT. S3 will reject the upload if + // the data it receives does not match, guarding against silent corruption. + // In dry-run mode we skip reading the file entirely so the command can still be + // used to validate naming and missing-artifact handling on a fresh dist/. + let shasums_content = if dry_run { + None + } else { + Some(std::fs::read_to_string(&shasums_path)?) + }; + let sha256_digests = if let Some(content) = &shasums_content { + let sha256_digests = parse_sha256sums(content)?; + ensure_sha256sums_coverage(&wanted_filenames, &filenames, &sha256_digests)?; + sha256_digests + } else { + BTreeMap::new() + }; + // Upload all files concurrently (up to UPLOAD_CONCURRENCY in-flight at a time). 
let upload_futs = wanted_filenames .iter() @@ -126,7 +229,8 @@ pub async fn command_upload_mirror_distributions(args: &ArgMatches) -> Result<() let bucket = bucket.clone(); let key = format!("{prefix}{dest}"); let path = dist_dir.join(source); - async move { upload_s3_file(&s3, &bucket, &key, &path, dry_run).await } + let sha256 = sha256_digests.get(dest).cloned(); + async move { upload_s3_file(&s3, &bucket, &key, &path, sha256.as_ref(), dry_run).await } }); futures::stream::iter(upload_futs) @@ -134,10 +238,51 @@ .try_collect::<Vec<_>>() .await?; - // Upload the SHA256SUMS file already written (and verified) by upload-release-distributions. - let shasums_path = dist_dir.join("SHA256SUMS"); + // Upload the SHA256SUMS file itself, computing its digest on the fly. + let shasums_sha256 = shasums_content.as_ref().map(|content| { + let mut hasher = Sha256::new(); + hasher.update(content.as_bytes()); + Sha256Sum::from_bytes(hasher.finalize().into()) + }); let shasums_key = format!("{prefix}SHA256SUMS"); - upload_s3_file(&s3, bucket, &shasums_key, &shasums_path, dry_run).await?; + upload_s3_file( + &s3, + bucket, + &shasums_key, + &shasums_path, + shasums_sha256.as_ref(), + dry_run, + ) + .await?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::{Sha256Sum, ensure_sha256sums_coverage, parse_sha256sums}; + use std::collections::{BTreeMap, BTreeSet}; + + #[test] + fn sha256_sum_rejects_non_sha256_lengths() { + assert!(Sha256Sum::try_from("abcd").is_err()); + } + + #[test] + fn parse_sha256sums_rejects_malformed_lines() { + assert!(parse_sha256sums("not-a-valid-line\n").is_err()); + } + + #[test] + fn ensure_sha256sums_coverage_requires_every_uploaded_artifact() { + let wanted_filenames = + BTreeMap::from([("source.tar.zst".to_string(), "dest.tar.zst".to_string())]); + let present_filenames = BTreeSet::from(["source.tar.zst".to_string()]); + let sha256_digests = BTreeMap::new(); + + assert!( + 
ensure_sha256sums_coverage(&wanted_filenames, &present_filenames, &sha256_digests) + .is_err() + ); + } +} diff --git a/src/validation.rs b/src/validation.rs index 2528cc25e..1edd58332 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -4,21 +4,21 @@ use { crate::{json::*, macho::*}, - anyhow::{anyhow, Context, Result}, + anyhow::{Context, Result, anyhow}, clap::ArgMatches, normalize_path::NormalizePath, object::{ + Architecture, Endianness, FileKind, Object, SectionIndex, SymbolScope, elf::{ - FileHeader32, FileHeader64, ET_DYN, ET_EXEC, SHN_UNDEF, STB_GLOBAL, STB_WEAK, + ET_DYN, ET_EXEC, FileHeader32, FileHeader64, SHN_UNDEF, STB_GLOBAL, STB_WEAK, STV_DEFAULT, STV_HIDDEN, }, - macho::{MachHeader32, MachHeader64, LC_CODE_SIGNATURE, MH_OBJECT, MH_TWOLEVEL}, + macho::{LC_CODE_SIGNATURE, MH_OBJECT, MH_TWOLEVEL, MachHeader32, MachHeader64}, read::{ elf::{Dyn, FileHeader, SectionHeader, Sym}, macho::{LoadCommandVariant, MachHeader, Nlist, Section, Segment}, pe::{ImageNtHeaders, PeFile, PeFile32, PeFile64}, }, - Architecture, Endianness, FileKind, Object, SectionIndex, SymbolScope, }, once_cell::sync::Lazy, std::{