Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ anyhow = "1.0.100"
apple-sdk = "0.6.0"
aws-config = { version = "1", features = ["behavior-version-latest"] }
aws-sdk-s3 = "1"
base64 = "0.22"
bytes = "1.11.0"
clap = "4.5.52"
duct = "1.1.1"
Expand Down
2 changes: 1 addition & 1 deletion src/macho.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use {
crate::validation::ValidationContext,
anyhow::{anyhow, Context, Result},
anyhow::{Context, Result, anyhow},
apple_sdk::{AppleSdk, SdkSearch, SdkSearchLocation, SdkSorting, SdkVersion, SimpleSdk},
semver::Version,
std::{
Expand Down
165 changes: 155 additions & 10 deletions src/s3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@

use {
crate::release::build_wanted_filenames,
anyhow::{Result, anyhow},
anyhow::{Result, anyhow, ensure},
aws_sdk_s3::primitives::ByteStream,
base64::{Engine as _, engine::general_purpose::STANDARD as BASE64},
clap::ArgMatches,
futures::{StreamExt, TryStreamExt},
sha2::{Digest, Sha256},
std::{
collections::BTreeSet,
collections::{BTreeMap, BTreeSet},
path::{Path, PathBuf},
},
};
Expand All @@ -21,12 +23,88 @@ const UPLOAD_CONCURRENCY: usize = 4;
/// The AWS SDK uses exponential backoff with jitter between attempts.
const S3_MAX_ATTEMPTS: u32 = 5;

/// A validated SHA-256 checksum.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Sha256Sum([u8; 32]);

impl Sha256Sum {
    /// Wrap a raw 32-byte digest.
    fn from_bytes(bytes: [u8; 32]) -> Self {
        Self(bytes)
    }

    /// Render the digest as base64, the encoding the S3 `ChecksumSHA256`
    /// request field expects.
    fn to_base64(&self) -> String {
        BASE64.encode(self.0)
    }
}

impl TryFrom<&str> for Sha256Sum {
    type Error = anyhow::Error;

    /// Parse a hex-encoded digest (the format used by `SHA256SUMS` files).
    ///
    /// Fails if the input is not valid hex or does not decode to exactly
    /// 32 bytes.
    fn try_from(hex_digest: &str) -> Result<Self> {
        let decoded = hex::decode(hex_digest)?;
        match <[u8; 32]>::try_from(decoded) {
            Ok(raw) => Ok(Self(raw)),
            Err(_) => Err(anyhow!("expected 32-byte sha256 digest")),
        }
    }
}

/// Parse a `SHA256SUMS` file into a map of filename → digest.
///
/// Each line must have the form `<hex digest><space><filename>`. Errors
/// identify the offending line by its 1-based line number. Duplicate
/// filenames are rejected so a corrupted or hand-edited file cannot
/// silently shadow an earlier digest.
fn parse_sha256sums(content: &str) -> Result<BTreeMap<String, Sha256Sum>> {
    let mut digests = BTreeMap::new();

    for (line_no, line) in content.lines().enumerate() {
        // NOTE(review): coreutils `sha256sum` separates digest and filename
        // with TWO spaces (or " *" for binary mode); splitting on a single
        // space assumes our own writer emits exactly one — confirm against
        // the file written by upload-release-distributions.
        let (digest, filename) = line
            .split_once(" ")
            .ok_or_else(|| anyhow!("malformed SHA256SUMS line {}", line_no + 1))?;
        ensure!(
            !filename.is_empty(),
            "missing filename on SHA256SUMS line {}",
            line_no + 1
        );
        let digest = Sha256Sum::try_from(digest)?;
        // Fix: report WHICH filename is duplicated; the message previously
        // contained the literal placeholder "(unknown)".
        ensure!(
            digests.insert(filename.to_string(), digest).is_none(),
            "duplicate filename in SHA256SUMS: {filename}"
        );
    }

    Ok(digests)
}

fn ensure_sha256sums_coverage(
wanted_filenames: &BTreeMap<String, String>,
present_filenames: &BTreeSet<String>,
sha256_digests: &BTreeMap<String, Sha256Sum>,
) -> Result<()> {
let missing = wanted_filenames
.iter()
.filter(|(source, _)| present_filenames.contains(*source))
.map(|(_, dest)| dest)
Comment on lines +82 to +83
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be filter_map?

.filter(|dest| !sha256_digests.contains_key(*dest))
.collect::<Vec<_>>();

if missing.is_empty() {
Ok(())
} else {
Err(anyhow!(
"SHA256SUMS missing {} entries; first missing artifact: {}",
missing.len(),
missing[0]
))
}
}

/// Upload a single file to S3 under `key`, setting an immutable cache-control header.
///
/// When `sha256` is provided the SHA-256 content checksum is included in the
/// PUT request so that S3 verifies data integrity on receipt.
async fn upload_s3_file(
s3: &aws_sdk_s3::Client,
bucket: &str,
key: &str,
path: &Path,
sha256: Option<&Sha256Sum>,
dry_run: bool,
) -> Result<()> {
println!(
Expand All @@ -43,13 +121,18 @@ async fn upload_s3_file(
// concurrently, so splitting each file into multipart chunks would add complexity
// without meaningfully improving throughput.
let body = ByteStream::from_path(path).await?;
s3.put_object()
let mut req = s3
.put_object()
.bucket(bucket)
.key(key)
.body(body)
.cache_control("public, max-age=31536000, immutable")
.send()
.await?;
.cache_control("public, max-age=31536000, immutable");

if let Some(digest) = sha256 {
req = req.checksum_sha256(digest.to_base64());
}
Copy link
Copy Markdown
Collaborator

@indygreg indygreg Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For files where we don't have the checksum precomputed, we can compute it here (we have the file locally). This will prevent a file from getting corrupted during upload.

i.e. I would always send the SHA-256 to the S3 API.

Copy link
Copy Markdown
Member Author

@zanieb zanieb Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None is only used for dry-run, I'm not sure we should bother computing it there (though I agree it'd be nice not to use an Option and have a footgun)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add a guard that only lets it be null during dry-run, in the above branch where we early exit.


req.send().await?;
Ok(())
}

Expand Down Expand Up @@ -117,6 +200,26 @@ pub async fn command_upload_mirror_distributions(args: &ArgMatches) -> Result<()
.build();
let s3 = aws_sdk_s3::Client::from_conf(s3_config);

let shasums_path = dist_dir.join("SHA256SUMS");

// Parse SHA256SUMS (written and verified by upload-release-distributions) so
// we can supply a content checksum on every PUT. S3 will reject the upload if
// the data it receives does not match, guarding against silent corruption.
// In dry-run mode we skip reading the file entirely so the command can still be
// used to validate naming and missing-artifact handling on a fresh dist/.
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// used to validate naming and missing-artifact handling on a fresh dist/.
// used to validate naming and missing-artifact handling on a fresh dist.

let shasums_content = if dry_run {
None
} else {
Some(std::fs::read_to_string(&shasums_path)?)
};
let sha256_digests = if let Some(content) = &shasums_content {
let sha256_digests = parse_sha256sums(content)?;
ensure_sha256sums_coverage(&wanted_filenames, &filenames, &sha256_digests)?;
sha256_digests
} else {
BTreeMap::new()
};

// Upload all files concurrently (up to UPLOAD_CONCURRENCY in-flight at a time).
let upload_futs = wanted_filenames
.iter()
Expand All @@ -126,18 +229,60 @@ pub async fn command_upload_mirror_distributions(args: &ArgMatches) -> Result<()
let bucket = bucket.clone();
let key = format!("{prefix}{dest}");
let path = dist_dir.join(source);
async move { upload_s3_file(&s3, &bucket, &key, &path, dry_run).await }
let sha256 = sha256_digests.get(dest).cloned();
async move { upload_s3_file(&s3, &bucket, &key, &path, sha256.as_ref(), dry_run).await }
});

futures::stream::iter(upload_futs)
.buffer_unordered(UPLOAD_CONCURRENCY)
.try_collect::<Vec<_>>()
.await?;

// Upload the SHA256SUMS file already written (and verified) by upload-release-distributions.
let shasums_path = dist_dir.join("SHA256SUMS");
// Upload the SHA256SUMS file itself, computing its digest on the fly.
let shasums_sha256 = shasums_content.as_ref().map(|content| {
let mut hasher = Sha256::new();
hasher.update(content.as_bytes());
Sha256Sum::from_bytes(hasher.finalize().into())
});
Comment on lines +242 to +246
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sha256Sum::from_content seems like a reasonable helper

let shasums_key = format!("{prefix}SHA256SUMS");
upload_s3_file(&s3, bucket, &shasums_key, &shasums_path, dry_run).await?;
upload_s3_file(
&s3,
bucket,
&shasums_key,
&shasums_path,
shasums_sha256.as_ref(),
dry_run,
)
.await?;

Ok(())
}

#[cfg(test)]
mod tests {
    use super::{Sha256Sum, ensure_sha256sums_coverage, parse_sha256sums};
    use std::collections::{BTreeMap, BTreeSet};

    #[test]
    fn sha256_sum_rejects_non_sha256_lengths() {
        // "abcd" is valid hex but decodes to 2 bytes, not 32.
        let parsed = Sha256Sum::try_from("abcd");
        assert!(parsed.is_err());
    }

    #[test]
    fn parse_sha256sums_rejects_malformed_lines() {
        // A line with no space separator cannot be split into digest/filename.
        let result = parse_sha256sums("not-a-valid-line\n");
        assert!(result.is_err());
    }

    #[test]
    fn ensure_sha256sums_coverage_requires_every_uploaded_artifact() {
        let mut wanted_filenames = BTreeMap::new();
        wanted_filenames.insert("source.tar.zst".to_string(), "dest.tar.zst".to_string());
        let mut present_filenames = BTreeSet::new();
        present_filenames.insert("source.tar.zst".to_string());
        let sha256_digests = BTreeMap::new();

        // The artifact exists locally but has no digest entry, so coverage
        // checking must fail.
        let outcome =
            ensure_sha256sums_coverage(&wanted_filenames, &present_filenames, &sha256_digests);
        assert!(outcome.is_err());
    }
}
8 changes: 4 additions & 4 deletions src/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@

use {
crate::{json::*, macho::*},
anyhow::{anyhow, Context, Result},
anyhow::{Context, Result, anyhow},
clap::ArgMatches,
normalize_path::NormalizePath,
object::{
Architecture, Endianness, FileKind, Object, SectionIndex, SymbolScope,
elf::{
FileHeader32, FileHeader64, ET_DYN, ET_EXEC, SHN_UNDEF, STB_GLOBAL, STB_WEAK,
ET_DYN, ET_EXEC, FileHeader32, FileHeader64, SHN_UNDEF, STB_GLOBAL, STB_WEAK,
STV_DEFAULT, STV_HIDDEN,
},
macho::{MachHeader32, MachHeader64, LC_CODE_SIGNATURE, MH_OBJECT, MH_TWOLEVEL},
macho::{LC_CODE_SIGNATURE, MH_OBJECT, MH_TWOLEVEL, MachHeader32, MachHeader64},
read::{
elf::{Dyn, FileHeader, SectionHeader, Sym},
macho::{LoadCommandVariant, MachHeader, Nlist, Section, Segment},
pe::{ImageNtHeaders, PeFile, PeFile32, PeFile64},
},
Architecture, Endianness, FileKind, Object, SectionIndex, SymbolScope,
},
once_cell::sync::Lazy,
std::{
Expand Down