-
-
Notifications
You must be signed in to change notification settings - Fork 282
Include sha256 sums in S3 artifact uploads #1048
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4,12 +4,14 @@ | |||||
|
|
||||||
| use { | ||||||
| crate::release::build_wanted_filenames, | ||||||
| anyhow::{Result, anyhow}, | ||||||
| anyhow::{Result, anyhow, ensure}, | ||||||
| aws_sdk_s3::primitives::ByteStream, | ||||||
| base64::{Engine as _, engine::general_purpose::STANDARD as BASE64}, | ||||||
| clap::ArgMatches, | ||||||
| futures::{StreamExt, TryStreamExt}, | ||||||
| sha2::{Digest, Sha256}, | ||||||
| std::{ | ||||||
| collections::BTreeSet, | ||||||
| collections::{BTreeMap, BTreeSet}, | ||||||
| path::{Path, PathBuf}, | ||||||
| }, | ||||||
| }; | ||||||
|
|
@@ -21,12 +23,88 @@ const UPLOAD_CONCURRENCY: usize = 4; | |||||
| /// The AWS SDK uses exponential backoff with jitter between attempts. | ||||||
| const S3_MAX_ATTEMPTS: u32 = 5; | ||||||
|
|
||||||
| /// A validated SHA-256 checksum. | ||||||
| #[derive(Clone, Debug, Eq, PartialEq)] | ||||||
| struct Sha256Sum([u8; 32]); | ||||||
|
|
||||||
| impl Sha256Sum { | ||||||
| fn from_bytes(bytes: [u8; 32]) -> Self { | ||||||
| Self(bytes) | ||||||
| } | ||||||
|
|
||||||
| fn to_base64(&self) -> String { | ||||||
| BASE64.encode(self.0) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| impl TryFrom<&str> for Sha256Sum { | ||||||
| type Error = anyhow::Error; | ||||||
|
|
||||||
| fn try_from(hex_digest: &str) -> Result<Self> { | ||||||
| let bytes = hex::decode(hex_digest)?; | ||||||
| let bytes: [u8; 32] = bytes | ||||||
| .try_into() | ||||||
| .map_err(|_| anyhow!("expected 32-byte sha256 digest"))?; | ||||||
| Ok(Self(bytes)) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| /// Parse a `SHA256SUMS` file into a map of filename → digest. | ||||||
| fn parse_sha256sums(content: &str) -> Result<BTreeMap<String, Sha256Sum>> { | ||||||
| let mut digests = BTreeMap::new(); | ||||||
|
|
||||||
| for (line_no, line) in content.lines().enumerate() { | ||||||
| let (digest, filename) = line | ||||||
| .split_once(" ") | ||||||
| .ok_or_else(|| anyhow!("malformed SHA256SUMS line {}", line_no + 1))?; | ||||||
| ensure!( | ||||||
| !filename.is_empty(), | ||||||
| "missing filename on SHA256SUMS line {}", | ||||||
| line_no + 1 | ||||||
| ); | ||||||
| let digest = Sha256Sum::try_from(digest)?; | ||||||
| ensure!( | ||||||
| digests.insert(filename.to_string(), digest).is_none(), | ||||||
| "duplicate filename in SHA256SUMS: {filename}" | ||||||
| ); | ||||||
| } | ||||||
|
|
||||||
| Ok(digests) | ||||||
| } | ||||||
|
|
||||||
| fn ensure_sha256sums_coverage( | ||||||
| wanted_filenames: &BTreeMap<String, String>, | ||||||
| present_filenames: &BTreeSet<String>, | ||||||
| sha256_digests: &BTreeMap<String, Sha256Sum>, | ||||||
| ) -> Result<()> { | ||||||
| let missing = wanted_filenames | ||||||
| .iter() | ||||||
| .filter(|(source, _)| present_filenames.contains(*source)) | ||||||
| .map(|(_, dest)| dest) | ||||||
| .filter(|dest| !sha256_digests.contains_key(*dest)) | ||||||
| .collect::<Vec<_>>(); | ||||||
|
|
||||||
| if missing.is_empty() { | ||||||
| Ok(()) | ||||||
| } else { | ||||||
| Err(anyhow!( | ||||||
| "SHA256SUMS missing {} entries; first missing artifact: {}", | ||||||
| missing.len(), | ||||||
| missing[0] | ||||||
| )) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| /// Upload a single file to S3 under `key`, setting an immutable cache-control header. | ||||||
| /// | ||||||
| /// When `sha256` is provided the SHA-256 content checksum is included in the | ||||||
| /// PUT request so that S3 verifies data integrity on receipt. | ||||||
| async fn upload_s3_file( | ||||||
| s3: &aws_sdk_s3::Client, | ||||||
| bucket: &str, | ||||||
| key: &str, | ||||||
| path: &Path, | ||||||
| sha256: Option<&Sha256Sum>, | ||||||
| dry_run: bool, | ||||||
| ) -> Result<()> { | ||||||
| println!( | ||||||
|
|
@@ -43,13 +121,18 @@ async fn upload_s3_file( | |||||
| // concurrently, so splitting each file into multipart chunks would add complexity | ||||||
| // without meaningfully improving throughput. | ||||||
| let body = ByteStream::from_path(path).await?; | ||||||
| s3.put_object() | ||||||
| let mut req = s3 | ||||||
| .put_object() | ||||||
| .bucket(bucket) | ||||||
| .key(key) | ||||||
| .body(body) | ||||||
| .cache_control("public, max-age=31536000, immutable") | ||||||
| .send() | ||||||
| .await?; | ||||||
| .cache_control("public, max-age=31536000, immutable"); | ||||||
|
|
||||||
| if let Some(digest) = sha256 { | ||||||
| req = req.checksum_sha256(digest.to_base64()); | ||||||
| } | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. File files where we don't have the checksum precomputed, we can compute it here (we have the file locally). This will prevent a file from getting corrupted during upload. i.e. I would always send the SHA-256 to the S3 API.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can add a guard that only let's it be null during dry-run, in the above branch where we early exit. |
||||||
|
|
||||||
| req.send().await?; | ||||||
| Ok(()) | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -117,6 +200,26 @@ pub async fn command_upload_mirror_distributions(args: &ArgMatches) -> Result<() | |||||
| .build(); | ||||||
| let s3 = aws_sdk_s3::Client::from_conf(s3_config); | ||||||
|
|
||||||
| let shasums_path = dist_dir.join("SHA256SUMS"); | ||||||
|
|
||||||
| // Parse SHA256SUMS (written and verified by upload-release-distributions) so | ||||||
| // we can supply a content checksum on every PUT. S3 will reject the upload if | ||||||
| // the data it receives does not match, guarding against silent corruption. | ||||||
| // In dry-run mode we skip reading the file entirely so the command can still be | ||||||
| // used to validate naming and missing-artifact handling on a fresh dist/. | ||||||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| let shasums_content = if dry_run { | ||||||
| None | ||||||
| } else { | ||||||
| Some(std::fs::read_to_string(&shasums_path)?) | ||||||
| }; | ||||||
| let sha256_digests = if let Some(content) = &shasums_content { | ||||||
| let sha256_digests = parse_sha256sums(content)?; | ||||||
| ensure_sha256sums_coverage(&wanted_filenames, &filenames, &sha256_digests)?; | ||||||
| sha256_digests | ||||||
| } else { | ||||||
| BTreeMap::new() | ||||||
| }; | ||||||
|
|
||||||
| // Upload all files concurrently (up to UPLOAD_CONCURRENCY in-flight at a time). | ||||||
| let upload_futs = wanted_filenames | ||||||
| .iter() | ||||||
|
|
@@ -126,18 +229,60 @@ pub async fn command_upload_mirror_distributions(args: &ArgMatches) -> Result<() | |||||
| let bucket = bucket.clone(); | ||||||
| let key = format!("{prefix}{dest}"); | ||||||
| let path = dist_dir.join(source); | ||||||
| async move { upload_s3_file(&s3, &bucket, &key, &path, dry_run).await } | ||||||
| let sha256 = sha256_digests.get(dest).cloned(); | ||||||
| async move { upload_s3_file(&s3, &bucket, &key, &path, sha256.as_ref(), dry_run).await } | ||||||
| }); | ||||||
|
|
||||||
| futures::stream::iter(upload_futs) | ||||||
| .buffer_unordered(UPLOAD_CONCURRENCY) | ||||||
| .try_collect::<Vec<_>>() | ||||||
| .await?; | ||||||
|
|
||||||
| // Upload the SHA256SUMS file already written (and verified) by upload-release-distributions. | ||||||
| let shasums_path = dist_dir.join("SHA256SUMS"); | ||||||
| // Upload the SHA256SUMS file itself, computing its digest on the fly. | ||||||
| let shasums_sha256 = shasums_content.as_ref().map(|content| { | ||||||
| let mut hasher = Sha256::new(); | ||||||
| hasher.update(content.as_bytes()); | ||||||
| Sha256Sum::from_bytes(hasher.finalize().into()) | ||||||
| }); | ||||||
|
Comment on lines
+242
to
+246
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||
| let shasums_key = format!("{prefix}SHA256SUMS"); | ||||||
| upload_s3_file(&s3, bucket, &shasums_key, &shasums_path, dry_run).await?; | ||||||
| upload_s3_file( | ||||||
| &s3, | ||||||
| bucket, | ||||||
| &shasums_key, | ||||||
| &shasums_path, | ||||||
| shasums_sha256.as_ref(), | ||||||
| dry_run, | ||||||
| ) | ||||||
| .await?; | ||||||
|
|
||||||
| Ok(()) | ||||||
| } | ||||||
|
|
||||||
| #[cfg(test)] | ||||||
| mod tests { | ||||||
| use super::{Sha256Sum, ensure_sha256sums_coverage, parse_sha256sums}; | ||||||
| use std::collections::{BTreeMap, BTreeSet}; | ||||||
|
|
||||||
| #[test] | ||||||
| fn sha256_sum_rejects_non_sha256_lengths() { | ||||||
| assert!(Sha256Sum::try_from("abcd").is_err()); | ||||||
| } | ||||||
|
|
||||||
| #[test] | ||||||
| fn parse_sha256sums_rejects_malformed_lines() { | ||||||
| assert!(parse_sha256sums("not-a-valid-line\n").is_err()); | ||||||
| } | ||||||
|
|
||||||
| #[test] | ||||||
| fn ensure_sha256sums_coverage_requires_every_uploaded_artifact() { | ||||||
| let wanted_filenames = | ||||||
| BTreeMap::from([("source.tar.zst".to_string(), "dest.tar.zst".to_string())]); | ||||||
| let present_filenames = BTreeSet::from(["source.tar.zst".to_string()]); | ||||||
| let sha256_digests = BTreeMap::new(); | ||||||
|
|
||||||
| assert!( | ||||||
| ensure_sha256sums_coverage(&wanted_filenames, &present_filenames, &sha256_digests) | ||||||
| .is_err() | ||||||
| ); | ||||||
| } | ||||||
| } | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be
filter_map?